This patch implements a new struct bpf_func_proto bpf_mptcpify_proto. And
define a new helper bpf_mptcpify() to mptcpify a TCP socket dynamically as
an MPTCP one.
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 7 +++++++
kernel/bpf/verifier.c | 1 +
kernel/trace/bpf_trace.c | 2 ++
net/core/filter.c | 21 +++++++++++++++++++++
scripts/bpf_doc.py | 1 +
tools/include/uapi/linux/bpf.h | 7 +++++++
7 files changed, 40 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f58895830ada..424056fd5335 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2883,6 +2883,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto;
extern const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto;
+extern const struct bpf_func_proto bpf_mptcpify_proto;
extern const struct bpf_func_proto bpf_copy_from_user_proto;
extern const struct bpf_func_proto bpf_snprintf_btf_proto;
extern const struct bpf_func_proto bpf_snprintf_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6961a7b70028..ef175ea8ee4a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5569,6 +5569,12 @@ union bpf_attr {
* 0 on success.
*
* **-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct sock *bpf_mptcpify(void *sk)
+ * Description
+ * Dynamically mptcpify a TCP socket *sk* pointer as an MPTCP one.
+ * Return
+ * *sk* if it's valid, or **NULL** otherwise.
*/
#define ___BPF_FUNC_MAPPER(FN, ctx...) \
FN(unspec, 0, ##ctx) \
@@ -5783,6 +5789,7 @@ union bpf_attr {
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
+ FN(mptcpify, 212, ##ctx) \
/* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b54193de762b..fa8d656d25cb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -523,6 +523,7 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)
func_id == BPF_FUNC_skc_to_tcp6_sock ||
func_id == BPF_FUNC_skc_to_udp6_sock ||
func_id == BPF_FUNC_skc_to_mptcp_sock ||
+ func_id == BPF_FUNC_mptcpify ||
func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
func_id == BPF_FUNC_skc_to_tcp_request_sock;
}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 03b7f6b8e4f0..26c170a456c5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1926,6 +1926,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_skc_to_unix_sock_proto;
case BPF_FUNC_skc_to_mptcp_sock:
return &bpf_skc_to_mptcp_sock_proto;
+ case BPF_FUNC_mptcpify:
+ return &bpf_mptcpify_proto;
case BPF_FUNC_sk_storage_get:
return &bpf_sk_storage_get_tracing_proto;
case BPF_FUNC_sk_storage_delete:
diff --git a/net/core/filter.c b/net/core/filter.c
index 968139f4a1ac..e439f8b5f203 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
.ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
};
+BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
+{
+ if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
+ sk->sk_protocol = IPPROTO_MPTCP;
+ return (unsigned long)sk;
+ }
+
+ return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_mptcpify_proto = {
+ .func = bpf_mptcpify,
+ .gpl_only = false,
+ .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
+ .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+ .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+};
+
BPF_CALL_1(bpf_sock_from_file, struct file *, file)
{
return (unsigned long)sock_from_file(file);
@@ -11632,6 +11650,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_skc_to_mptcp_sock:
func = &bpf_skc_to_mptcp_sock_proto;
break;
+ case BPF_FUNC_mptcpify:
+ func = &bpf_mptcpify_proto;
+ break;
case BPF_FUNC_ktime_get_coarse_ns:
return &bpf_ktime_get_coarse_ns_proto;
default:
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index eaae2ce78381..7a20ab9c6513 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -751,6 +751,7 @@ class PrinterHelpers(Printer):
'struct file',
'struct bpf_timer',
'struct mptcp_sock',
+ 'struct sock',
'struct bpf_dynptr',
'const struct bpf_dynptr',
'struct iphdr',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6961a7b70028..ef175ea8ee4a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5569,6 +5569,12 @@ union bpf_attr {
* 0 on success.
*
* **-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct sock *bpf_mptcpify(void *sk)
+ * Description
+ * Dynamically mptcpify a TCP socket *sk* pointer as an MPTCP one.
+ * Return
+ * *sk* if it's valid, or **NULL** otherwise.
*/
#define ___BPF_FUNC_MAPPER(FN, ctx...) \
FN(unspec, 0, ##ctx) \
@@ -5783,6 +5789,7 @@ union bpf_attr {
FN(user_ringbuf_drain, 209, ##ctx) \
FN(cgrp_storage_get, 210, ##ctx) \
FN(cgrp_storage_delete, 211, ##ctx) \
+ FN(mptcpify, 212, ##ctx) \
/* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
--
2.35.3
Hi Geliang,
On 29/06/2023 04:12, Geliang Tang wrote:
> This patch implements a new struct bpf_func_proto bpf_mptcpify_proto. And
> define a new helper bpf_mptcpify() to mptcpify a TCP socket dynamically as
> an MPTCP one.
Nice feature, thank you for working on that!
Is it linked to what Nicolas Rybowski looked at a few years ago? I think
he put info in a Github ticket. Was there not an issue with this
technique? Did you see it and fix it?
I didn't look at the patchset in detail but I have one question, please
see below:
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 968139f4a1ac..e439f8b5f203 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
> .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
> };
>
> +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
> +{
> + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
> + sk->sk_protocol = IPPROTO_MPTCP;
> + return (unsigned long)sk;
> + }
How do we ensure such modifications are done at the right moment? I
mean: we can only change the protocol ID in very few places, before even
creating the socket (__sock_create()?). If we change it after, we will
break stuff: tcp ops, security labels, etc.
I thought it was not possible to hook at the right place when Nicolas
looked at that and/or ensure the restriction was done but I might be
mistaken and the situation has probably changed.
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
> Hi Geliang,
>
> On 29/06/2023 04:12, Geliang Tang wrote:
> > This patch implements a new struct bpf_func_proto bpf_mptcpify_proto. And
> > define a new helper bpf_mptcpify() to mptcpify a TCP socket dynamically as
> > an MPTCP one.
>
> Nice feature, thank you for working on that!
>
> Is it linked to what Nicolas Rybowski looked at a few years ago? I think
> he put info in a Github ticket. Was there not an issue with this
> technique? Did you see it and fix it?
>
> I didn't look at the patchset in detail but I have one question, please
> see below:
>
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 968139f4a1ac..e439f8b5f203 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
> > .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
> > };
> >
> > +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
> > +{
> > + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
> > + sk->sk_protocol = IPPROTO_MPTCP;
> > + return (unsigned long)sk;
> > + }
>
> How do we ensure such modifications are done at the right moment? I
> mean: we can only change the protocol ID in very few places, before even
> creating the socket (__sock_create()?). If we change it after, we will
> break stuff: tcp ops, security labels, etc.
You're right, Matt, we need to do the modifications at the very beginning
of sys_socket(). In v3, a new wrapper socket_create() is added, it's the
right place to do the modifications.
The v3 works well now. We can get three MP_CAPABLEs in the log.
Thanks,
-Geliang
>
> I thought it was not possible to hook at the right place when Nicolas
> looked at that and/or ensure the restriction was done but I might be
> mistaken and the situation has probably changed.
>
> Cheers,
> Matt
> --
> Tessares | Belgium | Hybrid Access Solutions
> www.tessares.net
Hi Geliang,
On 03/07/2023 08:54, Geliang Tang wrote:
> On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
>> On 29/06/2023 04:12, Geliang Tang wrote:
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index 968139f4a1ac..e439f8b5f203 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
>>> .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
>>> };
>>>
>>> +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
>>> +{
>>> + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
>>> + sk->sk_protocol = IPPROTO_MPTCP;
>>> + return (unsigned long)sk;
>>> + }
>>
>> How do we ensure such modifications are done at the right moment? I
>> mean: we can only change the protocol ID in very few places, before even
>> creating the socket (__sock_create()?). If we change it after, we will
>> break stuff: tcp ops, security labels, etc.
>
> You're right, Matt, we need to do the modifications at the very beginning
> of sys_socket(). In v3, a new wrapper socket_create() is added, it's the
> right place to do the modifications.
Thank you for this v3 and for your replies: it is clearer with your replies!
I hope such wrapper can be accepted upstream :)
It is then important to fail if bpf_mptcpify() is not executed at the
beginning of this new "socket_create()" wrapper. Will it be the case?
I guess the verifier will make sure bpf_mptcpify() is only called with a
function having "struct socket_args" as first argument so "by accident",
it will only be bpf_mptcpify(), right? Maybe there is a way to clearly
mark that bpf_mptcpify() cannot be called elsewhere? Or make sure it
fails if the socket has already been created? (maybe that's something we
need to ask to BPF devs)
One last thing: in your v3, I think you restrict the conversion to IPv4
only sockets, no?
(args->family == AF_INET || args->family == AF_INET6)
(and I guess you saw that the kernel test robot complained about one of
the patch because the new wrapper is not declared in the "include" dir)
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
On Mon, Jul 03, 2023 at 02:58:33PM +0200, Matthieu Baerts wrote:
> Hi Geliang,
>
> On 03/07/2023 08:54, Geliang Tang wrote:
> > On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
> >> On 29/06/2023 04:12, Geliang Tang wrote:
> >>> diff --git a/net/core/filter.c b/net/core/filter.c
> >>> index 968139f4a1ac..e439f8b5f203 100644
> >>> --- a/net/core/filter.c
> >>> +++ b/net/core/filter.c
> >>> @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
> >>> .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
> >>> };
> >>>
> >>> +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
> >>> +{
> >>> + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
> >>> + sk->sk_protocol = IPPROTO_MPTCP;
> >>> + return (unsigned long)sk;
> >>> + }
> >>
> >> How do we ensure such modifications are done at the right moment? I
> >> mean: we can only change the protocol ID in very few places, before even
> >> creating the socket (__sock_create()?). If we change it after, we will
> >> break stuff: tcp ops, security labels, etc.
> >
> > You're right, Matt, we need to do the modifications at the very beginning
> > of sys_socket(). In v3, a new wrapper socket_create() is added, it's the
> > right place to do the modifications.
>
> Thank you for this v3 and for your replies: it is clearer with your replies!
>
> I hope such wrapper can be accepted upstream :)
Hi Matt,
I just sent a v4, adding hooks in __sock_create(), instead of adding a
wrapper. I think v4 is much more acceptable by upstream.
>
> It is then important to fail if bpf_mptcpify() is not executed at the
> beginning of this new "socket_create()" wrapper. Will it be the case?
>
> I guess the verifier will make sure bpf_mptcpify() is only called with a
> function having "struct socket_args" as first argument so "by accident",
> it will only be bpf_mptcpify(), right? Maybe there is a way to clearly
> mark that bpf_mptcpify() cannot be called elsewhere? Or make sure it
> fails if the socket has already been created? (maybe that's something we
> need to ask to BPF devs)
>
> One last thing: in your v3, I think you restrict the conversion to IPv4
> only sockets, no?
>
> (args->family == AF_INET || args->family == AF_INET6)
I updated this in v4.
Thanks,
-Geliang
>
> (and I guess you saw that the kernel test robot complained about one of
> the patch because the new wrapper is not declared in the "include" dir)
>
> Cheers,
> Matt
> --
> Tessares | Belgium | Hybrid Access Solutions
> www.tessares.net
Hi Geliang,
On 05/07/2023 10:15, Geliang Tang wrote:
> On Mon, Jul 03, 2023 at 02:58:33PM +0200, Matthieu Baerts wrote:
>> Hi Geliang,
>>
>> On 03/07/2023 08:54, Geliang Tang wrote:
>>> On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
>>>> On 29/06/2023 04:12, Geliang Tang wrote:
>>>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>>>> index 968139f4a1ac..e439f8b5f203 100644
>>>>> --- a/net/core/filter.c
>>>>> +++ b/net/core/filter.c
>>>>> @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
>>>>> .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
>>>>> };
>>>>>
>>>>> +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
>>>>> +{
>>>>> + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
>>>>> + sk->sk_protocol = IPPROTO_MPTCP;
>>>>> + return (unsigned long)sk;
>>>>> + }
>>>>
>>>> How do we ensure such modifications are done at the right moment? I
>>>> mean: we can only change the protocol ID in very few places, before even
>>>> creating the socket (__sock_create()?). If we change it after, we will
>>>> break stuff: tcp ops, security labels, etc.
>>>
>>> You're right, Matt, we need to do the modifications at the very beginning
>>> of sys_socket(). In v3, a new wrapper socket_create() is added, it's the
>>> right place to do the modifications.
>>
>> Thank you for this v3 and for your replies: it is clearer with your replies!
>>
>> I hope such wrapper can be accepted upstream :)
>
> Hi Matt,
>
> I just sent a v4, adding hooks in __sock_create(), instead of adding a
> wrapper. I think v4 is much more acceptable by upstream.
Thank you for your reply. Yes, I think the v4 is more acceptable.
>> It is then important to fail if bpf_mptcpify() is not executed at the
>> beginning of this new "socket_create()" wrapper. Will it be the case?
>>
>> I guess the verifier will make sure bpf_mptcpify() is only called with a
>> function having "struct socket_args" as first argument so "by accident",
>> it will only be bpf_mptcpify(), right? Maybe there is a way to clearly
>> mark that bpf_mptcpify() cannot be called elsewhere? Or make sure it
>> fails if the socket has already been created? (maybe that's something we
>> need to ask to BPF devs)
>>
>> One last thing: in your v3, I think you restrict the conversion to IPv4
>> only sockets, no?
>>
>> (args->family == AF_INET || args->family == AF_INET6)
>
> I updated this in v4.
Thanks!
Cheers,
Matt
--
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
> Hi Geliang,
>
> On 29/06/2023 04:12, Geliang Tang wrote:
> > This patch implements a new struct bpf_func_proto bpf_mptcpify_proto. And
> > define a new helper bpf_mptcpify() to mptcpify a TCP socket dynamically as
> > an MPTCP one.
>
> Nice feature, thank you for working on that!
>
> Is it linked to what Nicolas Rybowski looked at a few years ago? I think
> he put info in a Github ticket. Was there not an issue with this
> technique? Did you see it and fix it?
Yes, this series addresses issue #79 "allow 'force to MPTCP' mode: BPF".
Nicolas looked at it before. I assigned it to myself a few days ago.
>
> I didn't look at the patchset in detail but I have one question, please
> see below:
>
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 968139f4a1ac..e439f8b5f203 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
> > .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
> > };
> >
> > +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
> > +{
> > + if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
> > + sk->sk_protocol = IPPROTO_MPTCP;
> > + return (unsigned long)sk;
> > + }
>
> How do we ensure such modifications are done at the right moment? I
> mean: we can only change the protocol ID in very few places, before even
> creating the socket (__sock_create()?). If we change it after, we will
> break stuff: tcp ops, security labels, etc.
This will be hooked in BPF_CGROUP_RUN_PROG_INET_SOCK() in inet_create().
See v2, a better version.
>
> I thought it was not possible to hook at the right place when Nicolas
> looked at that and/or ensure the restriction was done but I might be
> mistaken and the situation has probably changed.
The selftest works, proving its ability to convert TCP to MPTCP.
Thanks,
-Geliang
>
> Cheers,
> Matt
> --
> Tessares | Belgium | Hybrid Access Solutions
> www.tessares.net
© 2016 - 2026 Red Hat, Inc.