[PATCH mptcp-next 2/4] bpf: Add bpf_mptcpify helper

Geliang Tang posted 4 patches 2 years, 7 months ago
Maintainers: Alexei Starovoitov <ast@kernel.org>, Daniel Borkmann <daniel@iogearbox.net>, John Fastabend <john.fastabend@gmail.com>, Andrii Nakryiko <andrii@kernel.org>, Martin KaFai Lau <martin.lau@linux.dev>, Song Liu <song@kernel.org>, Yonghong Song <yhs@fb.com>, KP Singh <kpsingh@kernel.org>, Stanislav Fomichev <sdf@google.com>, Hao Luo <haoluo@google.com>, Jiri Olsa <jolsa@kernel.org>, Steven Rostedt <rostedt@goodmis.org>, Masami Hiramatsu <mhiramat@kernel.org>, "David S. Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, Mykola Lysenko <mykolal@fb.com>, Shuah Khan <shuah@kernel.org>, Matthieu Baerts <matthieu.baerts@tessares.net>, Mat Martineau <martineau@kernel.org>
There is a newer version of this series
[PATCH mptcp-next 2/4] bpf: Add bpf_mptcpify helper
Posted by Geliang Tang 2 years, 7 months ago
This patch implements a new struct bpf_func_proto bpf_mptcpify_proto. And
define a new helper bpf_mptcpify() to mptcpify a TCP socket dynamically as
an MPTCP one.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       |  7 +++++++
 kernel/bpf/verifier.c          |  1 +
 kernel/trace/bpf_trace.c       |  2 ++
 net/core/filter.c              | 21 +++++++++++++++++++++
 scripts/bpf_doc.py             |  1 +
 tools/include/uapi/linux/bpf.h |  7 +++++++
 7 files changed, 40 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f58895830ada..424056fd5335 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2883,6 +2883,7 @@ extern const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_udp6_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_unix_sock_proto;
 extern const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto;
+extern const struct bpf_func_proto bpf_mptcpify_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_proto;
 extern const struct bpf_func_proto bpf_snprintf_btf_proto;
 extern const struct bpf_func_proto bpf_snprintf_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6961a7b70028..ef175ea8ee4a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5569,6 +5569,12 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct sock *bpf_mptcpify(void *sk)
+ *	Description
+ *		Dynamically mptcpify a TCP socket *sk* pointer as an MPTCP one.
+ *	Return
+ *		*sk* if it's valid, or **NULL** otherwise.
  */
 #define ___BPF_FUNC_MAPPER(FN, ctx...)			\
 	FN(unspec, 0, ##ctx)				\
@@ -5783,6 +5789,7 @@ union bpf_attr {
 	FN(user_ringbuf_drain, 209, ##ctx)		\
 	FN(cgrp_storage_get, 210, ##ctx)		\
 	FN(cgrp_storage_delete, 211, ##ctx)		\
+	FN(mptcpify, 212, ##ctx)			\
 	/* */
 
 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b54193de762b..fa8d656d25cb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -523,6 +523,7 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)
 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
 		func_id == BPF_FUNC_skc_to_udp6_sock ||
 		func_id == BPF_FUNC_skc_to_mptcp_sock ||
+		func_id == BPF_FUNC_mptcpify ||
 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
 }
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 03b7f6b8e4f0..26c170a456c5 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1926,6 +1926,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skc_to_unix_sock_proto;
 	case BPF_FUNC_skc_to_mptcp_sock:
 		return &bpf_skc_to_mptcp_sock_proto;
+	case BPF_FUNC_mptcpify:
+		return &bpf_mptcpify_proto;
 	case BPF_FUNC_sk_storage_get:
 		return &bpf_sk_storage_get_tracing_proto;
 	case BPF_FUNC_sk_storage_delete:
diff --git a/net/core/filter.c b/net/core/filter.c
index 968139f4a1ac..e439f8b5f203 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
 	.ret_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
 };
 
+BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
+{
+	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
+		sk->sk_protocol = IPPROTO_MPTCP;
+		return (unsigned long)sk;
+	}
+
+	return (unsigned long)NULL;
+}
+
+const struct bpf_func_proto bpf_mptcpify_proto = {
+	.func		= bpf_mptcpify,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_BTF_ID_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
+	.ret_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
+};
+
 BPF_CALL_1(bpf_sock_from_file, struct file *, file)
 {
 	return (unsigned long)sock_from_file(file);
@@ -11632,6 +11650,9 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_skc_to_mptcp_sock:
 		func = &bpf_skc_to_mptcp_sock_proto;
 		break;
+	case BPF_FUNC_mptcpify:
+		func = &bpf_mptcpify_proto;
+		break;
 	case BPF_FUNC_ktime_get_coarse_ns:
 		return &bpf_ktime_get_coarse_ns_proto;
 	default:
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index eaae2ce78381..7a20ab9c6513 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -751,6 +751,7 @@ class PrinterHelpers(Printer):
             'struct file',
             'struct bpf_timer',
             'struct mptcp_sock',
+            'struct sock',
             'struct bpf_dynptr',
             'const struct bpf_dynptr',
             'struct iphdr',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6961a7b70028..ef175ea8ee4a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5569,6 +5569,12 @@ union bpf_attr {
  *		0 on success.
  *
  *		**-ENOENT** if the bpf_local_storage cannot be found.
+ *
+ * struct sock *bpf_mptcpify(void *sk)
+ *	Description
+ *		Dynamically mptcpify a TCP socket *sk* pointer as an MPTCP one.
+ *	Return
+ *		*sk* if it's valid, or **NULL** otherwise.
  */
 #define ___BPF_FUNC_MAPPER(FN, ctx...)			\
 	FN(unspec, 0, ##ctx)				\
@@ -5783,6 +5789,7 @@ union bpf_attr {
 	FN(user_ringbuf_drain, 209, ##ctx)		\
 	FN(cgrp_storage_get, 210, ##ctx)		\
 	FN(cgrp_storage_delete, 211, ##ctx)		\
+	FN(mptcpify, 212, ##ctx)			\
 	/* */
 
 /* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
-- 
2.35.3
Re: [PATCH mptcp-next 2/4] bpf: Add bpf_mptcpify helper
Posted by Matthieu Baerts 2 years, 7 months ago
Hi Geliang,

On 29/06/2023 04:12, Geliang Tang wrote:
> This patch implements a new struct bpf_func_proto bpf_mptcpify_proto. And
> define a new helper bpf_mptcpify() to mptcpify a TCP socket dynamically as
> an MPTCP one.

Nice feature, thank you for working on that!

Is it linked to what Nicolas Rybowski looked at a few years ago? I think
he put info in a Github ticket. Was there not an issue with this
technique? Did you see it and fix it?

I didn't look at the patchset in detail but I have one question, please
see below:

> diff --git a/net/core/filter.c b/net/core/filter.c
> index 968139f4a1ac..e439f8b5f203 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
>  	.ret_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
>  };
>  
> +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
> +{
> +	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
> +		sk->sk_protocol = IPPROTO_MPTCP;
> +		return (unsigned long)sk;
> +	}

How do we ensure such modifications are done at the right moment? I
mean: we can only change the protocol ID in very few places, before even
creating the socket (__sock_create()?). If we change it after, we will
break stuff: tcp ops, security labels, etc.

I thought it was not possible to hook at the right place when Nicolas
looked at that and/or ensure the restriction was done but I might be
mistaken and the situation has probably changed.

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
Re: [PATCH mptcp-next 2/4] bpf: Add bpf_mptcpify helper
Posted by Geliang Tang 2 years, 7 months ago
On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
> Hi Geliang,
> 
> On 29/06/2023 04:12, Geliang Tang wrote:
> > This patch implements a new struct bpf_func_proto bpf_mptcpify_proto. And
> > define a new helper bpf_mptcpify() to mptcpify a TCP socket dynamically as
> > an MPTCP one.
> 
> Nice feature, thank you for working on that!
> 
> Is it linked to what Nicolas Rybowski looked at a few years ago? I think
> he put info in a Github ticket. Was there not an issue with this
> technique? Did you see it and fix it?
> 
> I didn't look at the patchset in detail but I have one question, please
> see below:
> 
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 968139f4a1ac..e439f8b5f203 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
> >  	.ret_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
> >  };
> >  
> > +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
> > +{
> > +	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
> > +		sk->sk_protocol = IPPROTO_MPTCP;
> > +		return (unsigned long)sk;
> > +	}
> 
> How do we ensure such modifications are done at the right moment? I
> mean: we can only change the protocol ID in very few places, before even
> creating the socket (__sock_create()?). If we change it after, we will
> break stuff: tcp ops, security labels, etc.

You're right, Matt, we need to do the modifications at the very beginning
of sys_socket(). In v3, a new wrapper socket_create() is added, it's the
right place to do the modifications.

The v3 works well now. We can get three MP_CAPABLEs in the log.

Thanks,
-Geliang

> 
> I thought it was not possible to hook at the right place when Nicolas
> looked at that and/or ensure the restriction was done but I might be
> mistaken and the situation has probably changed.
> 
> Cheers,
> Matt
> -- 
> Tessares | Belgium | Hybrid Access Solutions
> www.tessares.net
Re: [PATCH mptcp-next 2/4] bpf: Add bpf_mptcpify helper
Posted by Matthieu Baerts 2 years, 7 months ago
Hi Geliang,

On 03/07/2023 08:54, Geliang Tang wrote:
> On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
>> On 29/06/2023 04:12, Geliang Tang wrote:
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index 968139f4a1ac..e439f8b5f203 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
>>>  	.ret_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
>>>  };
>>>  
>>> +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
>>> +{
>>> +	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
>>> +		sk->sk_protocol = IPPROTO_MPTCP;
>>> +		return (unsigned long)sk;
>>> +	}
>>
>> How do we ensure such modifications are done at the right moment? I
>> mean: we can only change the protocol ID in very few places, before even
>> creating the socket (__sock_create()?). If we change it after, we will
>> break stuff: tcp ops, security labels, etc.
> 
> You're right, Matt, we need to do the modifications at the very beginning
> of sys_socket(). In v3, a new wrapper socket_create() is added, it's the
> right place to do the modifications.

Thank you for this v3 and for your replies: it is clearer with your replies!

I hope such wrapper can be accepted upstream :)

It is then important to fail if bpf_mptcpify() is not executed at the
beginning of this new "socket_create()" wrapper. Will it be the case?

I guess the verifier will make sure bpf_mptcpify() is only called with a
function having "struct socket_args" as first argument so "by accident",
it will only be bpf_mptcpify(), right? Maybe there is a way to clearly
mark that bpf_mptcpify() cannot be called elsewhere? Or make sure it
fails if the socket has already been created? (maybe that's something we
need to ask to BPF devs)

One last thing: in your v3, I think you restrict the conversion to IPv4
only sockets, no?

  (args->family == AF_INET || args->family == AF_INET6)

(and I guess you saw that the kernel test robot complained about one of
the patch because the new wrapper is not declared in the "include" dir)

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
Re: [PATCH mptcp-next 2/4] bpf: Add bpf_mptcpify helper
Posted by Geliang Tang 2 years, 7 months ago
On Mon, Jul 03, 2023 at 02:58:33PM +0200, Matthieu Baerts wrote:
> Hi Geliang,
> 
> On 03/07/2023 08:54, Geliang Tang wrote:
> > On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
> >> On 29/06/2023 04:12, Geliang Tang wrote:
> >>> diff --git a/net/core/filter.c b/net/core/filter.c
> >>> index 968139f4a1ac..e439f8b5f203 100644
> >>> --- a/net/core/filter.c
> >>> +++ b/net/core/filter.c
> >>> @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
> >>>  	.ret_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
> >>>  };
> >>>  
> >>> +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
> >>> +{
> >>> +	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
> >>> +		sk->sk_protocol = IPPROTO_MPTCP;
> >>> +		return (unsigned long)sk;
> >>> +	}
> >>
> >> How do we ensure such modifications are done at the right moment? I
> >> mean: we can only change the protocol ID in very few places, before even
> >> creating the socket (__sock_create()?). If we change it after, we will
> >> break stuff: tcp ops, security labels, etc.
> > 
> > You're right, Matt, we need to do the modifications at the very beginning
> > of sys_socket(). In v3, a new wrapper socket_create() is added, it's the
> > right place to do the modifications.
> 
> Thank you for this v3 and for your replies: it is clearer with your replies!
> 
> I hope such wrapper can be accepted upstream :)

Hi Matt,

I just sent a v4, adding hooks in __sock_create(), instead of adding a
wrapper. I think v4 is much more acceptable by upstream.

> 
> It is then important to fail if bpf_mptcpify() is not executed at the
> beginning of this new "socket_create()" wrapper. Will it be the case?
> 
> I guess the verifier will make sure bpf_mptcpify() is only called with a
> function having "struct socket_args" as first argument so "by accident",
> it will only be bpf_mptcpify(), right? Maybe there is a way to clearly
> mark that bpf_mptcpify() cannot be called elsewhere? Or make sure it
> fails if the socket has already been created? (maybe that's something we
> need to ask to BPF devs)
> 
> One last thing: in your v3, I think you restrict the conversion to IPv4
> only sockets, no?
> 
>   (args->family == AF_INET || args->family == AF_INET6)

I updated this in v4.

Thanks,
-Geliang

> 
> (and I guess you saw that the kernel test robot complained about one of
> the patch because the new wrapper is not declared in the "include" dir)
> 
> Cheers,
> Matt
> -- 
> Tessares | Belgium | Hybrid Access Solutions
> www.tessares.net
Re: [PATCH mptcp-next 2/4] bpf: Add bpf_mptcpify helper
Posted by Matthieu Baerts 2 years, 7 months ago
Hi Geliang,

On 05/07/2023 10:15, Geliang Tang wrote:
> On Mon, Jul 03, 2023 at 02:58:33PM +0200, Matthieu Baerts wrote:
>> Hi Geliang,
>>
>> On 03/07/2023 08:54, Geliang Tang wrote:
>>> On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
>>>> On 29/06/2023 04:12, Geliang Tang wrote:
>>>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>>>> index 968139f4a1ac..e439f8b5f203 100644
>>>>> --- a/net/core/filter.c
>>>>> +++ b/net/core/filter.c
>>>>> @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
>>>>>  	.ret_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
>>>>>  };
>>>>>  
>>>>> +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
>>>>> +{
>>>>> +	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
>>>>> +		sk->sk_protocol = IPPROTO_MPTCP;
>>>>> +		return (unsigned long)sk;
>>>>> +	}
>>>>
>>>> How do we ensure such modifications are done at the right moment? I
>>>> mean: we can only change the protocol ID in very few places, before even
>>>> creating the socket (__sock_create()?). If we change it after, we will
>>>> break stuff: tcp ops, security labels, etc.
>>>
>>> You're right, Matt, we need to do the modifications at the very beginning
>>> of sys_socket(). In v3, a new wrapper socket_create() is added, it's the
>>> right place to do the modifications.
>>
>> Thank you for this v3 and for your replies: it is clearer with your replies!
>>
>> I hope such wrapper can be accepted upstream :)
> 
> Hi Matt,
> 
> I just sent a v4, adding hooks in __sock_create(), instead of adding a
> wrapper. I think v4 is much more acceptable by upstream.

Thank you for your reply. Yes, I think the v4 is more acceptable.

>> It is then important to fail if bpf_mptcpify() is not executed at the
>> beginning of this new "socket_create()" wrapper. Will it be the case?
>>
>> I guess the verifier will make sure bpf_mptcpify() is only called with a
>> function having "struct socket_args" as first argument so "by accident",
>> it will only be bpf_mptcpify(), right? Maybe there is a way to clearly
>> mark that bpf_mptcpify() cannot be called elsewhere? Or make sure it
>> fails if the socket has already been created? (maybe that's something we
>> need to ask to BPF devs)
>>
>> One last thing: in your v3, I think you restrict the conversion to IPv4
>> only sockets, no?
>>
>>   (args->family == AF_INET || args->family == AF_INET6)
> 
> I updated this in v4.

Thanks!

Cheers,
Matt
-- 
Tessares | Belgium | Hybrid Access Solutions
www.tessares.net
Re: [PATCH mptcp-next 2/4] bpf: Add bpf_mptcpify helper
Posted by Geliang Tang 2 years, 7 months ago
On Thu, Jun 29, 2023 at 07:43:50PM +0200, Matthieu Baerts wrote:
> Hi Geliang,
> 
> On 29/06/2023 04:12, Geliang Tang wrote:
> > This patch implements a new struct bpf_func_proto bpf_mptcpify_proto. And
> > define a new helper bpf_mptcpify() to mptcpify a TCP socket dynamically as
> > an MPTCP one.
> 
> Nice feature, thank you for working on that!
> 
> Is it linked to what Nicolas Rybowski looked at a few years ago? I think
> he put info in a Github ticket. Was there not an issue with this
> technique? Did you see it and fix it?

Yes, this series addresses issue #79 "allow 'force to MPTCP' mode: BPF".
Nicolas looked at it before. I assigned it to myself a few days ago.

> 
> I didn't look at the patchset in detail but I have one question, please
> see below:
> 
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 968139f4a1ac..e439f8b5f203 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -11587,6 +11587,24 @@ const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
> >  	.ret_btf_id	= &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
> >  };
> >  
> > +BPF_CALL_1(bpf_mptcpify, struct sock *, sk)
> > +{
> > +	if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) {
> > +		sk->sk_protocol = IPPROTO_MPTCP;
> > +		return (unsigned long)sk;
> > +	}
> 
> How do we ensure such modifications are done at the right moment? I
> mean: we can only change the protocol ID in very few places, before even
> creating the socket (__sock_create()?). If we change it after, we will
> break stuff: tcp ops, security labels, etc.

This will be hooked in BPF_CGROUP_RUN_PROG_INET_SOCK() in inet_create().
See v2, a better version.

> 
> I thought it was not possible to hook at the right place when Nicolas
> looked at that and/or ensure the restriction was done but I might be
> mistaken and the situation has probably changed.

The selftest works, proving its ability to convert TCP to MPTCP.

Thanks,
-Geliang

> 
> Cheers,
> Matt
> -- 
> Tessares | Belgium | Hybrid Access Solutions
> www.tessares.net