[PATCH mptcp-next v8 2/4] selftests/bpf: Add getsockopt to inspect mptcp subflow

Geliang Tang posted 4 patches 2 months, 2 weeks ago
[PATCH mptcp-next v8 2/4] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Geliang Tang 2 months, 2 weeks ago
From: Geliang Tang <tanggeliang@kylinos.cn>

This patch adds a "cgroup/getsockopt" way to inspect the subflows of an
mptcp socket.

mptcp_for_each_subflow() and other helpers related to list_entry are
added into progs/mptcp_bpf.h.

Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_core_cast to cast a pointer to tcp_sock for read-only access. This
makes it possible to inspect all the fields in a tcp_sock.

Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++
 .../selftests/bpf/progs/mptcp_subflow.c       | 69 +++++++++++++++++++
 2 files changed, 96 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..92d5deed0214 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -4,9 +4,36 @@
 
 #include <vmlinux.h>
 #include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
 
 #define MPTCP_SUBFLOWS_MAX 8
 
+static inline int list_is_head(const struct list_head *list,
+			       const struct list_head *head)
+{
+	return list == head;
+}
+
+#define list_entry(ptr, type, member)					\
+	container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member)				\
+	list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member)					\
+	list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member)				\
+	list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_first_entry(head, typeof(*pos), member);	\
+	     cond_break, !list_entry_is_head(pos, head, member);	\
+	     pos = list_next_entry(pos, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow)			\
+	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+
 extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
 					bool scheduled) __ksym;
 
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index 2e28f4a215b5..70302477e326 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,10 +4,12 @@
 
 /* vmlinux.h, bpf_helpers.h and other 'define' */
 #include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
 
 char _license[] SEC("license") = "GPL";
 
 char cc[TCP_CA_NAME_MAX] = "reno";
+int pid;
 
 /* Associate a subflow counter to each token */
 struct {
@@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
 
 	return 1;
 }
+
+static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+	struct mptcp_subflow_context *subflow;
+	int i = 0;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk;
+
+		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+							   struct mptcp_subflow_context));
+
+		if (ssk->sk_mark != ++i) {
+			ctx->retval = -2;
+			break;
+		}
+	}
+
+	return 1;
+}
+
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+	struct mptcp_subflow_context *subflow;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct inet_connection_sock *icsk;
+		struct sock *ssk;
+
+		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+							   struct mptcp_subflow_context));
+		icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+		if (ssk->sk_mark == 2 &&
+		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) {
+			ctx->retval = -2;
+			break;
+		}
+	}
+
+	return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+	struct bpf_sock *sk = ctx->sk;
+	struct mptcp_sock *msk;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 1;
+
+	if (!sk || sk->protocol != IPPROTO_MPTCP ||
+	    (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) &&
+	     !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)))
+		return 1;
+
+	msk = bpf_core_cast(sk, struct mptcp_sock);
+	if (msk->pm.subflows != 1) {
+		ctx->retval = -1;
+		return 1;
+	}
+
+	if (ctx->optname == SO_MARK)
+		return _check_getsockopt_subflow_mark(msk, ctx);
+	return _check_getsockopt_subflow_cc(msk, ctx);
+}
-- 
2.43.0
Re: [PATCH mptcp-next v8 2/4] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Geliang Tang 2 months, 2 weeks ago
Hi Matt,

On Thu, 2024-09-05 at 10:26 +0800, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> This patch adds a "cgroup/getsockopt" way to inspect the subflows of
> a
> mptcp socket.
> 
> mptcp_for_each_stubflow() and other helpers related to list_dentry
> are
> added into progs/mptcp_bpf.h.
> 
> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and
> use
> bpf_core_cast to cast a pointer to tcp_sock for readonly. It will
> allow
> to inspect all the fields in a tcp_sock.
> 
> Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>  tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++
>  .../selftests/bpf/progs/mptcp_subflow.c       | 69
> +++++++++++++++++++
>  2 files changed, 96 insertions(+)
> 
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> index 782f36ed027e..92d5deed0214 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> @@ -4,9 +4,36 @@
>  
>  #include <vmlinux.h>
>  #include <bpf/bpf_core_read.h>
> +#include "bpf_experimental.h"
>  
>  #define MPTCP_SUBFLOWS_MAX 8
>  
> +static inline int list_is_head(const struct list_head *list,
> +			       const struct list_head *head)
> +{
> +	return list == head;
> +}
> +
> +#define list_entry(ptr, type,
> member)					\
> +	container_of(ptr, type, member)
> +
> +#define list_first_entry(ptr, type,
> member)				\
> +	list_entry((ptr)->next, type, member)
> +
> +#define list_next_entry(pos,
> member)					\
> +	list_entry((pos)->member.next, typeof(*(pos)), member)
> +
> +#define list_entry_is_head(pos, head,
> member)				\
> +	list_is_head(&pos->member, (head))
> +
> +#define list_for_each_entry(pos, head,
> member)				\
> +	for (pos = list_first_entry(head, typeof(*pos),
> member);	\
> +	     cond_break, !list_entry_is_head(pos, head,
> member);	\
> +	     pos = list_next_entry(pos, member))
> +
> +#define mptcp_for_each_subflow(__msk,
> __subflow)			\
> +	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
> +

The helper mptcp_subflow_tcp_sock() added by the commit "selftests/bpf:
Add bpf_rr scheduler & test" needs to be added to this commit:

static __always_inline struct sock *
mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
{
        return subflow->tcp_sock;
}

Otherwise, compilation errors occur.

Thanks,
-Geliang

>  extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context
> *subflow,
>  					bool scheduled) __ksym;
>  
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> index 2e28f4a215b5..70302477e326 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> @@ -4,10 +4,12 @@
>  
>  /* vmlinux.h, bpf_helpers.h and other 'define' */
>  #include "bpf_tracing_net.h"
> +#include "mptcp_bpf.h"
>  
>  char _license[] SEC("license") = "GPL";
>  
>  char cc[TCP_CA_NAME_MAX] = "reno";
> +int pid;
>  
>  /* Associate a subflow counter to each token */
>  struct {
> @@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
>  
>  	return 1;
>  }
> +
> +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk,
> struct bpf_sockopt *ctx)
> +{
> +	struct mptcp_subflow_context *subflow;
> +	int i = 0;
> +
> +	mptcp_for_each_subflow(msk, subflow) {
> +		struct sock *ssk;
> +
> +		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> +							   struct
> mptcp_subflow_context));
> +
> +		if (ssk->sk_mark != ++i) {
> +			ctx->retval = -2;
> +			break;
> +		}
> +	}
> +
> +	return 1;
> +}
> +
> +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk,
> struct bpf_sockopt *ctx)
> +{
> +	struct mptcp_subflow_context *subflow;
> +
> +	mptcp_for_each_subflow(msk, subflow) {
> +		struct inet_connection_sock *icsk;
> +		struct sock *ssk;
> +
> +		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> +							   struct
> mptcp_subflow_context));
> +		icsk = bpf_core_cast(ssk, struct
> inet_connection_sock);
> +
> +		if (ssk->sk_mark == 2 &&
> +		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc,
> TCP_CA_NAME_MAX)) {
> +			ctx->retval = -2;
> +			break;
> +		}
> +	}
> +
> +	return 1;
> +}
> +
> +SEC("cgroup/getsockopt")
> +int _getsockopt_subflow(struct bpf_sockopt *ctx)
> +{
> +	struct bpf_sock *sk = ctx->sk;
> +	struct mptcp_sock *msk;
> +
> +	if (bpf_get_current_pid_tgid() >> 32 != pid)
> +		return 1;
> +
> +	if (!sk || sk->protocol != IPPROTO_MPTCP ||
> +	    (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
> &&
> +	     !(ctx->level == SOL_TCP && ctx->optname ==
> TCP_CONGESTION)))
> +		return 1;
> +
> +	msk = bpf_core_cast(sk, struct mptcp_sock);
> +	if (msk->pm.subflows != 1) {
> +		ctx->retval = -1;
> +		return 1;
> +	}
> +
> +	if (ctx->optname == SO_MARK)
> +		return _check_getsockopt_subflow_mark(msk, ctx);
> +	return _check_getsockopt_subflow_cc(msk, ctx);
> +}

Re: [PATCH mptcp-next v8 2/4] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Matthieu Baerts 2 months, 2 weeks ago
Hi Geliang,

On 05/09/2024 10:06, Geliang Tang wrote:
> Hi Matt,
> 
> On Thu, 2024-09-05 at 10:26 +0800, Geliang Tang wrote:
>> From: Geliang Tang <tanggeliang@kylinos.cn>
>>
>> This patch adds a "cgroup/getsockopt" way to inspect the subflows of
>> a
>> mptcp socket.
>>
>> mptcp_for_each_stubflow() and other helpers related to list_dentry
>> are
>> added into progs/mptcp_bpf.h.
>>
>> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and
>> use
>> bpf_core_cast to cast a pointer to tcp_sock for readonly. It will
>> allow
>> to inspect all the fields in a tcp_sock.
>>
>> Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
>> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
>> ---
>>  tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++
>>  .../selftests/bpf/progs/mptcp_subflow.c       | 69
>> +++++++++++++++++++
>>  2 files changed, 96 insertions(+)
>>
>> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>> b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>> index 782f36ed027e..92d5deed0214 100644
>> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>> @@ -4,9 +4,36 @@
>>  
>>  #include <vmlinux.h>
>>  #include <bpf/bpf_core_read.h>
>> +#include "bpf_experimental.h"
>>  
>>  #define MPTCP_SUBFLOWS_MAX 8
>>  
>> +static inline int list_is_head(const struct list_head *list,
>> +			       const struct list_head *head)
>> +{
>> +	return list == head;
>> +}
>> +
>> +#define list_entry(ptr, type,
>> member)					\
>> +	container_of(ptr, type, member)
>> +
>> +#define list_first_entry(ptr, type,
>> member)				\
>> +	list_entry((ptr)->next, type, member)
>> +
>> +#define list_next_entry(pos,
>> member)					\
>> +	list_entry((pos)->member.next, typeof(*(pos)), member)
>> +
>> +#define list_entry_is_head(pos, head,
>> member)				\
>> +	list_is_head(&pos->member, (head))
>> +
>> +#define list_for_each_entry(pos, head,
>> member)				\
>> +	for (pos = list_first_entry(head, typeof(*pos),
>> member);	\
>> +	     cond_break, !list_entry_is_head(pos, head,
>> member);	\
>> +	     pos = list_next_entry(pos, member))
>> +
>> +#define mptcp_for_each_subflow(__msk,
>> __subflow)			\
>> +	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
>> +
> 
> The helper mptcp_subflow_tcp_sock() added by the commit "selftests/bpf:
> Add bpf_rr scheduler & test" needs be added into this commit:
> 
> static __always_inline struct sock *
> mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
> {
>         return subflow->tcp_sock;
> }
> 
> Otherwise, compiling errors occur.

Thank you for the heads-up. Please note that the file doesn't exist in
net-next, so we need to add it, and update the MAINTAINERS file. I'm
fixing that. While at it, I'm also adding comments in the mptcp_bpf.h
file about where the helpers come from, and about the modifications
you made on top. That should help with maintenance.

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.

Re: [PATCH mptcp-next v8 2/4] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Geliang Tang 2 months, 2 weeks ago
Hi Matt,

On Thu, 2024-09-05 at 16:06 +0800, Geliang Tang wrote:
> Hi Matt,
> 
> On Thu, 2024-09-05 at 10:26 +0800, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > This patch adds a "cgroup/getsockopt" way to inspect the subflows
> > of
> > a

This should be "an mptcp socket", not "a mptcp socket". Please update
this for me when merging it.

Thanks,
-Geliang

> > mptcp socket.
> > 
> > mptcp_for_each_stubflow() and other helpers related to list_dentry
> > are
> > added into progs/mptcp_bpf.h.
> > 
> > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list
> > and
> > use
> > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will
> > allow
> > to inspect all the fields in a tcp_sock.
> > 
> > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >  tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++
> >  .../selftests/bpf/progs/mptcp_subflow.c       | 69
> > +++++++++++++++++++
> >  2 files changed, 96 insertions(+)
> > 
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > index 782f36ed027e..92d5deed0214 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > @@ -4,9 +4,36 @@
> >  
> >  #include <vmlinux.h>
> >  #include <bpf/bpf_core_read.h>
> > +#include "bpf_experimental.h"
> >  
> >  #define MPTCP_SUBFLOWS_MAX 8
> >  
> > +static inline int list_is_head(const struct list_head *list,
> > +			       const struct list_head *head)
> > +{
> > +	return list == head;
> > +}
> > +
> > +#define list_entry(ptr, type,
> > member)					\
> > +	container_of(ptr, type, member)
> > +
> > +#define list_first_entry(ptr, type,
> > member)				\
> > +	list_entry((ptr)->next, type, member)
> > +
> > +#define list_next_entry(pos,
> > member)					\
> > +	list_entry((pos)->member.next, typeof(*(pos)), member)
> > +
> > +#define list_entry_is_head(pos, head,
> > member)				\
> > +	list_is_head(&pos->member, (head))
> > +
> > +#define list_for_each_entry(pos, head,
> > member)				\
> > +	for (pos = list_first_entry(head, typeof(*pos),
> > member);	\
> > +	     cond_break, !list_entry_is_head(pos, head,
> > member);	\
> > +	     pos = list_next_entry(pos, member))
> > +
> > +#define mptcp_for_each_subflow(__msk,
> > __subflow)			\
> > +	list_for_each_entry(__subflow, &((__msk)->conn_list),
> > node)
> > +
> 
> The helper mptcp_subflow_tcp_sock() added by the commit
> "selftests/bpf:
> Add bpf_rr scheduler & test" needs be added into this commit:
> 
> static __always_inline struct sock *
> mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
> {
>         return subflow->tcp_sock;
> }
> 
> Otherwise, compiling errors occur.
> 
> Thanks,
> -Geliang
> 
> >  extern void mptcp_subflow_set_scheduled(struct
> > mptcp_subflow_context
> > *subflow,
> >  					bool scheduled) __ksym;
> >  
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > index 2e28f4a215b5..70302477e326 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > @@ -4,10 +4,12 @@
> >  
> >  /* vmlinux.h, bpf_helpers.h and other 'define' */
> >  #include "bpf_tracing_net.h"
> > +#include "mptcp_bpf.h"
> >  
> >  char _license[] SEC("license") = "GPL";
> >  
> >  char cc[TCP_CA_NAME_MAX] = "reno";
> > +int pid;
> >  
> >  /* Associate a subflow counter to each token */
> >  struct {
> > @@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
> >  
> >  	return 1;
> >  }
> > +
> > +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk,
> > struct bpf_sockopt *ctx)
> > +{
> > +	struct mptcp_subflow_context *subflow;
> > +	int i = 0;
> > +
> > +	mptcp_for_each_subflow(msk, subflow) {
> > +		struct sock *ssk;
> > +
> > +		ssk =
> > mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > +							   struct
> > mptcp_subflow_context));
> > +
> > +		if (ssk->sk_mark != ++i) {
> > +			ctx->retval = -2;
> > +			break;
> > +		}
> > +	}
> > +
> > +	return 1;
> > +}
> > +
> > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk,
> > struct bpf_sockopt *ctx)
> > +{
> > +	struct mptcp_subflow_context *subflow;
> > +
> > +	mptcp_for_each_subflow(msk, subflow) {
> > +		struct inet_connection_sock *icsk;
> > +		struct sock *ssk;
> > +
> > +		ssk =
> > mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > +							   struct
> > mptcp_subflow_context));
> > +		icsk = bpf_core_cast(ssk, struct
> > inet_connection_sock);
> > +
> > +		if (ssk->sk_mark == 2 &&
> > +		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc,
> > TCP_CA_NAME_MAX)) {
> > +			ctx->retval = -2;
> > +			break;
> > +		}
> > +	}
> > +
> > +	return 1;
> > +}
> > +
> > +SEC("cgroup/getsockopt")
> > +int _getsockopt_subflow(struct bpf_sockopt *ctx)
> > +{
> > +	struct bpf_sock *sk = ctx->sk;
> > +	struct mptcp_sock *msk;
> > +
> > +	if (bpf_get_current_pid_tgid() >> 32 != pid)
> > +		return 1;
> > +
> > +	if (!sk || sk->protocol != IPPROTO_MPTCP ||
> > +	    (!(ctx->level == SOL_SOCKET && ctx->optname ==
> > SO_MARK)
> > &&
> > +	     !(ctx->level == SOL_TCP && ctx->optname ==
> > TCP_CONGESTION)))
> > +		return 1;
> > +
> > +	msk = bpf_core_cast(sk, struct mptcp_sock);
> > +	if (msk->pm.subflows != 1) {
> > +		ctx->retval = -1;
> > +		return 1;
> > +	}
> > +
> > +	if (ctx->optname == SO_MARK)
> > +		return _check_getsockopt_subflow_mark(msk, ctx);
> > +	return _check_getsockopt_subflow_cc(msk, ctx);
> > +}
> 
>