[PATCH mptcp-next v5 1/3] selftests/bpf: Add getsockopt to inspect mptcp subflow

Geliang Tang posted 3 patches 2 months, 3 weeks ago
There is a newer version of this series
[PATCH mptcp-next v5 1/3] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Geliang Tang 2 months, 3 weeks ago
From: Geliang Tang <tanggeliang@kylinos.cn>

This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
mptcp socket.

mptcp_for_each_stubflow() and other helpers related to list_dentry are
added into progs/mptcp_bpf.h.

Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow
to inspect all the fields in a tcp_sock.

Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 tools/testing/selftests/bpf/progs/mptcp_bpf.h | 35 +++++++++++
 .../selftests/bpf/progs/mptcp_subflow.c       | 62 +++++++++++++++++++
 2 files changed, 97 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..d60e3c8f85a1 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -4,9 +4,44 @@
 
 #include <vmlinux.h>
 #include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
 
 #define MPTCP_SUBFLOWS_MAX 8
 
+static inline int list_is_head(const struct list_head *list,
+			       const struct list_head *head)
+{
+	return list == head;
+}
+
+#define list_entry(ptr, type, member)					\
+	container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member)				\
+	list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member)					\
+	list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member)				\
+	list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_first_entry(head, typeof(*pos), member);	\
+	     cond_break, !list_entry_is_head(pos, head, member);	\
+	     pos = list_next_entry(pos, member))
+
+#define list_for_each_entry_safe(pos, n, head, member)			\
+	for (pos = list_first_entry(head, typeof(*pos), member),	\
+		n = list_next_entry(pos, member);			\
+	     cond_break, !list_entry_is_head(pos, head, member);	\
+	     pos = n, n = list_next_entry(n, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow)			\
+	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp)		\
+	list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
+
 extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
 					bool scheduled) __ksym;
 
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index bc572e1d6df8..6a7cf71c2795 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,6 +4,7 @@
 
 /* vmlinux.h, bpf_helpers.h and other 'define' */
 #include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -57,3 +58,64 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
 
 	return 1;
 }
+
+static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+	struct mptcp_subflow_context *subflow;
+	int i = 0;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct sock *ssk;
+
+		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+							   struct mptcp_subflow_context));
+
+		if (ssk->sk_mark != ++i)
+			ctx->retval = -1;
+	}
+
+	return 1;
+}
+
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+	struct mptcp_subflow_context *subflow, *tmp;
+
+	mptcp_for_each_subflow_safe(msk, subflow, tmp) {
+		struct inet_connection_sock *icsk;
+		struct sock *ssk;
+
+		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+							   struct mptcp_subflow_context));
+		icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+		if ((ssk->sk_mark == 1 &&
+		     __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) ||
+		    (ssk->sk_mark == 2 &&
+		     __builtin_memcmp(icsk->icsk_ca_ops->name, "cubic", TCP_CA_NAME_MAX)))
+			ctx->retval = -1;
+	}
+
+	return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+	struct bpf_sock *sk = ctx->sk;
+	struct mptcp_sock *msk;
+
+	if (!sk || sk->protocol != IPPROTO_MPTCP)
+		return 1;
+
+	msk = bpf_core_cast(sk, struct mptcp_sock);
+	if (msk->pm.subflows != 1)
+		return 1;
+
+	if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
+		return _check_getsockopt_subflow_mark(msk, ctx);
+	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)
+		return _check_getsockopt_subflow_cc(msk, ctx);
+
+	return 1;
+}
-- 
2.43.0
Re: [PATCH mptcp-next v5 1/3] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Matthieu Baerts 2 months, 3 weeks ago
Hi Geliang,

cc: -Martin (not to annoy him with MPTCP specific stuff)

On 29/08/2024 04:53, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
> mptcp socket.
> 
> mptcp_for_each_stubflow() and other helpers related to list_dentry are
> added into progs/mptcp_bpf.h.
> 
> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
> bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow
> to inspect all the fields in a tcp_sock.

(...)

> diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> index bc572e1d6df8..6a7cf71c2795 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> @@ -4,6 +4,7 @@
>  
>  /* vmlinux.h, bpf_helpers.h and other 'define' */
>  #include "bpf_tracing_net.h"
> +#include "mptcp_bpf.h"
>  
>  char _license[] SEC("license") = "GPL";
>  
> @@ -57,3 +58,64 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
>  
>  	return 1;
>  }
> +
> +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
> +{
> +	struct mptcp_subflow_context *subflow;
> +	int i = 0;
> +
> +	mptcp_for_each_subflow(msk, subflow) {
> +		struct sock *ssk;
> +
> +		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> +							   struct mptcp_subflow_context));
> +
> +		if (ssk->sk_mark != ++i)
> +			ctx->retval = -1;
> +	}
> +
> +	return 1;
> +}
> +
> +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
> +{
> +	struct mptcp_subflow_context *subflow, *tmp;
> +
> +	mptcp_for_each_subflow_safe(msk, subflow, tmp) {

Out of curiosity, why do you need the '_safe()' helper here? Just to
check it works?

> +		struct inet_connection_sock *icsk;
> +		struct sock *ssk;
> +
> +		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> +							   struct mptcp_subflow_context));
> +		icsk = bpf_core_cast(ssk, struct inet_connection_sock);
> +
> +		if ((ssk->sk_mark == 1 &&
> +		     __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) ||

In your previous version, you had a patch to set the cc on the other
subflow, not to change the value of 'getsockopt(TCP_CONGESTION)' seen by
the userspace. Why did you drop it?

> +		    (ssk->sk_mark == 2 &&
> +		     __builtin_memcmp(icsk->icsk_ca_ops->name, "cubic", TCP_CA_NAME_MAX)))

You cannot check this one (or at least not like that): the default TCP
CC can be changed with a kernel config, it is not necessarily cubic.

I think it is fine to only check the other subflow.

> +			ctx->retval = -1;

Should you not mark it as an error if the mark is different from 1 or 2?
(Or maybe not, because that's covered by _check_getsockopt_subflow_mark)

> +	}
> +
> +	return 1;
> +}
> +
> +SEC("cgroup/getsockopt")
> +int _getsockopt_subflow(struct bpf_sockopt *ctx)
> +{
> +	struct bpf_sock *sk = ctx->sk;
> +	struct mptcp_sock *msk;
> +
> +	if (!sk || sk->protocol != IPPROTO_MPTCP)
> +		return 1;

If this test fails, the check will not be done.

> +
> +	msk = bpf_core_cast(sk, struct mptcp_sock);
> +	if (msk->pm.subflows != 1)
> +		return 1;

Same here: if there is no extra subflow, the test is silently not done.
You change 'ctx->retval'.

Also, it might be good to print something somewhere everywhere you set
this 'ctx->retval' to -1, to be able to easily understand what went
wrong, no? Can we use bpf_printk() or something similar?

> +
> +	if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
> +		return _check_getsockopt_subflow_mark(msk, ctx);
> +	if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)
> +		return _check_getsockopt_subflow_cc(msk, ctx);
> +
> +	return 1;
> +}

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.
Re: [PATCH mptcp-next v5 1/3] selftests/bpf: Add getsockopt to inspect mptcp subflow
Posted by Geliang Tang 2 months, 3 weeks ago
Hi Matt,

On Thu, 2024-08-29 at 16:25 +0200, Matthieu Baerts wrote:
> Hi Geliang,
> 
> cc: -Martin (not to annoy him with MPTCP specific stuff)
> 
> On 29/08/2024 04:53, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > This patch adds a "cgroup/getsockopt" way to inspect the subflows
> > of a
> > mptcp socket.
> > 
> > mptcp_for_each_stubflow() and other helpers related to list_dentry
> > are
> > added into progs/mptcp_bpf.h.
> > 
> > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list
> > and use
> > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will
> > allow
> > to inspect all the fields in a tcp_sock.
> 
> (...)
> 
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > index bc572e1d6df8..6a7cf71c2795 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > @@ -4,6 +4,7 @@
> >  
> >  /* vmlinux.h, bpf_helpers.h and other 'define' */
> >  #include "bpf_tracing_net.h"
> > +#include "mptcp_bpf.h"
> >  
> >  char _license[] SEC("license") = "GPL";
> >  
> > @@ -57,3 +58,64 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
> >  
> >   return 1;
> >  }
> > +
> > +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk,
> > struct bpf_sockopt *ctx)
> > +{
> > + struct mptcp_subflow_context *subflow;
> > + int i = 0;
> > +
> > + mptcp_for_each_subflow(msk, subflow) {
> > + struct sock *ssk;
> > +
> > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > +    struct mptcp_subflow_context));
> > +
> > + if (ssk->sk_mark != ++i)
> > + ctx->retval = -1;
> > + }
> > +
> > + return 1;
> > +}
> > +
> > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk,
> > struct bpf_sockopt *ctx)
> > +{
> > + struct mptcp_subflow_context *subflow, *tmp;
> > +
> > + mptcp_for_each_subflow_safe(msk, subflow, tmp) {
> 
> Out of curiosity, why do you need the '_safe()' helper here? Just to
> check it works?
> 
> > + struct inet_connection_sock *icsk;
> > + struct sock *ssk;
> > +
> > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > +    struct mptcp_subflow_context));
> > + icsk = bpf_core_cast(ssk, struct inet_connection_sock);
> > +
> > + if ((ssk->sk_mark == 1 &&
> > +      __builtin_memcmp(icsk->icsk_ca_ops->name, cc,
> > TCP_CA_NAME_MAX)) ||
> 
> In your previous version, you had a patch to set the cc on the other
> subflow, not to change the value of 'getsockopt(TCP_CONGESTION)' seen
> by
> the userspace. Why did you drop it?
> 
> > +     (ssk->sk_mark == 2 &&
> > +      __builtin_memcmp(icsk->icsk_ca_ops->name, "cubic",
> > TCP_CA_NAME_MAX)))
> 
> You cannot check this one (or at least not like that): the default
> TCP
> CC can be changed with a kernel config, it is not necessarily cubic.
> 
> I think it is fine to only check the other subflow.
> 
> > + ctx->retval = -1;
> 
> Should you not mark it as an error if the mark is different from 1 or
> 2?
> (Or maybe not, because that's covered by
> _check_getsockopt_subflow_mark)
> 
> > + }
> > +
> > + return 1;
> > +}
> > +
> > +SEC("cgroup/getsockopt")
> > +int _getsockopt_subflow(struct bpf_sockopt *ctx)
> > +{
> > + struct bpf_sock *sk = ctx->sk;
> > + struct mptcp_sock *msk;
> > +
> > + if (!sk || sk->protocol != IPPROTO_MPTCP)
> > + return 1;
> 
> If this test fails, the check will not be done.
> 
> > +
> > + msk = bpf_core_cast(sk, struct mptcp_sock);
> > + if (msk->pm.subflows != 1)
> > + return 1;
> 
> Same here: if there is no extra subflow, the test is silently not
> done.
> You change 'ctx->retval'.
> 
> Also, it might be good to print something somewhere everywhere you
> set
> this 'ctx->retval' to -1, to be able to easily understand what went
> wrong, no? Can we use bpf_printk() or something similar?

getsockopt() is invoked by connect_to_fd() too, and msk->pm.subflows is
0 in this case. So in v6, I moved this "msk->pm.subflows" check into
_check_getsockopt_subflow_mark/cc.

Thanks,
-Geliang

> 
> > +
> > + if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
> > + return _check_getsockopt_subflow_mark(msk, ctx);
> > + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)
> > + return _check_getsockopt_subflow_cc(msk, ctx);
> > +
> > + return 1;
> > +}
> 
> Cheers,
> Matt