From: Geliang Tang <tanggeliang@kylinos.cn>
This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
mptcp socket.
mptcp_for_each_stubflow() and other helpers related to list_dentry are
added into progs/mptcp_bpf.h.
Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow
to inspect all the fields in a tcp_sock.
Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 35 +++++++++++
.../selftests/bpf/progs/mptcp_subflow.c | 62 +++++++++++++++++++
2 files changed, 97 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..d60e3c8f85a1 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -4,9 +4,44 @@
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
#define MPTCP_SUBFLOWS_MAX 8
+static inline int list_is_head(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list == head;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member) \
+ list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member); \
+ cond_break, !list_entry_is_head(pos, head, member); \
+ pos = list_next_entry(pos, member))
+
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member), \
+ n = list_next_entry(pos, member); \
+ cond_break, !list_entry_is_head(pos, head, member); \
+ pos = n, n = list_next_entry(n, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow) \
+ list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \
+ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
+
extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
bool scheduled) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index bc572e1d6df8..6a7cf71c2795 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,6 +4,7 @@
/* vmlinux.h, bpf_helpers.h and other 'define' */
#include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
char _license[] SEC("license") = "GPL";
@@ -57,3 +58,64 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
return 1;
}
+
+static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow;
+ int i = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+
+ if (ssk->sk_mark != ++i)
+ ctx->retval = -1;
+ }
+
+ return 1;
+}
+
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow, *tmp;
+
+ mptcp_for_each_subflow_safe(msk, subflow, tmp) {
+ struct inet_connection_sock *icsk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+ icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+ if ((ssk->sk_mark == 1 &&
+ __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) ||
+ (ssk->sk_mark == 2 &&
+ __builtin_memcmp(icsk->icsk_ca_ops->name, "cubic", TCP_CA_NAME_MAX)))
+ ctx->retval = -1;
+ }
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ struct mptcp_sock *msk;
+
+ if (!sk || sk->protocol != IPPROTO_MPTCP)
+ return 1;
+
+ msk = bpf_core_cast(sk, struct mptcp_sock);
+ if (msk->pm.subflows != 1)
+ return 1;
+
+ if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
+ return _check_getsockopt_subflow_mark(msk, ctx);
+ if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)
+ return _check_getsockopt_subflow_cc(msk, ctx);
+
+ return 1;
+}
--
2.43.0
Hi Geliang, cc: -Martin (not to annoy him with MPTCP specific stuff) On 29/08/2024 04:53, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > This patch adds a "cgroup/getsockopt" way to inspect the subflows of a > mptcp socket. > > mptcp_for_each_stubflow() and other helpers related to list_dentry are > added into progs/mptcp_bpf.h. > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow > to inspect all the fields in a tcp_sock. (...) > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > index bc572e1d6df8..6a7cf71c2795 100644 > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > @@ -4,6 +4,7 @@ > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > #include "bpf_tracing_net.h" > +#include "mptcp_bpf.h" > > char _license[] SEC("license") = "GPL"; > > @@ -57,3 +58,64 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > return 1; > } > + > +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx) > +{ > + struct mptcp_subflow_context *subflow; > + int i = 0; > + > + mptcp_for_each_subflow(msk, subflow) { > + struct sock *ssk; > + > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > + struct mptcp_subflow_context)); > + > + if (ssk->sk_mark != ++i) > + ctx->retval = -1; > + } > + > + return 1; > +} > + > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx) > +{ > + struct mptcp_subflow_context *subflow, *tmp; > + > + mptcp_for_each_subflow_safe(msk, subflow, tmp) { Out of curiosity, why do you need the '_safe()' helper here? Just to check it works? > + struct inet_connection_sock *icsk; > + struct sock *ssk; > + > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > + struct mptcp_subflow_context)); > + icsk = bpf_core_cast(ssk, struct inet_connection_sock); > + > + if ((ssk->sk_mark == 1 && > + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) || In your previous version, you had a patch to set the cc on the other subflow, not to change the value of 'getsockopt(TCP_CONGESTION)' seen by the userspace. Why did you drop it? > + (ssk->sk_mark == 2 && > + __builtin_memcmp(icsk->icsk_ca_ops->name, "cubic", TCP_CA_NAME_MAX))) You cannot check this one (or at least not like that): the default TCP CC can be changed with a kernel config, it is not necessarily cubic. I think it is fine to only check the other subflow. > + ctx->retval = -1; Should you not mark it as an error if the mark is different from 1 or 2? (Or maybe not, because that's covered by _check_getsockopt_subflow_mark) > + } > + > + return 1; > +} > + > +SEC("cgroup/getsockopt") > +int _getsockopt_subflow(struct bpf_sockopt *ctx) > +{ > + struct bpf_sock *sk = ctx->sk; > + struct mptcp_sock *msk; > + > + if (!sk || sk->protocol != IPPROTO_MPTCP) > + return 1; If this test fails, the check will not be done. > + > + msk = bpf_core_cast(sk, struct mptcp_sock); > + if (msk->pm.subflows != 1) > + return 1; Same here: if there is no extra subflow, the test is silently not done. You change 'ctx->retval'. Also, it might be good to print something somewhere everywhere you set this 'ctx->retval' to -1, to be able to easily understand what went wrong, no? Can we use bpf_printk() or something similar? > + > + if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) > + return _check_getsockopt_subflow_mark(msk, ctx); > + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) > + return _check_getsockopt_subflow_cc(msk, ctx); > + > + return 1; > +} Cheers, Matt -- Sponsored by the NGI0 Core fund.
Hi Matt, On Thu, 2024-08-29 at 16:25 +0200, Matthieu Baerts wrote: > Hi Geliang, > > cc: -Martin (not to annoy him with MPTCP specific stuff) > > On 29/08/2024 04:53, Geliang Tang wrote: > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > This patch adds a "cgroup/getsockopt" way to inspect the subflows > > of a > > mptcp socket. > > > > mptcp_for_each_stubflow() and other helpers related to list_dentry > > are > > added into progs/mptcp_bpf.h. > > > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list > > and use > > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will > > allow > > to inspect all the fields in a tcp_sock. > > (...) > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > index bc572e1d6df8..6a7cf71c2795 100644 > > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > @@ -4,6 +4,7 @@ > > > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > > #include "bpf_tracing_net.h" > > +#include "mptcp_bpf.h" > > > > char _license[] SEC("license") = "GPL"; > > > > @@ -57,3 +58,64 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > > > return 1; > > } > > + > > +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, > > struct bpf_sockopt *ctx) > > +{ > > + struct mptcp_subflow_context *subflow; > > + int i = 0; > > + > > + mptcp_for_each_subflow(msk, subflow) { > > + struct sock *ssk; > > + > > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > > + struct mptcp_subflow_context)); > > + > > + if (ssk->sk_mark != ++i) > > + ctx->retval = -1; > > + } > > + > > + return 1; > > +} > > + > > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, > > struct bpf_sockopt *ctx) > > +{ > > + struct mptcp_subflow_context *subflow, *tmp; > > + > > + mptcp_for_each_subflow_safe(msk, subflow, tmp) { > > Out of curiosity, why do you need the '_safe()' helper here? Just to > check it works? > > > + struct inet_connection_sock *icsk; > > + struct sock *ssk; > > + > > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > > + struct mptcp_subflow_context)); > > + icsk = bpf_core_cast(ssk, struct inet_connection_sock); > > + > > + if ((ssk->sk_mark == 1 && > > + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, > > TCP_CA_NAME_MAX)) || > > In your previous version, you had a patch to set the cc on the other > subflow, not to change the value of 'getsockopt(TCP_CONGESTION)' seen > by > the userspace. Why did you drop it? > > > + (ssk->sk_mark == 2 && > > + __builtin_memcmp(icsk->icsk_ca_ops->name, "cubic", > > TCP_CA_NAME_MAX))) > > You cannot check this one (or at least not like that): the default > TCP > CC can be changed with a kernel config, it is not necessarily cubic. > > I think it is fine to only check the other subflow. > > > + ctx->retval = -1; > > Should you not mark it as an error if the mark is different from 1 or > 2? > (Or maybe not, because that's covered by > _check_getsockopt_subflow_mark) > > > + } > > + > > + return 1; > > +} > > + > > +SEC("cgroup/getsockopt") > > +int _getsockopt_subflow(struct bpf_sockopt *ctx) > > +{ > > + struct bpf_sock *sk = ctx->sk; > > + struct mptcp_sock *msk; > > + > > + if (!sk || sk->protocol != IPPROTO_MPTCP) > > + return 1; > > If this test fails, the check will not be done. > > > + > > + msk = bpf_core_cast(sk, struct mptcp_sock); > > + if (msk->pm.subflows != 1) > > + return 1; > > Same here: if there is no extra subflow, the test is silently not > done. > You change 'ctx->retval'. > > Also, it might be good to print something somewhere everywhere you > set > this 'ctx->retval' to -1, to be able to easily understand what went > wrong, no? Can we use bpf_printk() or something similar? getsockopt() is invoked by connect_to_fd() too, and msk->pm.subflows is 0 in this case. So in v6, I moved this "msk->pm.subflows" check into _check_getsockopt_subflow_mark/cc. Thanks, -Geliang > > > + > > + if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) > > + return _check_getsockopt_subflow_mark(msk, ctx); > > + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) > > + return _check_getsockopt_subflow_cc(msk, ctx); > > + > > + return 1; > > +} > > Cheers, > Matt
© 2016 - 2024 Red Hat, Inc.