From: Geliang Tang <tanggeliang@kylinos.cn>
This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
mptcp socket.
mptcp_for_each_stubflow() and other helpers related to list_dentry are
added into progs/mptcp_bpf.h.
Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow
to inspect all the fields in a tcp_sock.
Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 35 ++++++++++
.../selftests/bpf/progs/mptcp_subflow.c | 69 +++++++++++++++++++
2 files changed, 104 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..d60e3c8f85a1 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -4,9 +4,44 @@
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
#define MPTCP_SUBFLOWS_MAX 8
+static inline int list_is_head(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list == head;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member) \
+ list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member); \
+ cond_break, !list_entry_is_head(pos, head, member); \
+ pos = list_next_entry(pos, member))
+
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member), \
+ n = list_next_entry(pos, member); \
+ cond_break, !list_entry_is_head(pos, head, member); \
+ pos = n, n = list_next_entry(n, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow) \
+ list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \
+ list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node)
+
extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
bool scheduled) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index 2e28f4a215b5..18396532f016 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,6 +4,7 @@
/* vmlinux.h, bpf_helpers.h and other 'define' */
#include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
char _license[] SEC("license") = "GPL";
@@ -57,3 +58,71 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
return 1;
}
+
+static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow;
+ int i = 0;
+
+ if (msk->pm.subflows != 1) {
+ bpf_printk("subflows=%u", msk->pm.subflows);
+ ctx->retval = -1;
+ }
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+
+ if (ssk->sk_mark != ++i)
+ ctx->retval = -1;
+ }
+
+ return 1;
+}
+
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow;
+
+ if (msk->pm.subflows != 1) {
+ bpf_printk("subflows=%u", msk->pm.subflows);
+ ctx->retval = -1;
+ }
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct inet_connection_sock *icsk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+ icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+ if (ssk->sk_mark == 2 &&
+ __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX))
+ ctx->retval = -1;
+ }
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ struct mptcp_sock *msk;
+
+ if (!sk || sk->protocol != IPPROTO_MPTCP) {
+ ctx->retval = -1;
+ return 1;
+ }
+
+ msk = bpf_core_cast(sk, struct mptcp_sock);
+ if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
+ return _check_getsockopt_subflow_mark(msk, ctx);
+ if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)
+ return _check_getsockopt_subflow_cc(msk, ctx);
+
+ return 1;
+}
--
2.43.0
Hi Geliang, Thank you for the new version. I still have one opened question about what is printed in case of error: do we have enough info to debug issues? Please see below. On 30/08/2024 08:55, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > This patch adds a "cgroup/getsockopt" way to inspect the subflows of a > mptcp socket. > > mptcp_for_each_stubflow() and other helpers related to list_dentry are > added into progs/mptcp_bpf.h. > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow > to inspect all the fields in a tcp_sock. (...) > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > index 2e28f4a215b5..18396532f016 100644 > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > @@ -4,6 +4,7 @@ > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > #include "bpf_tracing_net.h" > +#include "mptcp_bpf.h" > > char _license[] SEC("license") = "GPL"; > > @@ -57,3 +58,71 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > return 1; > } > + > +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx) > +{ > + struct mptcp_subflow_context *subflow; > + int i = 0; > + > + if (msk->pm.subflows != 1) { > + bpf_printk("subflows=%u", msk->pm.subflows); I think the message should be a bit longer, e.g. "MPTCP Subflow: expected 1 extra subflows, got %u" It looks like you didn't reply to my question from v5: if there is an issue with the verifications that are done in BPF, are these bpf_printk printed somewhere? e.g. if you force it to fail here, do you see the new message in the logs of the BPF selftests? If the test is unstable, we need to have a way understand where is the issue exactly, not somewhere where we set 'ctx->retval = -1'. The best would be to have a comprehensive message like what I'm suggesting here. If not, we should at least have a unique retval or errno somehow. > + ctx->retval = -1; > + } > + > + mptcp_for_each_subflow(msk, subflow) { > + struct sock *ssk; > + > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > + struct mptcp_subflow_context)); > + > + if (ssk->sk_mark != ++i) > + ctx->retval = -1; Please add an error message here, e.g. MPTCP Subflow: expected %d mark, got %d > + } > + > + return 1; (no need to change if you prefer, but your helper is always returning -1, maybe it doesn't need to return anything?) > +} > + > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx) > +{ > + struct mptcp_subflow_context *subflow; > + > + if (msk->pm.subflows != 1) { > + bpf_printk("subflows=%u", msk->pm.subflows); Same here. > + ctx->retval = -1; > + } > + > + mptcp_for_each_subflow(msk, subflow) { > + struct inet_connection_sock *icsk; > + struct sock *ssk; > + > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > + struct mptcp_subflow_context)); > + icsk = bpf_core_cast(ssk, struct inet_connection_sock); > + > + if (ssk->sk_mark == 2 && > + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) > + ctx->retval = -1; Same here. > + } > + > + return 1; > +} > + > +SEC("cgroup/getsockopt") > +int _getsockopt_subflow(struct bpf_sockopt *ctx) > +{ > + struct bpf_sock *sk = ctx->sk; > + struct mptcp_sock *msk; > + > + if (!sk || sk->protocol != IPPROTO_MPTCP) { > + ctx->retval = -1; Same here. > + return 1; > + } > + > + msk = bpf_core_cast(sk, struct mptcp_sock); Just to be sure: this cast can never fail, and no need to check if 'msk' is NULL, right? > + if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) > + return _check_getsockopt_subflow_mark(msk, ctx); > + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) > + return _check_getsockopt_subflow_cc(msk, ctx); > + > + return 1; > +} Cheers, Matt -- Sponsored by the NGI0 Core fund.
Hi Matt, On Mon, 2024-09-02 at 18:47 +0200, Matthieu Baerts wrote: > Hi Geliang, > > Thank you for the new version. I still have one opened question about > what is printed in case of error: do we have enough info to debug > issues? Please see below. > > On 30/08/2024 08:55, Geliang Tang wrote: > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > This patch adds a "cgroup/getsockopt" way to inspect the subflows > > of a > > mptcp socket. > > > > mptcp_for_each_stubflow() and other helpers related to list_dentry > > are > > added into progs/mptcp_bpf.h. > > > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list > > and use > > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will > > allow > > to inspect all the fields in a tcp_sock. > > (...) > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > index 2e28f4a215b5..18396532f016 100644 > > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > @@ -4,6 +4,7 @@ > > > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > > #include "bpf_tracing_net.h" > > +#include "mptcp_bpf.h" > > > > char _license[] SEC("license") = "GPL"; > > > > @@ -57,3 +58,71 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > > > return 1; > > } > > + > > +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, > > struct bpf_sockopt *ctx) > > +{ > > + struct mptcp_subflow_context *subflow; > > + int i = 0; > > + > > + if (msk->pm.subflows != 1) { > > + bpf_printk("subflows=%u", msk->pm.subflows); > > I think the message should be a bit longer, e.g. > > "MPTCP Subflow: expected 1 extra subflows, got %u" > > It looks like you didn't reply to my question from v5: if there is an > issue with the verifications that are done in BPF, are these > bpf_printk > printed somewhere? e.g. if you force it to fail here, do you see the > new > message in the logs of the BPF selftests? Yes, bpf_printk works. These logs are found in /sys/kernel/debug/tracing/trace if force it to fail here: test_progs-75858 [003] ...11 2338.270065: bpf_trace_printk: MPTCP Subflow: expected 1 extra subflows, got 1 > > If the test is unstable, we need to have a way understand where is > the > issue exactly, not somewhere where we set 'ctx->retval = -1'. The > best > would be to have a comprehensive message like what I'm suggesting > here. > If not, we should at least have a unique retval or errno somehow. > > > + ctx->retval = -1; > > + } > > + > > + mptcp_for_each_subflow(msk, subflow) { > > + struct sock *ssk; > > + > > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > > + struct mptcp_subflow_context)); > > + > > + if (ssk->sk_mark != ++i) > > + ctx->retval = -1; > > Please add an error message here, e.g. > > MPTCP Subflow: expected %d mark, got %d > > > + } > > + > > + return 1; > > (no need to change if you prefer, but your helper is always returning > -1, maybe it doesn't need to return anything?) Set the return value to ctx->retval in v7: ctx->retval = _check_getsockopt_subflow_mark() > > > +} > > + > > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, > > struct bpf_sockopt *ctx) > > +{ > > + struct mptcp_subflow_context *subflow; > > + > > + if (msk->pm.subflows != 1) { > > + bpf_printk("subflows=%u", msk->pm.subflows); > > Same here. > > > + ctx->retval = -1; > > + } > > + > > + mptcp_for_each_subflow(msk, subflow) { > > + struct inet_connection_sock *icsk; > > + struct sock *ssk; > > + > > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > > + struct mptcp_subflow_context)); > > + icsk = bpf_core_cast(ssk, struct inet_connection_sock); > > + > > + if (ssk->sk_mark == 2 && > > + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, > > TCP_CA_NAME_MAX)) > > + ctx->retval = -1; > > Same here. > > > + } > > + > > + return 1; > > +} > > + > > +SEC("cgroup/getsockopt") > > +int _getsockopt_subflow(struct bpf_sockopt *ctx) > > +{ > > + struct bpf_sock *sk = ctx->sk; > > + struct mptcp_sock *msk; > > + > > + if (!sk || sk->protocol != IPPROTO_MPTCP) { > > + ctx->retval = -1; > > Same here. > > > + return 1; > > + } > > + > > + msk = bpf_core_cast(sk, struct mptcp_sock); > > Just to be sure: this cast can never fail, and no need to check if > 'msk' > is NULL, right? Yes, no need to check if msk is NULL. Thanks, -Geliang > > > + if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) > > + return _check_getsockopt_subflow_mark(msk, ctx); > > + if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) > > + return _check_getsockopt_subflow_cc(msk, ctx); > > + > > + return 1; > > +} > > Cheers, > Matt
Hi Geliang, On 03/09/2024 10:00, Geliang Tang wrote: > On Mon, 2024-09-02 at 18:47 +0200, Matthieu Baerts wrote: >> On 30/08/2024 08:55, Geliang Tang wrote: (...) >> It looks like you didn't reply to my question from v5: if there is an >> issue with the verifications that are done in BPF, are these >> bpf_printk >> printed somewhere? e.g. if you force it to fail here, do you see the >> new >> message in the logs of the BPF selftests? > > Yes, bpf_printk works. These logs are found in > /sys/kernel/debug/tracing/trace if force it to fail here: > > test_progs-75858 [003] ...11 2338.270065: bpf_trace_printk: > MPTCP Subflow: expected 1 extra subflows, got 1 Good, but is it visible when launching the test or did you have to do something else to see them? Said differently: if such issue happens on the CI side, will we see the logs? Or do we need to modify the CI to get them in case of error? >> If the test is unstable, we need to have a way understand where is >> the >> issue exactly, not somewhere where we set 'ctx->retval = -1'. The >> best >> would be to have a comprehensive message like what I'm suggesting >> here. >> If not, we should at least have a unique retval or errno somehow. >> >>> + ctx->retval = -1; >>> + } >>> + >>> + mptcp_for_each_subflow(msk, subflow) { >>> + struct sock *ssk; >>> + >>> + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, >>> + struct mptcp_subflow_context)); >>> + >>> + if (ssk->sk_mark != ++i) >>> + ctx->retval = -1; >> >> Please add an error message here, e.g. >> >> MPTCP Subflow: expected %d mark, got %d >> >>> + } >>> + >>> + return 1; >> >> (no need to change if you prefer, but your helper is always returning >> -1, maybe it doesn't need to return anything?) > > Set the return value to ctx->retval in v7: > > ctx->retval = _check_getsockopt_subflow_mark() Can 'ctx->retval' be != 0 before _getsockopt_subflow()? Not to override an error if there was already one before. (...) >>> + return 1; >>> + } >>> + >>> + msk = bpf_core_cast(sk, struct mptcp_sock); >> >> Just to be sure: this cast can never fail, and no need to check if >> 'msk' >> is NULL, right? > > Yes, no need to check if msk is NULL. Good, thank you for your reply! Cheers, Matt -- Sponsored by the NGI0 Core fund.
© 2016 - 2024 Red Hat, Inc.