From: Geliang Tang <tanggeliang@kylinos.cn>
This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
mptcp socket.
mptcp_for_each_stubflow() and other helpers related to list_dentry are
added into progs/mptcp_bpf.h.
Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow
to inspect all the fields in a tcp_sock.
Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++
.../selftests/bpf/progs/mptcp_subflow.c | 90 +++++++++++++++++++
2 files changed, 117 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..92d5deed0214 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -4,9 +4,36 @@
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
#define MPTCP_SUBFLOWS_MAX 8
+static inline int list_is_head(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list == head;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member) \
+ list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member); \
+ cond_break, !list_entry_is_head(pos, head, member); \
+ pos = list_next_entry(pos, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow) \
+ list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+
extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
bool scheduled) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index 2e28f4a215b5..1053a795eb43 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,6 +4,7 @@
/* vmlinux.h, bpf_helpers.h and other 'define' */
#include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
char _license[] SEC("license") = "GPL";
@@ -57,3 +58,92 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
return 1;
}
+
+static int _check_getsockopt_subflow_mark(struct bpf_sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk;
+ int i = 0;
+
+ if (sk->protocol != IPPROTO_MPTCP) {
+ bpf_printk("MPTCP Subflow: unexpected protocol %u", sk->protocol);
+ return -1;
+ }
+
+ msk = bpf_core_cast(sk, struct mptcp_sock);
+ if (msk->pm.subflows != 1) {
+ bpf_printk("MPTCP Subflow: expected 1 extra subflows, got %u",
+ msk->pm.subflows);
+ return -1;
+ }
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+
+ if (ssk->sk_mark != ++i) {
+ bpf_printk("MPTCP Subflow: expected %d mark, got %d",
+ i, ssk->sk_mark);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int _check_getsockopt_subflow_cc(struct bpf_sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk;
+
+ if (sk->protocol != IPPROTO_MPTCP) {
+ bpf_printk("MPTCP Subflow: unexpected protocol %u", sk->protocol);
+ return -1;
+ }
+
+ msk = bpf_core_cast(sk, struct mptcp_sock);
+ if (msk->pm.subflows != 1) {
+ bpf_printk("MPTCP Subflow: expected 1 extra subflows, got %u",
+ msk->pm.subflows);
+ return -1;
+ }
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct inet_connection_sock *icsk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+ icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+ if (ssk->sk_mark == 2 &&
+ __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) {
+ bpf_printk("MPTCP Subflow: expected %s cc, got %s",
+ cc, icsk->icsk_ca_ops->name);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+
+ if (!sk) {
+ bpf_printk("MPTCP Subflow: unexpected socket, sk is NULL");
+ ctx->retval = -1;
+ return 1;
+ }
+
+ if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK)
+ ctx->retval = _check_getsockopt_subflow_mark(sk);
+ else if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)
+ ctx->retval = _check_getsockopt_subflow_cc(sk);
+
+ return 1;
+}
--
2.43.0
Hi Geliang, Thank you for the new version. On 03/09/2024 10:04, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > This patch adds a "cgroup/getsockopt" way to inspect the subflows of a > mptcp socket. > > mptcp_for_each_stubflow() and other helpers related to list_dentry are > added into progs/mptcp_bpf.h. > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow > to inspect all the fields in a tcp_sock. (...) > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > index 2e28f4a215b5..1053a795eb43 100644 > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > @@ -4,6 +4,7 @@ > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > #include "bpf_tracing_net.h" > +#include "mptcp_bpf.h" > > char _license[] SEC("license") = "GPL"; > > @@ -57,3 +58,92 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > return 1; > } > + > +static int _check_getsockopt_subflow_mark(struct bpf_sock *sk) > +{ > + struct mptcp_subflow_context *subflow; > + struct mptcp_sock *msk; > + int i = 0; > + > + if (sk->protocol != IPPROTO_MPTCP) { > + bpf_printk("MPTCP Subflow: unexpected protocol %u", sk->protocol); > + return -1; > + } Out of curiosity, why did you move this code here and in _check_getsockopt_subflow_cc()? I'm just surprised because recently, you tried to reduce the amount of duplicated code :) Or is it because this program is used in other tests, where it should not be? Do you need to add a restriction per PID, like you did in mptcpify.c? (...) > +SEC("cgroup/getsockopt") > +int _getsockopt_subflow(struct bpf_sockopt *ctx) > +{ > + struct bpf_sock *sk = ctx->sk; > + > + if (!sk) { > + bpf_printk("MPTCP Subflow: unexpected socket, sk is NULL"); > + ctx->retval = -1; > + return 1; > + } > + > + if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) > + ctx->retval = _check_getsockopt_subflow_mark(sk); > + else if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) > + ctx->retval = _check_getsockopt_subflow_cc(sk); As I mentioned in my previous reply: can ctx->retval be < 0 here, before calling _check_getsockopt_subflow_XXX()? If yes or not sure, maybe safer to do: err = _check_getsockopt_subflow_XXX() (...) if (err) ctx->retval = -1; > + > + return 1; > +} Cheers, Matt -- Sponsored by the NGI0 Core fund.
On Tue, 2024-09-03 at 11:11 +0200, Matthieu Baerts wrote: > Hi Geliang, > > Thank you for the new version. > > On 03/09/2024 10:04, Geliang Tang wrote: > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > This patch adds a "cgroup/getsockopt" way to inspect the subflows > > of a > > mptcp socket. > > > > mptcp_for_each_stubflow() and other helpers related to list_dentry > > are > > added into progs/mptcp_bpf.h. > > > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list > > and use > > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will > > allow > > to inspect all the fields in a tcp_sock. > > (...) > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > index 2e28f4a215b5..1053a795eb43 100644 > > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > @@ -4,6 +4,7 @@ > > > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > > #include "bpf_tracing_net.h" > > +#include "mptcp_bpf.h" > > > > char _license[] SEC("license") = "GPL"; > > > > @@ -57,3 +58,92 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > > > return 1; > > } > > + > > +static int _check_getsockopt_subflow_mark(struct bpf_sock *sk) > > +{ > > + struct mptcp_subflow_context *subflow; > > + struct mptcp_sock *msk; > > + int i = 0; > > + > > + if (sk->protocol != IPPROTO_MPTCP) { > > + bpf_printk("MPTCP Subflow: unexpected protocol > > %u", sk->protocol); > > + return -1; > > + } > > Out of curiosity, why did you move this code here and in > _check_getsockopt_subflow_cc()? > > I'm just surprised because recently, you tried to reduce the amount > of > duplicated code :) > > Or is it because this program is used in other tests, where it should > not be? Do you need to add a restriction per PID, like you did in > mptcpify.c? I don't know, I'll add "per PID" in next version. > > (...) > > > +SEC("cgroup/getsockopt") > > +int _getsockopt_subflow(struct bpf_sockopt *ctx) > > +{ > > + struct bpf_sock *sk = ctx->sk; > > + > > + if (!sk) { > > + bpf_printk("MPTCP Subflow: unexpected socket, sk > > is NULL"); > > + ctx->retval = -1; > > + return 1; > > + } > > + > > + if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) > > + ctx->retval = _check_getsockopt_subflow_mark(sk); > > + else if (ctx->level == SOL_TCP && ctx->optname == > > TCP_CONGESTION) > > + ctx->retval = _check_getsockopt_subflow_cc(sk); > > As I mentioned in my previous reply: can ctx->retval be < 0 here, > before > calling _check_getsockopt_subflow_XXX()? > If yes or not sure, maybe safer to do: > > err = _check_getsockopt_subflow_XXX() > (...) > > if (err) > ctx->retval = -1; else ctx->retval = 0; Otherwise: 86: (63) *(u32 *)(r6 +0) = r1 dereference of modified ctx ptr R6 off=36 disallowed processed 169 insns (limit 1000000) max_states_per_insn 2 total_states 15 peak_states 15 mark_read 4 -- END PROG LOAD LOG -- > > > + > > + return 1; > > +} > > Cheers, > Matt
On 04/09/2024 12:09, Geliang Tang wrote: > On Tue, 2024-09-03 at 11:11 +0200, Matthieu Baerts wrote: >> On 03/09/2024 10:04, Geliang Tang wrote: (...) >>> +SEC("cgroup/getsockopt") >>> +int _getsockopt_subflow(struct bpf_sockopt *ctx) >>> +{ >>> + struct bpf_sock *sk = ctx->sk; >>> + >>> + if (!sk) { >>> + bpf_printk("MPTCP Subflow: unexpected socket, sk >>> is NULL"); >>> + ctx->retval = -1; >>> + return 1; >>> + } >>> + >>> + if (ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) >>> + ctx->retval = _check_getsockopt_subflow_mark(sk); >>> + else if (ctx->level == SOL_TCP && ctx->optname == >>> TCP_CONGESTION) >>> + ctx->retval = _check_getsockopt_subflow_cc(sk); >> >> As I mentioned in my previous reply: can ctx->retval be < 0 here, >> before >> calling _check_getsockopt_subflow_XXX()? >> If yes or not sure, maybe safer to do: >> >> err = _check_getsockopt_subflow_XXX() >> (...) >> >> if (err) >> ctx->retval = -1; > else > ctx->retval = 0; > > Otherwise: > > 86: (63) *(u32 *)(r6 +0) = r1 > dereference of modified ctx ptr R6 off=36 disallowed > processed 169 insns (limit 1000000) max_states_per_insn 2 total_states > 15 peak_states 15 mark_read 4 > -- END PROG LOAD LOG -- Sorry, I'm not familiar with these errors: do you mean that 'retval' always need to be set? But in a previous version you sent, 'retval' was only set to -1 in case of error, no? The error message doesn't seem to say that. Are you sure it is not because 'err' was not initialised, or something like that? Cheers, Matt -- Sponsored by the NGI0 Core fund.
On 03/09/2024 11:11, Matthieu Baerts wrote: > On 03/09/2024 10:04, Geliang Tang wrote: >> From: Geliang Tang <tanggeliang@kylinos.cn> >> >> This patch adds a "cgroup/getsockopt" way to inspect the subflows of a >> mptcp socket. >> >> mptcp_for_each_stubflow() and other helpers related to list_dentry are >> added into progs/mptcp_bpf.h. >> >> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use >> bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow >> to inspect all the fields in a tcp_sock. > > (...) > >> diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c >> index 2e28f4a215b5..1053a795eb43 100644 >> --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c >> +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c >> @@ -4,6 +4,7 @@ >> >> /* vmlinux.h, bpf_helpers.h and other 'define' */ >> #include "bpf_tracing_net.h" >> +#include "mptcp_bpf.h" >> >> char _license[] SEC("license") = "GPL"; >> >> @@ -57,3 +58,92 @@ int mptcp_subflow(struct bpf_sock_ops *skops) >> >> return 1; >> } >> + >> +static int _check_getsockopt_subflow_mark(struct bpf_sock *sk) >> +{ >> + struct mptcp_subflow_context *subflow; >> + struct mptcp_sock *msk; >> + int i = 0; >> + >> + if (sk->protocol != IPPROTO_MPTCP) { >> + bpf_printk("MPTCP Subflow: unexpected protocol %u", sk->protocol); >> + return -1; >> + } > > Out of curiosity, why did you move this code here and in > _check_getsockopt_subflow_cc()? > > I'm just surprised because recently, you tried to reduce the amount of > duplicated code :) While at it: it sounds better to have unique error messages, to know where was the error. Here, a few messages are the same, e.g. protocol, number of subflows, sk is null. Do not hesitate to add something to differentiate them: level & optname, a string prefix, the line number, etc. Cheers, Matt -- Sponsored by the NGI0 Core fund.
On Tue, 2024-09-03 at 11:16 +0200, Matthieu Baerts wrote: > On 03/09/2024 11:11, Matthieu Baerts wrote: > > On 03/09/2024 10:04, Geliang Tang wrote: > > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > > > This patch adds a "cgroup/getsockopt" way to inspect the subflows > > > of a > > > mptcp socket. > > > > > > mptcp_for_each_stubflow() and other helpers related to > > > list_dentry are > > > added into progs/mptcp_bpf.h. > > > > > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list > > > and use > > > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will > > > allow > > > to inspect all the fields in a tcp_sock. > > > > (...) > > > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > > index 2e28f4a215b5..1053a795eb43 100644 > > > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > > @@ -4,6 +4,7 @@ > > > > > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > > > #include "bpf_tracing_net.h" > > > +#include "mptcp_bpf.h" > > > > > > char _license[] SEC("license") = "GPL"; > > > > > > @@ -57,3 +58,92 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > > > > > return 1; > > > } > > > + > > > +static int _check_getsockopt_subflow_mark(struct bpf_sock *sk) > > > +{ > > > + struct mptcp_subflow_context *subflow; > > > + struct mptcp_sock *msk; > > > + int i = 0; > > > + > > > + if (sk->protocol != IPPROTO_MPTCP) { > > > + bpf_printk("MPTCP Subflow: unexpected protocol > > > %u", sk->protocol); > > > + return -1; > > > + } > > > > Out of curiosity, why did you move this code here and in > > _check_getsockopt_subflow_cc()? > > > > I'm just surprised because recently, you tried to reduce the amount > > of > > duplicated code :) > > While at it: it sounds better to have unique error messages, to know > where was the error. Here, a few messages are the same, e.g. > protocol, > number of subflows, sk is null. Do not hesitate to add something to > differentiate them: level & optname, a string prefix, the line > number, etc. No, I think we should drop all these "bpf_printk" messages. I checked all other BPF selftests programs, no one use these error messages to debug like this. Just "return 1" is enough: if (!sk || sk->protocol != IPPROTO_MPTCP) return 1; If a BPF program fail, its own load log will show: libbpf: prog '_getsockopt_subflow': -- BEGIN PROG LOAD LOG -- 0: R1=ctx() R10=fp0 ; int _getsockopt_subflow(struct bpf_sockopt *ctx) @ mptcp_subflow.c:136 0: (bf) r9 = r1 ; R1=ctx() R9_w=ctx() ; struct bpf_sock *sk = ctx->sk; @ mptcp_subflow.c:138 1: (79) r7 = *(u64 *)(r9 +0) ; R7_w=sock() R9_w=ctx() ; if (bpf_get_current_pid_tgid() >> 32 != pid) @ mptcp_subflow.c:141 2: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar() No need to add any line number by ourselves. > > Cheers, > Matt
Hi Geliang, Thank you for your reply! On 04/09/2024 11:55, Geliang Tang wrote: > On Tue, 2024-09-03 at 11:16 +0200, Matthieu Baerts wrote: >> On 03/09/2024 11:11, Matthieu Baerts wrote: >>> On 03/09/2024 10:04, Geliang Tang wrote: >>>> From: Geliang Tang <tanggeliang@kylinos.cn> >>>> >>>> This patch adds a "cgroup/getsockopt" way to inspect the subflows >>>> of a >>>> mptcp socket. >>>> >>>> mptcp_for_each_stubflow() and other helpers related to >>>> list_dentry are >>>> added into progs/mptcp_bpf.h. >>>> >>>> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list >>>> and use >>>> bpf_core_cast to cast a pointer to tcp_sock for readonly. It will >>>> allow >>>> to inspect all the fields in a tcp_sock. >>> >>> (...) >>> >>>> diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c >>>> b/tools/testing/selftests/bpf/progs/mptcp_subflow.c >>>> index 2e28f4a215b5..1053a795eb43 100644 >>>> --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c >>>> +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c >>>> @@ -4,6 +4,7 @@ >>>> >>>> /* vmlinux.h, bpf_helpers.h and other 'define' */ >>>> #include "bpf_tracing_net.h" >>>> +#include "mptcp_bpf.h" >>>> >>>> char _license[] SEC("license") = "GPL"; >>>> >>>> @@ -57,3 +58,92 @@ int mptcp_subflow(struct bpf_sock_ops *skops) >>>> >>>> return 1; >>>> } >>>> + >>>> +static int _check_getsockopt_subflow_mark(struct bpf_sock *sk) >>>> +{ >>>> + struct mptcp_subflow_context *subflow; >>>> + struct mptcp_sock *msk; >>>> + int i = 0; >>>> + >>>> + if (sk->protocol != IPPROTO_MPTCP) { >>>> + bpf_printk("MPTCP Subflow: unexpected protocol >>>> %u", sk->protocol); >>>> + return -1; >>>> + } >>> >>> Out of curiosity, why did you move this code here and in >>> _check_getsockopt_subflow_cc()? >>> >>> I'm just surprised because recently, you tried to reduce the amount >>> of >>> duplicated code :) >> >> While at it: it sounds better to have unique error messages, to know >> where was the error. Here, a few messages are the same, e.g. >> protocol, >> number of subflows, sk is null. Do not hesitate to add something to >> differentiate them: level & optname, a string prefix, the line >> number, etc. > > No, I think we should drop all these "bpf_printk" messages. I checked > all other BPF selftests programs, no one use these error messages to > debug like this. Just "return 1" is enough: > > if (!sk || sk->protocol != IPPROTO_MPTCP) > return 1; > > If a BPF program fail, its own load log will show: > > libbpf: prog '_getsockopt_subflow': -- BEGIN PROG LOAD LOG -- > 0: R1=ctx() R10=fp0 > ; int _getsockopt_subflow(struct bpf_sockopt *ctx) @ > mptcp_subflow.c:136 > 0: (bf) r9 = r1 ; R1=ctx() R9_w=ctx() > ; struct bpf_sock *sk = ctx->sk; @ mptcp_subflow.c:138 > 1: (79) r7 = *(u64 *)(r9 +0) ; R7_w=sock() R9_w=ctx() > ; if (bpf_get_current_pid_tgid() >> 32 != pid) @ mptcp_subflow.c:141 > 2: (85) call bpf_get_current_pid_tgid#14 ; R0_w=scalar() > > No need to add any line number by ourselves. Good, that looks OK. Just to be sure it is readable: here the BPF program failed because the PID was not the expected one, right? Or is it another error? Do you mind sharing the output if the CC is not "foo" for example please? Cheers, Matt -- Sponsored by the NGI0 Core fund.
© 2016 - 2024 Red Hat, Inc.