From: Geliang Tang <tanggeliang@kylinos.cn>
This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
mptcp socket.
mptcp_for_each_stubflow() and other helpers related to list_dentry are
added into progs/mptcp_bpf.h.
Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_core_cast to cast a pointer to tcp_sock for readonly. It will allow
to inspect all the fields in a tcp_sock.
Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++
.../selftests/bpf/progs/mptcp_subflow.c | 69 +++++++++++++++++++
2 files changed, 96 insertions(+)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..92d5deed0214 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -4,9 +4,36 @@
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
#define MPTCP_SUBFLOWS_MAX 8
+static inline int list_is_head(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list == head;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member) \
+ list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member); \
+ cond_break, !list_entry_is_head(pos, head, member); \
+ pos = list_next_entry(pos, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow) \
+ list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+
extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
bool scheduled) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index 2e28f4a215b5..70302477e326 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,10 +4,12 @@
/* vmlinux.h, bpf_helpers.h and other 'define' */
#include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
char _license[] SEC("license") = "GPL";
char cc[TCP_CA_NAME_MAX] = "reno";
+int pid;
/* Associate a subflow counter to each token */
struct {
@@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
return 1;
}
+
+static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow;
+ int i = 0;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+
+ if (ssk->sk_mark != ++i) {
+ ctx->retval = -2;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx)
+{
+ struct mptcp_subflow_context *subflow;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct inet_connection_sock *icsk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+ icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+ if (ssk->sk_mark == 2 &&
+ __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) {
+ ctx->retval = -2;
+ break;
+ }
+ }
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int _getsockopt_subflow(struct bpf_sockopt *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ struct mptcp_sock *msk;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ if (!sk || sk->protocol != IPPROTO_MPTCP ||
+ (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) &&
+ !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION)))
+ return 1;
+
+ msk = bpf_core_cast(sk, struct mptcp_sock);
+ if (msk->pm.subflows != 1) {
+ ctx->retval = -1;
+ return 1;
+ }
+
+ if (ctx->optname == SO_MARK)
+ return _check_getsockopt_subflow_mark(msk, ctx);
+ return _check_getsockopt_subflow_cc(msk, ctx);
+}
--
2.43.0
Hi Matt, On Thu, 2024-09-05 at 10:26 +0800, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > This patch adds a "cgroup/getsockopt" way to inspect the subflows of > a > mptcp socket. > > mptcp_for_each_stubflow() and other helpers related to list_dentry > are > added into progs/mptcp_bpf.h. > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and > use > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will > allow > to inspect all the fields in a tcp_sock. > > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > --- > tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++ > .../selftests/bpf/progs/mptcp_subflow.c | 69 > +++++++++++++++++++ > 2 files changed, 96 insertions(+) > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > index 782f36ed027e..92d5deed0214 100644 > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > @@ -4,9 +4,36 @@ > > #include <vmlinux.h> > #include <bpf/bpf_core_read.h> > +#include "bpf_experimental.h" > > #define MPTCP_SUBFLOWS_MAX 8 > > +static inline int list_is_head(const struct list_head *list, > + const struct list_head *head) > +{ > + return list == head; > +} > + > +#define list_entry(ptr, type, > member) \ > + container_of(ptr, type, member) > + > +#define list_first_entry(ptr, type, > member) \ > + list_entry((ptr)->next, type, member) > + > +#define list_next_entry(pos, > member) \ > + list_entry((pos)->member.next, typeof(*(pos)), member) > + > +#define list_entry_is_head(pos, head, > member) \ > + list_is_head(&pos->member, (head)) > + > +#define list_for_each_entry(pos, head, > member) \ > + for (pos = list_first_entry(head, typeof(*pos), > member); \ > + cond_break, !list_entry_is_head(pos, head, > member); \ > + pos = list_next_entry(pos, member)) > + > +#define mptcp_for_each_subflow(__msk, > __subflow) \ > + list_for_each_entry(__subflow, &((__msk)->conn_list), node) > + The helper mptcp_subflow_tcp_sock() added by the commit "selftests/bpf: Add bpf_rr scheduler & test" needs be added into this commit: static __always_inline struct sock * mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) { return subflow->tcp_sock; } Otherwise, compiling errors occur. Thanks, -Geliang > extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context > *subflow, > bool scheduled) __ksym; > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > index 2e28f4a215b5..70302477e326 100644 > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > @@ -4,10 +4,12 @@ > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > #include "bpf_tracing_net.h" > +#include "mptcp_bpf.h" > > char _license[] SEC("license") = "GPL"; > > char cc[TCP_CA_NAME_MAX] = "reno"; > +int pid; > > /* Associate a subflow counter to each token */ > struct { > @@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > return 1; > } > + > +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, > struct bpf_sockopt *ctx) > +{ > + struct mptcp_subflow_context *subflow; > + int i = 0; > + > + mptcp_for_each_subflow(msk, subflow) { > + struct sock *ssk; > + > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > + struct > mptcp_subflow_context)); > + > + if (ssk->sk_mark != ++i) { > + ctx->retval = -2; > + break; > + } > + } > + > + return 1; > +} > + > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, > struct bpf_sockopt *ctx) > +{ > + struct mptcp_subflow_context *subflow; > + > + mptcp_for_each_subflow(msk, subflow) { > + struct inet_connection_sock *icsk; > + struct sock *ssk; > + > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > + struct > mptcp_subflow_context)); > + icsk = bpf_core_cast(ssk, struct > inet_connection_sock); > + > + if (ssk->sk_mark == 2 && > + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, > TCP_CA_NAME_MAX)) { > + ctx->retval = -2; > + break; > + } > + } > + > + return 1; > +} > + > +SEC("cgroup/getsockopt") > +int _getsockopt_subflow(struct bpf_sockopt *ctx) > +{ > + struct bpf_sock *sk = ctx->sk; > + struct mptcp_sock *msk; > + > + if (bpf_get_current_pid_tgid() >> 32 != pid) > + return 1; > + > + if (!sk || sk->protocol != IPPROTO_MPTCP || > + (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) > && > + !(ctx->level == SOL_TCP && ctx->optname == > TCP_CONGESTION))) > + return 1; > + > + msk = bpf_core_cast(sk, struct mptcp_sock); > + if (msk->pm.subflows != 1) { > + ctx->retval = -1; > + return 1; > + } > + > + if (ctx->optname == SO_MARK) > + return _check_getsockopt_subflow_mark(msk, ctx); > + return _check_getsockopt_subflow_cc(msk, ctx); > +}
Hi Geliang, On 05/09/2024 10:06, Geliang Tang wrote: > Hi Matt, > > On Thu, 2024-09-05 at 10:26 +0800, Geliang Tang wrote: >> From: Geliang Tang <tanggeliang@kylinos.cn> >> >> This patch adds a "cgroup/getsockopt" way to inspect the subflows of >> a >> mptcp socket. >> >> mptcp_for_each_stubflow() and other helpers related to list_dentry >> are >> added into progs/mptcp_bpf.h. >> >> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and >> use >> bpf_core_cast to cast a pointer to tcp_sock for readonly. It will >> allow >> to inspect all the fields in a tcp_sock. >> >> Suggested-by: Martin KaFai Lau <martin.lau@kernel.org> >> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> >> --- >> tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++ >> .../selftests/bpf/progs/mptcp_subflow.c | 69 >> +++++++++++++++++++ >> 2 files changed, 96 insertions(+) >> >> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h >> b/tools/testing/selftests/bpf/progs/mptcp_bpf.h >> index 782f36ed027e..92d5deed0214 100644 >> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h >> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h >> @@ -4,9 +4,36 @@ >> >> #include <vmlinux.h> >> #include <bpf/bpf_core_read.h> >> +#include "bpf_experimental.h" >> >> #define MPTCP_SUBFLOWS_MAX 8 >> >> +static inline int list_is_head(const struct list_head *list, >> + const struct list_head *head) >> +{ >> + return list == head; >> +} >> + >> +#define list_entry(ptr, type, >> member) \ >> + container_of(ptr, type, member) >> + >> +#define list_first_entry(ptr, type, >> member) \ >> + list_entry((ptr)->next, type, member) >> + >> +#define list_next_entry(pos, >> member) \ >> + list_entry((pos)->member.next, typeof(*(pos)), member) >> + >> +#define list_entry_is_head(pos, head, >> member) \ >> + list_is_head(&pos->member, (head)) >> + >> +#define list_for_each_entry(pos, head, >> member) \ >> + for (pos = list_first_entry(head, typeof(*pos), >> member); \ >> + cond_break, !list_entry_is_head(pos, head, >> member); \ >> + pos = list_next_entry(pos, member)) >> + >> +#define mptcp_for_each_subflow(__msk, >> __subflow) \ >> + list_for_each_entry(__subflow, &((__msk)->conn_list), node) >> + > > The helper mptcp_subflow_tcp_sock() added by the commit "selftests/bpf: > Add bpf_rr scheduler & test" needs be added into this commit: > > static __always_inline struct sock * > mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) > { > return subflow->tcp_sock; > } > > Otherwise, compiling errors occur. Thank you for the heads-up. Please note that the file doesn't exist in net-next, so we need to add it, and update the MAINTAINERS file. I'm fixing that. While at it, I'm also adding comments about where the helpers are coming from this in mptcp_bpf.h file, and the modifications you did on top. That should help for the maintenance. Cheers, Matt -- Sponsored by the NGI0 Core fund.
Hi Matt, On Thu, 2024-09-05 at 16:06 +0800, Geliang Tang wrote: > Hi Matt, > > On Thu, 2024-09-05 at 10:26 +0800, Geliang Tang wrote: > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > This patch adds a "cgroup/getsockopt" way to inspect the subflows > > of > > a Here should be "an mptcp socket", not "a mptcp socket". Please update this for me when merging it. Thanks, -Geliang > > mptcp socket. > > > > mptcp_for_each_stubflow() and other helpers related to list_dentry > > are > > added into progs/mptcp_bpf.h. > > > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list > > and > > use > > bpf_core_cast to cast a pointer to tcp_sock for readonly. It will > > allow > > to inspect all the fields in a tcp_sock. > > > > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org> > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > > --- > > tools/testing/selftests/bpf/progs/mptcp_bpf.h | 27 ++++++++ > > .../selftests/bpf/progs/mptcp_subflow.c | 69 > > +++++++++++++++++++ > > 2 files changed, 96 insertions(+) > > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > index 782f36ed027e..92d5deed0214 100644 > > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > @@ -4,9 +4,36 @@ > > > > #include <vmlinux.h> > > #include <bpf/bpf_core_read.h> > > +#include "bpf_experimental.h" > > > > #define MPTCP_SUBFLOWS_MAX 8 > > > > +static inline int list_is_head(const struct list_head *list, > > + const struct list_head *head) > > +{ > > + return list == head; > > +} > > + > > +#define list_entry(ptr, type, > > member) \ > > + container_of(ptr, type, member) > > + > > +#define list_first_entry(ptr, type, > > member) \ > > + list_entry((ptr)->next, type, member) > > + > > +#define list_next_entry(pos, > > member) \ > > + list_entry((pos)->member.next, typeof(*(pos)), member) > > + > > +#define list_entry_is_head(pos, head, > > member) \ > > + list_is_head(&pos->member, (head)) > > + > > +#define list_for_each_entry(pos, head, > > member) \ > > + for (pos = list_first_entry(head, typeof(*pos), > > member); \ > > + cond_break, !list_entry_is_head(pos, head, > > member); \ > > + pos = list_next_entry(pos, member)) > > + > > +#define mptcp_for_each_subflow(__msk, > > __subflow) \ > > + list_for_each_entry(__subflow, &((__msk)->conn_list), > > node) > > + > > The helper mptcp_subflow_tcp_sock() added by the commit > "selftests/bpf: > Add bpf_rr scheduler & test" needs be added into this commit: > > static __always_inline struct sock * > mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) > { > return subflow->tcp_sock; > } > > Otherwise, compiling errors occur. > > Thanks, > -Geliang > > > extern void mptcp_subflow_set_scheduled(struct > > mptcp_subflow_context > > *subflow, > > bool scheduled) __ksym; > > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > index 2e28f4a215b5..70302477e326 100644 > > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > @@ -4,10 +4,12 @@ > > > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > > #include "bpf_tracing_net.h" > > +#include "mptcp_bpf.h" > > > > char _license[] SEC("license") = "GPL"; > > > > char cc[TCP_CA_NAME_MAX] = "reno"; > > +int pid; > > > > /* Associate a subflow counter to each token */ > > struct { > > @@ -57,3 +59,70 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > > > return 1; > > } > > + > > +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, > > struct bpf_sockopt *ctx) > > +{ > > + struct mptcp_subflow_context *subflow; > > + int i = 0; > > + > > + mptcp_for_each_subflow(msk, subflow) { > > + struct sock *ssk; > > + > > + ssk = > > mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > > + struct > > mptcp_subflow_context)); > > + > > + if (ssk->sk_mark != ++i) { > > + ctx->retval = -2; > > + break; > > + } > > + } > > + > > + return 1; > > +} > > + > > +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, > > struct bpf_sockopt *ctx) > > +{ > > + struct mptcp_subflow_context *subflow; > > + > > + mptcp_for_each_subflow(msk, subflow) { > > + struct inet_connection_sock *icsk; > > + struct sock *ssk; > > + > > + ssk = > > mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > > + struct > > mptcp_subflow_context)); > > + icsk = bpf_core_cast(ssk, struct > > inet_connection_sock); > > + > > + if (ssk->sk_mark == 2 && > > + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, > > TCP_CA_NAME_MAX)) { > > + ctx->retval = -2; > > + break; > > + } > > + } > > + > > + return 1; > > +} > > + > > +SEC("cgroup/getsockopt") > > +int _getsockopt_subflow(struct bpf_sockopt *ctx) > > +{ > > + struct bpf_sock *sk = ctx->sk; > > + struct mptcp_sock *msk; > > + > > + if (bpf_get_current_pid_tgid() >> 32 != pid) > > + return 1; > > + > > + if (!sk || sk->protocol != IPPROTO_MPTCP || > > + (!(ctx->level == SOL_SOCKET && ctx->optname == > > SO_MARK) > > && > > + !(ctx->level == SOL_TCP && ctx->optname == > > TCP_CONGESTION))) > > + return 1; > > + > > + msk = bpf_core_cast(sk, struct mptcp_sock); > > + if (msk->pm.subflows != 1) { > > + ctx->retval = -1; > > + return 1; > > + } > > + > > + if (ctx->optname == SO_MARK) > > + return _check_getsockopt_subflow_mark(msk, ctx); > > + return _check_getsockopt_subflow_cc(msk, ctx); > > +} > >
© 2016 - 2024 Red Hat, Inc.