From: Geliang Tang <tanggeliang@kylinos.cn>
This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
mptcp socket.
mptcp_for_each_stubflow() and other helpers related to list_dentry are
added into progs/mptcp_bpf.h.
Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_rdonly_cast to cast a pointer to tcp_sock for readonly. It will allow
to inspect all the fields in a tcp_sock.
Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
.../testing/selftests/bpf/prog_tests/mptcp.c | 11 +++++++
tools/testing/selftests/bpf/progs/mptcp_bpf.h | 26 +++++++++++++++
.../selftests/bpf/progs/mptcp_subflow.c | 32 +++++++++++++++++++
3 files changed, 69 insertions(+)
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 69fdcb28249d..2178db94f764 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -383,6 +383,7 @@ static void run_subflow(char *new)
{
int server_fd, client_fd, err;
char cc[TCP_CA_NAME_MAX];
+ unsigned int mark;
socklen_t len;
server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0);
@@ -407,6 +408,10 @@ static void run_subflow(char *new)
ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc");
ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc");
+ len = sizeof(mark);
+ err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len);
+ ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)");
+
close(client_fd);
fail:
close(server_fd);
@@ -417,6 +422,7 @@ static void test_subflow(void)
int cgroup_fd, prog_fd, err;
struct mptcp_subflow *skel;
struct nstoken *nstoken;
+ struct bpf_link *link;
cgroup_fd = test__join_cgroup("/mptcp_subflow");
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: mptcp_subflow"))
@@ -442,6 +448,10 @@ static void test_subflow(void)
if (endpoint_init("subflow") < 0)
goto close_netns;
+ link = bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(link, "getsockopt prog"))
+ goto close_netns;
+
run_subflow(skel->data->cc);
close_netns:
@@ -450,6 +460,7 @@ static void test_subflow(void)
mptcp_subflow__destroy(skel);
close_cgroup:
close(cgroup_fd);
+ bpf_link__destroy(link);
}
static struct nstoken *sched_init(char *flags, char *sched)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..2086170e7379 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -7,6 +7,32 @@
#define MPTCP_SUBFLOWS_MAX 8
+static inline int list_is_head(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list == head;
+}
+
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member) \
+ list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member); \
+ !list_entry_is_head(pos, head, member); \
+ pos = list_next_entry(pos, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow) \
+ list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+
extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
bool scheduled) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index bc572e1d6df8..c41418ab8db4 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,6 +4,7 @@
/* vmlinux.h, bpf_helpers.h and other 'define' */
#include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
char _license[] SEC("license") = "GPL";
@@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
return 1;
}
+
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+ struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct mptcp_sock);
+ struct mptcp_subflow_context *subflow;
+ int i = 0;
+
+ if (!msk || ctx->level != SOL_SOCKET || ctx->optname != SO_MARK)
+ return 1;
+
+ mptcp_for_each_subflow(msk, subflow) {
+ struct inet_connection_sock *icsk;
+ struct sock *ssk;
+
+ ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+ struct mptcp_subflow_context));
+ icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+ if (ssk->sk_mark != i + 1)
+ ctx->retval = -1;
+ if (ssk->sk_mark == 1 &&
+ __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX))
+ ctx->retval = -1;
+
+ if (i++ >= MPTCP_SUBFLOWS_MAX)
+ break;
+ }
+
+ return 1;
+}
--
2.43.0
Hi Geliang, Thank you for looking at that! On 20/08/2024 10:44, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > This patch adds a "cgroup/getsockopt" way to inspect the subflows of a > mptcp socket. > > mptcp_for_each_stubflow() and other helpers related to list_dentry are > added into progs/mptcp_bpf.h. > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use > bpf_rdonly_cast to cast a pointer to tcp_sock for readonly. It will allow Do you mean bpf_core_cast() instead of bpf_rdonly_cast()? Or is it indirectly used? > to inspect all the fields in a tcp_sock. > > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > --- > .../testing/selftests/bpf/prog_tests/mptcp.c | 11 +++++++ > tools/testing/selftests/bpf/progs/mptcp_bpf.h | 26 +++++++++++++++ > .../selftests/bpf/progs/mptcp_subflow.c | 32 +++++++++++++++++++ > 3 files changed, 69 insertions(+) > > diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c > index 69fdcb28249d..2178db94f764 100644 > --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c > +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c > @@ -383,6 +383,7 @@ static void run_subflow(char *new) > { > int server_fd, client_fd, err; > char cc[TCP_CA_NAME_MAX]; > + unsigned int mark; > socklen_t len; > > server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0); > @@ -407,6 +408,10 @@ static void run_subflow(char *new) > ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc"); > ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc"); I guess the next steps is to remove these 'ss_search()', right? > + len = sizeof(mark); > + err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len); > + ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"); > + > close(client_fd); > fail: > close(server_fd); > @@ -417,6 +422,7 @@ static void test_subflow(void) > int cgroup_fd, prog_fd, err; > struct mptcp_subflow *skel; > struct nstoken *nstoken; > + struct bpf_link *link; > > cgroup_fd = test__join_cgroup("/mptcp_subflow"); > if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: mptcp_subflow")) > @@ -442,6 +448,10 @@ static void test_subflow(void) > if (endpoint_init("subflow") < 0) > goto close_netns; > > + link = bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd); > + if (!ASSERT_OK_PTR(link, "getsockopt prog")) I don't know how it works: is this going to fail if the getsockopt program set 'ctx->retval = -1'? > + goto close_netns; > + > run_subflow(skel->data->cc); > > close_netns: > @@ -450,6 +460,7 @@ static void test_subflow(void) > mptcp_subflow__destroy(skel); > close_cgroup: > close(cgroup_fd); > + bpf_link__destroy(link); > } > > static struct nstoken *sched_init(char *flags, char *sched) > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > index 782f36ed027e..2086170e7379 100644 > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > @@ -7,6 +7,32 @@ > > #define MPTCP_SUBFLOWS_MAX 8 > > +static inline int list_is_head(const struct list_head *list, > + const struct list_head *head) > +{ > + return list == head; > +} > + > +#define list_entry(ptr, type, member) \ > + container_of(ptr, type, member) > + > +#define list_first_entry(ptr, type, member) \ > + list_entry((ptr)->next, type, member) > + > +#define list_next_entry(pos, member) \ > + list_entry((pos)->member.next, typeof(*(pos)), member) > + > +#define list_entry_is_head(pos, head, member) \ > + list_is_head(&pos->member, (head)) > + > +#define list_for_each_entry(pos, head, member) \ > + for (pos = list_first_entry(head, typeof(*pos), member); \ > + !list_entry_is_head(pos, head, member); \ > + pos = list_next_entry(pos, member)) Out of curiosity, are these generic helpers not defined elsewhere? Is it OK not to use the '_safe' version where the deletion of elements is supported? > + > +#define mptcp_for_each_subflow(__msk, __subflow) \ > + list_for_each_entry(__subflow, &((__msk)->conn_list), node) > + > extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, > bool scheduled) __ksym; > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > index bc572e1d6df8..c41418ab8db4 100644 > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > @@ -4,6 +4,7 @@ > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > #include "bpf_tracing_net.h" > +#include "mptcp_bpf.h" > > char _license[] SEC("license") = "GPL"; > > @@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > return 1; > } > + > +SEC("cgroup/getsockopt") > +int _getsockopt(struct bpf_sockopt *ctx) This name seems too generic while what is done here is specific to the 'subflow' test. Maybe: _check_getsockopt_subflows()? (or _check_getsockopt_subflows_mark() and _check_getsockopt_subflow_cc() see below) > +{ > + struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct mptcp_sock); What happens if the 'sk' is not an 'msk'? Does it check that the sk is indeed an MPTCP one? (how?) > + struct mptcp_subflow_context *subflow; > + int i = 0; > + > + if (!msk || ctx->level != SOL_SOCKET || ctx->optname != SO_MARK) > + return 1; Would it not be clearer to split the two checks? One dedicated to the mark, when getsockopt(SO_MARK) is used, and one for the CC, when the getsockopt(TCP_CONGESTION) is used? By doing that, we would know which one had an issue, if any, not just "something wrong with getsockopt()"? > + > + mptcp_for_each_subflow(msk, subflow) { (might be better to use this helper in our WIP MPTCP BPF packets scheduler examples, instead of converting them to a fixed array) > + struct inet_connection_sock *icsk; > + struct sock *ssk; > + > + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > + struct mptcp_subflow_context)); > + icsk = bpf_core_cast(ssk, struct inet_connection_sock); > + > + if (ssk->sk_mark != i + 1) > + ctx->retval = -1; > + if (ssk->sk_mark == 1 && > + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) > + ctx->retval = -1; > + > + if (i++ >= MPTCP_SUBFLOWS_MAX) > + break; I guess this part is needed for the verifier, right? Somehow related to "cond_break" explained in this link? https://lwn.net/Articles/964381/ > + } > + > + return 1; > +} Cheers, Matt -- Sponsored by the NGI0 Core fund.
Hi Matt, Thanks for the review. On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote: > Hi Geliang, > > Thank you for looking at that! > > On 20/08/2024 10:44, Geliang Tang wrote: > > From: Geliang Tang <tanggeliang@kylinos.cn> > > > > This patch adds a "cgroup/getsockopt" way to inspect the subflows > > of a > > mptcp socket. > > > > mptcp_for_each_stubflow() and other helpers related to list_dentry > > are > > added into progs/mptcp_bpf.h. > > > > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list > > and use > > bpf_rdonly_cast to cast a pointer to tcp_sock for readonly. It will > > allow > > Do you mean bpf_core_cast() instead of bpf_rdonly_cast()? Or is it > indirectly used? Yes, it should be "bpf_core_cast". Updated in v2. > > > to inspect all the fields in a tcp_sock. > > > > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org> > > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > > --- > > .../testing/selftests/bpf/prog_tests/mptcp.c | 11 +++++++ > > tools/testing/selftests/bpf/progs/mptcp_bpf.h | 26 +++++++++++++++ > > .../selftests/bpf/progs/mptcp_subflow.c | 32 > > +++++++++++++++++++ > > 3 files changed, 69 insertions(+) > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c > > b/tools/testing/selftests/bpf/prog_tests/mptcp.c > > index 69fdcb28249d..2178db94f764 100644 > > --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c > > +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c > > @@ -383,6 +383,7 @@ static void run_subflow(char *new) > > { > > int server_fd, client_fd, err; > > char cc[TCP_CA_NAME_MAX]; > > + unsigned int mark; > > socklen_t len; > > > > server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, > > 0); > > @@ -407,6 +408,10 @@ static void run_subflow(char *new) > > ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc"); > > ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc"); > > I guess the next steps is to remove these 'ss_search()', right? We can keep it for double checks. > > > + len = sizeof(mark); > > + err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, > > &len); > > + ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"); > > + > > close(client_fd); > > fail: > > close(server_fd); > > @@ -417,6 +422,7 @@ static void test_subflow(void) > > int cgroup_fd, prog_fd, err; > > struct mptcp_subflow *skel; > > struct nstoken *nstoken; > > + struct bpf_link *link; > > > > cgroup_fd = test__join_cgroup("/mptcp_subflow"); > > if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: > > mptcp_subflow")) > > @@ -442,6 +448,10 @@ static void test_subflow(void) > > if (endpoint_init("subflow") < 0) > > goto close_netns; > > > > + link = bpf_program__attach_cgroup(skel->progs._getsockopt, > > cgroup_fd); > > + if (!ASSERT_OK_PTR(link, "getsockopt prog")) > > I don't know how it works: is this going to fail if the getsockopt > program set 'ctx->retval = -1'? Yes. ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)") will fail if the getsockopt program set 'ctx->retval = -1'. > > > + goto close_netns; > > + > > run_subflow(skel->data->cc); > > > > close_netns: > > @@ -450,6 +460,7 @@ static void test_subflow(void) > > mptcp_subflow__destroy(skel); > > close_cgroup: > > close(cgroup_fd); > > + bpf_link__destroy(link); > > } > > > > static struct nstoken *sched_init(char *flags, char *sched) > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > index 782f36ed027e..2086170e7379 100644 > > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > @@ -7,6 +7,32 @@ > > > > #define MPTCP_SUBFLOWS_MAX 8 > > > > +static inline int list_is_head(const struct list_head *list, > > + const struct list_head *head) > > +{ > > + return list == head; > > +} > > + > > +#define list_entry(ptr, type, > > member) \ > > + container_of(ptr, type, member) > > + > > +#define list_first_entry(ptr, type, > > member) \ > > + list_entry((ptr)->next, type, member) > > + > > +#define list_next_entry(pos, > > member) \ > > + list_entry((pos)->member.next, typeof(*(pos)), member) > > + > > +#define list_entry_is_head(pos, head, > > member) \ > > + list_is_head(&pos->member, (head)) > > + > > +#define list_for_each_entry(pos, head, > > member) \ > > + for (pos = list_first_entry(head, typeof(*pos), > > member); \ > > + !list_entry_is_head(pos, head, > > member); \ > > + pos = list_next_entry(pos, member)) > > Out of curiosity, are these generic helpers not defined elsewhere? Is > it > OK not to use the '_safe' version where the deletion of elements is > supported? It's read only in this test, so list_for_each_entry is enough. I added the '_safe' version in v2 too for future use. > > > + > > +#define mptcp_for_each_subflow(__msk, > > __subflow) \ > > + list_for_each_entry(__subflow, &((__msk)->conn_list), > > node) > > + > > extern void mptcp_subflow_set_scheduled(struct > > mptcp_subflow_context *subflow, > > bool scheduled) __ksym; > > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > index bc572e1d6df8..c41418ab8db4 100644 > > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > @@ -4,6 +4,7 @@ > > > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > > #include "bpf_tracing_net.h" > > +#include "mptcp_bpf.h" > > > > char _license[] SEC("license") = "GPL"; > > > > @@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops *skops) > > > > return 1; > > } > > + > > +SEC("cgroup/getsockopt") > > +int _getsockopt(struct bpf_sockopt *ctx) > > This name seems too generic while what is done here is specific to > the > 'subflow' test. Maybe: _check_getsockopt_subflows()? Changed it to _getsockopt_subflow in v2. > > (or _check_getsockopt_subflows_mark() and > _check_getsockopt_subflow_cc() > see below) > > > +{ > > + struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct > > mptcp_sock); > > What happens if the 'sk' is not an 'msk'? > Does it check that the sk is indeed an MPTCP one? (how?) Also check msk->token in v2. > > > + struct mptcp_subflow_context *subflow; > > + int i = 0; > > + > > + if (!msk || ctx->level != SOL_SOCKET || ctx->optname != > > SO_MARK) > > + return 1; > > Would it not be clearer to split the two checks? One dedicated to the > mark, when getsockopt(SO_MARK) is used, and one for the CC, when the > getsockopt(TCP_CONGESTION) is used? By doing that, we would know > which > one had an issue, if any, not just "something wrong with > getsockopt()"? Done in v2. > > > + > > + mptcp_for_each_subflow(msk, subflow) { > > (might be better to use this helper in our WIP MPTCP BPF packets > scheduler examples, instead of converting them to a fixed array) Yes, but there are still some access permission issues that need to be resolved. > > > + struct inet_connection_sock *icsk; > > + struct sock *ssk; > > + > > + ssk = > > mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > > + struct > > mptcp_subflow_context)); > > + icsk = bpf_core_cast(ssk, struct > > inet_connection_sock); > > + > > + if (ssk->sk_mark != i + 1) > > + ctx->retval = -1; > > + if (ssk->sk_mark == 1 && > > + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, > > TCP_CA_NAME_MAX)) > > + ctx->retval = -1; > > + > > + if (i++ >= MPTCP_SUBFLOWS_MAX) > > + break; > > I guess this part is needed for the verifier, right? > Somehow related to "cond_break" explained in this link? > > https://lwn.net/Articles/964381/ > Change this as "cond_break" in v2. Regards, -Geliang > > > + } > > + > > + return 1; > > +} > > Cheers, > Matt
Hi Geliang, Thank you for your reply! On 21/08/2024 10:00, Geliang Tang wrote: > On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote: >> On 20/08/2024 10:44, Geliang Tang wrote: (...) >>> diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c >>> b/tools/testing/selftests/bpf/prog_tests/mptcp.c >>> index 69fdcb28249d..2178db94f764 100644 >>> --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c >>> +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c >>> @@ -383,6 +383,7 @@ static void run_subflow(char *new) >>> { >>> int server_fd, client_fd, err; >>> char cc[TCP_CA_NAME_MAX]; >>> + unsigned int mark; >>> socklen_t len; >>> >>> server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, >>> 0); >>> @@ -407,6 +408,10 @@ static void run_subflow(char *new) >>> ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc"); >>> ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc"); >> >> I guess the next steps is to remove these 'ss_search()', right? > > We can keep it for double checks. I would not mind, even if 'ss | grep' is used in other BPF tests, but I understood from Martin he prefers not to depend on external tools as much as possible. It should indeed ease the maintenance. >>> + len = sizeof(mark); >>> + err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, >>> &len); >>> + ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"); >>> + >>> close(client_fd); >>> fail: >>> close(server_fd); >>> @@ -417,6 +422,7 @@ static void test_subflow(void) >>> int cgroup_fd, prog_fd, err; >>> struct mptcp_subflow *skel; >>> struct nstoken *nstoken; >>> + struct bpf_link *link; >>> >>> cgroup_fd = test__join_cgroup("/mptcp_subflow"); >>> if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: >>> mptcp_subflow")) >>> @@ -442,6 +448,10 @@ static void test_subflow(void) >>> if (endpoint_init("subflow") < 0) >>> goto close_netns; >>> >>> + link = bpf_program__attach_cgroup(skel->progs._getsockopt, >>> cgroup_fd); >>> + if (!ASSERT_OK_PTR(link, "getsockopt prog")) >> >> I don't know how it works: is this going to fail if the getsockopt >> program set 'ctx->retval = -1'? > > Yes. ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)") will fail if the > getsockopt program set 'ctx->retval = -1'. Good, thank you! >>> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h >>> b/tools/testing/selftests/bpf/progs/mptcp_bpf.h >>> index 782f36ed027e..2086170e7379 100644 >>> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h >>> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h >>> @@ -7,6 +7,32 @@ >>> >>> #define MPTCP_SUBFLOWS_MAX 8 >>> >>> +static inline int list_is_head(const struct list_head *list, >>> + const struct list_head *head) >>> +{ >>> + return list == head; >>> +} >>> + >>> +#define list_entry(ptr, type, >>> member) \ >>> + container_of(ptr, type, member) >>> + >>> +#define list_first_entry(ptr, type, >>> member) \ >>> + list_entry((ptr)->next, type, member) >>> + >>> +#define list_next_entry(pos, >>> member) \ >>> + list_entry((pos)->member.next, typeof(*(pos)), member) >>> + >>> +#define list_entry_is_head(pos, head, >>> member) \ >>> + list_is_head(&pos->member, (head)) >>> + >>> +#define list_for_each_entry(pos, head, >>> member) \ >>> + for (pos = list_first_entry(head, typeof(*pos), >>> member); \ >>> + !list_entry_is_head(pos, head, >>> member); \ >>> + pos = list_next_entry(pos, member)) >> >> Out of curiosity, are these generic helpers not defined elsewhere? Is >> it >> OK not to use the '_safe' version where the deletion of elements is >> supported? > > It's read only in this test, so list_for_each_entry is enough. I added > the '_safe' version in v2 too for future use. OK! (but I don't think you defined list_for_each_entry_safe(). Or is it defined elsewhere?) >>> + >>> +#define mptcp_for_each_subflow(__msk, >>> __subflow) \ >>> + list_for_each_entry(__subflow, &((__msk)->conn_list), >>> node) >>> + >>> extern void mptcp_subflow_set_scheduled(struct >>> mptcp_subflow_context *subflow, >>> bool scheduled) __ksym; >>> >>> diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c >>> b/tools/testing/selftests/bpf/progs/mptcp_subflow.c >>> index bc572e1d6df8..c41418ab8db4 100644 >>> --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c >>> +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c >>> @@ -4,6 +4,7 @@ >>> >>> /* vmlinux.h, bpf_helpers.h and other 'define' */ >>> #include "bpf_tracing_net.h" >>> +#include "mptcp_bpf.h" >>> >>> char _license[] SEC("license") = "GPL"; >>> >>> @@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops *skops) >>> >>> return 1; >>> } >>> + >>> +SEC("cgroup/getsockopt") >>> +int _getsockopt(struct bpf_sockopt *ctx) >> >> This name seems too generic while what is done here is specific to >> the >> 'subflow' test. Maybe: _check_getsockopt_subflows()? > > Changed it to _getsockopt_subflow in v2. > >> >> (or _check_getsockopt_subflows_mark() and >> _check_getsockopt_subflow_cc() >> see below) >> >>> +{ >>> + struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct >>> mptcp_sock); >> >> What happens if the 'sk' is not an 'msk'? >> Does it check that the sk is indeed an MPTCP one? (how?) > > Also check msk->token in v2. Mmh, I'm not sure if this is enough: the offset of 'token' in the msk structure could point to something unrelated but non-null in another socket structure, or out of bound. I guess BPF is doing extra checks not to crash here. But maybe that's OK because it is only reading stuff? >>> + struct mptcp_subflow_context *subflow; >>> + int i = 0; >>> + >>> + if (!msk || ctx->level != SOL_SOCKET || ctx->optname != >>> SO_MARK) >>> + return 1; >> >> Would it not be clearer to split the two checks? One dedicated to the >> mark, when getsockopt(SO_MARK) is used, and one for the CC, when the >> getsockopt(TCP_CONGESTION) is used? By doing that, we would know >> which >> one had an issue, if any, not just "something wrong with >> getsockopt()"? > > Done in v2. Thanks! >>> + >>> + mptcp_for_each_subflow(msk, subflow) { >> >> (might be better to use this helper in our WIP MPTCP BPF packets >> scheduler examples, instead of converting them to a fixed array) > > Yes, but there are still some access permission issues that need to be > resolved. OK, because structures cannot be modified I suppose. >> >>> + struct inet_connection_sock *icsk; >>> + struct sock *ssk; >>> + >>> + ssk = >>> mptcp_subflow_tcp_sock(bpf_core_cast(subflow, >>> + struct >>> mptcp_subflow_context)); >>> + icsk = bpf_core_cast(ssk, struct >>> inet_connection_sock); >>> + >>> + if (ssk->sk_mark != i + 1) >>> + ctx->retval = -1; >>> + if (ssk->sk_mark == 1 && >>> + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, >>> TCP_CA_NAME_MAX)) >>> + ctx->retval = -1; >>> + >>> + if (i++ >= MPTCP_SUBFLOWS_MAX) >>> + break; >> >> I guess this part is needed for the verifier, right? >> Somehow related to "cond_break" explained in this link? >> >> https://lwn.net/Articles/964381/ >> > > Change this as "cond_break" in v2. Note that you could also modify list_for_each_entry() to add this 'cond_break' (+bpf_experimental.h?), not to have to deal with that each time mptcp_for_each_subflow is used. Cheers, Matt -- Sponsored by the NGI0 Core fund.
Hi Martin, Matt, On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote: > Hi Geliang, > > Thank you for your reply! > > On 21/08/2024 10:00, Geliang Tang wrote: > > On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote: > > > On 20/08/2024 10:44, Geliang Tang wrote: > > (...) > > > > > diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c > > > > b/tools/testing/selftests/bpf/prog_tests/mptcp.c > > > > index 69fdcb28249d..2178db94f764 100644 > > > > --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c > > > > +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c > > > > @@ -383,6 +383,7 @@ static void run_subflow(char *new) > > > > { > > > > int server_fd, client_fd, err; > > > > char cc[TCP_CA_NAME_MAX]; > > > > + unsigned int mark; > > > > socklen_t len; > > > > > > > > server_fd = start_mptcp_server(AF_INET, ADDR_1, > > > > PORT_1, > > > > 0); > > > > @@ -407,6 +408,10 @@ static void run_subflow(char *new) > > > > ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc"); > > > > ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default > > > > cc"); > > > > > > I guess the next steps is to remove these 'ss_search()', right? > > > > We can keep it for double checks. > > I would not mind, even if 'ss | grep' is used in other BPF tests, but > I > understood from Martin he prefers not to depend on external tools as > much as possible. It should indeed ease the maintenance. > > > > > + len = sizeof(mark); > > > > + err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, > > > > &mark, > > > > &len); > > > > + ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)"); > > > > + > > > > close(client_fd); > > > > fail: > > > > close(server_fd); > > > > @@ -417,6 +422,7 @@ static void test_subflow(void) > > > > int cgroup_fd, prog_fd, err; > > > > struct mptcp_subflow *skel; > > > > struct nstoken *nstoken; > > > > + struct bpf_link *link; > > > > > > > > cgroup_fd = test__join_cgroup("/mptcp_subflow"); > > > > if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: > > > > mptcp_subflow")) > > > > @@ -442,6 +448,10 @@ static void test_subflow(void) > > > > if (endpoint_init("subflow") < 0) > > > > goto close_netns; > > > > > > > > + link = bpf_program__attach_cgroup(skel- > > > > >progs._getsockopt, > > > > cgroup_fd); > > > > + if (!ASSERT_OK_PTR(link, "getsockopt prog")) > > > > > > I don't know how it works: is this going to fail if the > > > getsockopt > > > program set 'ctx->retval = -1'? > > > > Yes. ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)") will fail if > > the > > getsockopt program set 'ctx->retval = -1'. > > Good, thank you! > > > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > > > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > > > index 782f36ed027e..2086170e7379 100644 > > > > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > > > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h > > > > @@ -7,6 +7,32 @@ > > > > > > > > #define MPTCP_SUBFLOWS_MAX 8 > > > > > > > > +static inline int list_is_head(const struct list_head *list, > > > > + const struct list_head *head) > > > > +{ > > > > + return list == head; > > > > +} > > > > + > > > > +#define list_entry(ptr, type, > > > > member) \ > > > > + container_of(ptr, type, member) > > > > + > > > > +#define list_first_entry(ptr, type, > > > > member) \ > > > > + list_entry((ptr)->next, type, member) > > > > + > > > > +#define list_next_entry(pos, > > > > member) \ > > > > + list_entry((pos)->member.next, typeof(*(pos)), member) > > > > + > > > > +#define list_entry_is_head(pos, head, > > > > member) \ > > > > + list_is_head(&pos->member, (head)) > > > > + > > > > +#define list_for_each_entry(pos, head, > > > > member) \ > > > > + for (pos = list_first_entry(head, typeof(*pos), > > > > member); \ > > > > + !list_entry_is_head(pos, head, > > > > member); \ > > > > + pos = list_next_entry(pos, member)) > > > > > > Out of curiosity, are these generic helpers not defined > > > elsewhere? Is > > > it > > > OK not to use the '_safe' version where the deletion of elements > > > is > > > supported? > > > > It's read only in this test, so list_for_each_entry is enough. I > > added > > the '_safe' version in v2 too for future use. > > OK! (but I don't think you defined list_for_each_entry_safe(). Or is > it > defined elsewhere?) > > > > > + > > > > +#define mptcp_for_each_subflow(__msk, > > > > __subflow) \ > > > > + list_for_each_entry(__subflow, &((__msk)->conn_list), > > > > node) > > > > + > > > > extern void mptcp_subflow_set_scheduled(struct > > > > mptcp_subflow_context *subflow, > > > > bool scheduled) > > > > __ksym; > > > > > > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > > > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > > > index bc572e1d6df8..c41418ab8db4 100644 > > > > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > > > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c > > > > @@ -4,6 +4,7 @@ > > > > > > > > /* vmlinux.h, bpf_helpers.h and other 'define' */ > > > > #include "bpf_tracing_net.h" > > > > +#include "mptcp_bpf.h" > > > > > > > > char _license[] SEC("license") = "GPL"; > > > > > > > > @@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops > > > > *skops) > > > > > > > > return 1; > > > > } > > > > + > > > > +SEC("cgroup/getsockopt") > > > > +int _getsockopt(struct bpf_sockopt *ctx) > > > > > > This name seems too generic while what is done here is specific > > > to > > > the > > > 'subflow' test. Maybe: _check_getsockopt_subflows()? > > > > Changed it to _getsockopt_subflow in v2. > > > > > > > > (or _check_getsockopt_subflows_mark() and > > > _check_getsockopt_subflow_cc() > > > see below) > > > > > > > +{ > > > > + struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct > > > > mptcp_sock); > > > > > > What happens if the 'sk' is not an 'msk'? > > > Does it check that the sk is indeed an MPTCP one? (how?) > > > > Also check msk->token in v2. > > Mmh, I'm not sure if this is enough: the offset of 'token' in the msk > structure could point to something unrelated but non-null in another > socket structure, or out of bound. I guess BPF is doing extra checks > not > to crash here. But maybe that's OK because it is only reading stuff? > > > > > + struct mptcp_subflow_context *subflow; > > > > + int i = 0; > > > > + > > > > + if (!msk || ctx->level != SOL_SOCKET || ctx->optname > > > > != > > > > SO_MARK) > > > > + return 1; > > > > > > Would it not be clearer to split the two checks? One dedicated to > > > the > > > mark, when getsockopt(SO_MARK) is used, and one for the CC, when > > > the > > > getsockopt(TCP_CONGESTION) is used? By doing that, we would know > > > which > > > one had an issue, if any, not just "something wrong with > > > getsockopt()"? > > > > Done in v2. > > Thanks! > > > > > + > > > > + mptcp_for_each_subflow(msk, subflow) { > > > > > > (might be better to use this helper in our WIP MPTCP BPF packets > > > scheduler examples, instead of converting them to a fixed array) > > > > Yes, but there are still some access permission issues that need to > > be > > resolved. > > OK, because structures cannot be modified I suppose. No, it seems the subflow cast by bpf_core_cast() can't be passed to a kernel function, regardless of whether this function modifies the subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_" error occurs. For example, mptcp_subflow_active() is a kernel function, and pass the subflow to it in progs/mptcp_bpf_first.c like this: ''' ... ... extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym; ... ... SEC("struct_ops") int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { struct mptcp_subflow_context *subflow, *tmp; mptcp_for_each_subflow(msk, tmp) { subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); if (!mptcp_subflow_active(subflow)) continue; } return 0; } ''' An "arg#0 is untrusted_ptr_ expected ptr_" error occurs: ''' ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 21: (e5) may_goto pc+1 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0 R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0 ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 22: (05) goto pc-14 9: (79) r6 = *(u64 *)(r6 +0) ; R6_w=ptr_list_head() 10: (1f) r6 -= r7 ; R6_w=ptr_list_head() R7=0 11: (bf) r1 = r6 ; R1_w=ptr_list_head() R6_w=ptr_list_head() 12: (0f) r1 += r7 ; R1=ptr_list_head() R7=0 13: (1d) if r1 == r8 goto pc+9 ; R1=ptr_list_head() R8=trusted_ptr_mptcp_sock(off=2488) ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @ mptcp_bpf_first.c:28 14: (bf) r1 = r6 ; R1_w=ptr_list_head() R6=ptr_list_head() 15: (18) r2 = 0x6d14 ; R2_w=27924 17: (85) call bpf_rdonly_cast#159867 ; R0_w=untrusted_ptr_mptcp_subflow_context() ; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29 18: (bf) r1 = r0 ; R0_w=untrusted_ptr_mptcp_subflow_context() R1_w=untrusted_ptr_mptcp_subflow_context() 19: (85) call mptcp_subflow_active#111397 arg#0 is untrusted_ptr_ expected ptr_ or socket processed 23 insns (limit 1000000) max_states_per_insn 0 total_states 2 peak_states 2 mark_read 2 -- END PROG LOAD LOG -- ''' How can I fix this? I need your advice. Thanks, -Geliang > > > > > > > > + struct inet_connection_sock *icsk; > > > > + struct sock *ssk; > > > > + > > > > + ssk = > > > > mptcp_subflow_tcp_sock(bpf_core_cast(subflow, > > > > + > > > > struct > > > > mptcp_subflow_context)); > > > > + icsk = bpf_core_cast(ssk, struct > > > > inet_connection_sock); > > > > + > > > > + if (ssk->sk_mark != i + 1) > > > > + ctx->retval = -1; > > > > + if (ssk->sk_mark == 1 && > > > > + __builtin_memcmp(icsk->icsk_ca_ops->name, > > > > cc, > > > > TCP_CA_NAME_MAX)) > > > > + ctx->retval = -1; > > > > + > > > > + if (i++ >= MPTCP_SUBFLOWS_MAX) > > > > + break; > > > > > > I guess this part is needed for the verifier, right? > > > Somehow related to "cond_break" explained in this link? > > > > > > https://lwn.net/Articles/964381/ > > > > > > > Change this as "cond_break" in v2. > > Note that you could also modify list_for_each_entry() to add this > 'cond_break' (+bpf_experimental.h?), not to have to deal with that > each > time mptcp_for_each_subflow is used. > > Cheers, > Matt
Hi Geliang, On 26/08/2024 04:57, Geliang Tang wrote: > On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote: >> On 21/08/2024 10:00, Geliang Tang wrote: >>> On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote: >>>> On 20/08/2024 10:44, Geliang Tang wrote: (...) >>>>> + mptcp_for_each_subflow(msk, subflow) { >>>> >>>> (might be better to use this helper in our WIP MPTCP BPF packets >>>> scheduler examples, instead of converting them to a fixed array) >>> >>> Yes, but there are still some access permission issues that need to >>> be >>> resolved. >> >> OK, because structures cannot be modified I suppose. > > No, it seems the subflow cast by bpf_core_cast() can't be passed to a > kernel function, regardless of whether this function modifies the > subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_" error > occurs. > > For example, mptcp_subflow_active() is a kernel function, and pass the > subflow to it in progs/mptcp_bpf_first.c like this: > > ''' > ... ... > extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) > __ksym; > > ... ... > SEC("struct_ops") > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, > struct mptcp_sched_data *data) > { > struct mptcp_subflow_context *subflow, *tmp; > > mptcp_for_each_subflow(msk, tmp) { > subflow = bpf_core_cast(tmp, struct > mptcp_subflow_context); (Why do you need to cast "tmp" (struct mptcp_subflow_context *) in the same type of pointer? I don't think it changes anything for the error you got (see below), but it looks strange.) > if (!mptcp_subflow_active(subflow)) > continue; > } > return 0; > } > > ''' > > An "arg#0 is untrusted_ptr_ expected ptr_" error occurs: > > ''' > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 > 21: (e5) may_goto pc+1 > 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0 > R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0 > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 > 22: (05) goto pc-14 > 9: (79) r6 = *(u64 *)(r6 +0) ; R6_w=ptr_list_head() > 10: (1f) r6 -= r7 ; R6_w=ptr_list_head() R7=0 > 11: (bf) r1 = r6 ; R1_w=ptr_list_head() > R6_w=ptr_list_head() > 12: (0f) r1 += r7 ; R1=ptr_list_head() R7=0 > 13: (1d) if r1 == r8 goto pc+9 ; R1=ptr_list_head() > R8=trusted_ptr_mptcp_sock(off=2488) > ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @ > mptcp_bpf_first.c:28 > 14: (bf) r1 = r6 ; R1_w=ptr_list_head() > R6=ptr_list_head() > 15: (18) r2 = 0x6d14 ; R2_w=27924 > 17: (85) call bpf_rdonly_cast#159867 ; > R0_w=untrusted_ptr_mptcp_subflow_context() > ; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29 > 18: (bf) r1 = r0 ; > R0_w=untrusted_ptr_mptcp_subflow_context() > R1_w=untrusted_ptr_mptcp_subflow_context() > 19: (85) call mptcp_subflow_active#111397 > arg#0 is untrusted_ptr_ expected ptr_ or socket > processed 23 insns (limit 1000000) max_states_per_insn 0 total_states 2 > peak_states 2 mark_read 2 > -- END PROG LOAD LOG -- > ''' > > How can I fix this? I need your advice. I'm not an expert in this, but I guess it means you cannot use 'mptcp_subflow_active()', because it can modify the 'subflow' structure that you got with bpf_core_cast() for a read-only usage. I guess it means we cannot iterate through the list and modify items from the list "directly" with BPF. Except if there is something else we can use, and I don't know about (which is very likely), we might have to continue extracting the subflows into an array of a fixed size. Cheers, Matt -- Sponsored by the NGI0 Core fund.
Hi Matt, Martin, On Mon, 2024-08-26 at 10:44 +0200, Matthieu Baerts wrote: > Hi Geliang, > > On 26/08/2024 04:57, Geliang Tang wrote: > > On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote: > > > On 21/08/2024 10:00, Geliang Tang wrote: > > > > On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote: > > > > > On 20/08/2024 10:44, Geliang Tang wrote: > > (...) > > > > > > > + mptcp_for_each_subflow(msk, subflow) { > > > > > > > > > > (might be better to use this helper in our WIP MPTCP BPF > > > > > packets > > > > > scheduler examples, instead of converting them to a fixed > > > > > array) > > > > > > > > Yes, but there are still some access permission issues that > > > > need to > > > > be > > > > resolved. > > > > > > OK, because structures cannot be modified I suppose. > > > > No, it seems the subflow cast by bpf_core_cast() can't be passed to > > a > > kernel function, regardless of whether this function modifies the > > subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_" error > > occurs. > > > > For example, mptcp_subflow_active() is a kernel function, and pass > > the > > subflow to it in progs/mptcp_bpf_first.c like this: > > > > ''' > > ... ... > > extern bool mptcp_subflow_active(struct mptcp_subflow_context > > *subflow) > > __ksym; > > > > ... ... > > SEC("struct_ops") > > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, > > struct mptcp_sched_data *data) > > { > > struct mptcp_subflow_context *subflow, *tmp; > > > > mptcp_for_each_subflow(msk, tmp) { > > subflow = bpf_core_cast(tmp, struct > > mptcp_subflow_context); > > (Why do you need to cast "tmp" (struct mptcp_subflow_context *) in > the > same type of pointer? I don't think it changes anything for the error > you got (see below), but it looks strange.) We must do the cast, otherwise, an "access beyond struct list_head" occurs: ; token = subflow->token; @ mptcp_subflow.c:92 13: (61) r4 = *(u32 *)(r1 +524) access beyond struct list_head at off 524 size 4 See Martin's comment in [1]. [1] https://patchwork.kernel.org/project/netdevbpf/patch/20240805-upstream-bpf-next-20240506-mptcp-subflow-test-v4-2-2b4ca6994993@kernel.org/ > > > if (!mptcp_subflow_active(subflow)) > > continue; > > } > > return 0; > > } > > > > ''' > > > > An "arg#0 is untrusted_ptr_ expected ptr_" error occurs: > > > > ''' > > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 > > 21: (e5) may_goto pc+1 > > 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0 > > R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0 > > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 > > 22: (05) goto pc-14 > > 9: (79) r6 = *(u64 *)(r6 +0) ; R6_w=ptr_list_head() > > 10: (1f) r6 -= r7 ; R6_w=ptr_list_head() R7=0 > > 11: (bf) r1 = r6 ; R1_w=ptr_list_head() > > R6_w=ptr_list_head() > > 12: (0f) r1 += r7 ; R1=ptr_list_head() R7=0 > > 13: (1d) if r1 == r8 goto pc+9 ; R1=ptr_list_head() > > R8=trusted_ptr_mptcp_sock(off=2488) > > ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @ > > mptcp_bpf_first.c:28 > > 14: (bf) r1 = r6 ; R1_w=ptr_list_head() > > R6=ptr_list_head() > > 15: (18) r2 = 0x6d14 ; R2_w=27924 > > 17: (85) call bpf_rdonly_cast#159867 ; > > R0_w=untrusted_ptr_mptcp_subflow_context() > > ; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29 > > 18: (bf) r1 = r0 ; > > R0_w=untrusted_ptr_mptcp_subflow_context() > > R1_w=untrusted_ptr_mptcp_subflow_context() > > 19: (85) call mptcp_subflow_active#111397 > > arg#0 is untrusted_ptr_ expected ptr_ or socket > > processed 23 insns (limit 1000000) max_states_per_insn 0 > > total_states 2 > > peak_states 2 mark_read 2 > > -- END PROG LOAD LOG -- > > ''' > > > > How can I fix this? I need your advice. > > I'm not an expert in this, but I guess it means you cannot use > 'mptcp_subflow_active()', because it can modify the 'subflow' > structure > that you got with bpf_core_cast() for a read-only usage. A read-only function will get the same error. I added a read-only function mptcp_subflow_get_scheduled() for testing: bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context *subflow) { return subflow->scheduled; } And invoke it from BPF in mptcp_for_each_subflow() loop: int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { struct mptcp_subflow_context *subflow, *tmp; mptcp_for_each_subflow(msk, tmp) { subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); mptcp_subflow_get_scheduled(subflow); } return 0; } The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs. Hope Martin can give us a solution for this issue. Thanks, -Geliang > > I guess it means we cannot iterate through the list and modify items > from the list "directly" with BPF. Except if there is something else > we > can use, and I don't know about (which is very likely), we might have > to > continue extracting the subflows into an array of a fixed size. > > Cheers, > Matt
On 8/26/24 2:24 AM, Geliang Tang wrote: >> I'm not an expert in this, but I guess it means you cannot use >> 'mptcp_subflow_active()', because it can modify the 'subflow' >> structure >> that you got with bpf_core_cast() for a read-only usage. > > A read-only function will get the same error. > > I added a read-only function mptcp_subflow_get_scheduled() for testing: > > bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context *subflow) > { > return subflow->scheduled; > } > > And invoke it from BPF in mptcp_for_each_subflow() loop: > > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, > struct mptcp_sched_data *data) > { > struct mptcp_subflow_context *subflow, *tmp; > > mptcp_for_each_subflow(msk, tmp) { > subflow = bpf_core_cast(tmp, struct > mptcp_subflow_context); > mptcp_subflow_get_scheduled(subflow); > } > return 0; > } > > The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs. > > Hope Martin can give us a solution for this issue. I don't know the context for this list walking + modify-by-kfunc usage, so the following could be a grain of salt. It seems like fitting the bpf_iter use case. Take a look at some recent bpf_iter additions, e.g. bpf_iter_{task,css}_next(). Also the bpf_for_each macro usage in selftests. There may be some secondary things that need to consider, e.g. how the walked subflow is protected, rcu, refcnt...etc.
Hi Martin, Matt, On Mon, 2024-08-26 at 22:22 -0700, Martin KaFai Lau wrote: > On 8/26/24 2:24 AM, Geliang Tang wrote: > > > I'm not an expert in this, but I guess it means you cannot use > > > 'mptcp_subflow_active()', because it can modify the 'subflow' > > > structure > > > that you got with bpf_core_cast() for a read-only usage. > > > > A read-only function will get the same error. > > > > I added a read-only function mptcp_subflow_get_scheduled() for > > testing: > > > > bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context > > *subflow) > > { > > return subflow->scheduled; > > } > > > > And invoke it from BPF in mptcp_for_each_subflow() loop: > > > > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, > > struct mptcp_sched_data *data) > > { > > struct mptcp_subflow_context *subflow, *tmp; > > > > mptcp_for_each_subflow(msk, tmp) { > > subflow = bpf_core_cast(tmp, struct > > mptcp_subflow_context); > > mptcp_subflow_get_scheduled(subflow); > > } > > return 0; > > } > > > > The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs. > > > > Hope Martin can give us a solution for this issue. > > I don't know the context for this list walking + modify-by-kfunc > usage, so the > following could be a grain of salt. > > It seems like fitting the bpf_iter use case. Take a look at some > recent bpf_iter > additions, e.g. bpf_iter_{task,css}_next(). Also the bpf_for_each > macro usage in > selftests. There may be some secondary things that need to consider, > e.g. how > the walked subflow is protected, rcu, refcnt...etc. Great! Thanks. bpf_iter works well for MPTCP BPF scheduler. I added a new bpf_iter type named "mptcp_subflow" in net/mptcp/bpf.c like this: ''' __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_mptcp_subflow_new(struct bpf_iter_mptcp_subflow *it, struct mptcp_sock *msk, unsigned int flags) { struct bpf_iter_mptcp_subflow_kern *kit = (void *)it; kit->msk = msk; kit->pos = &msk->conn_list; spin_lock_bh(&msk->pm.lock); return 0; } __bpf_kfunc struct mptcp_subflow_context * bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it) { struct bpf_iter_mptcp_subflow_kern *kit = (void *)it; struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = kit->msk; subflow = list_entry((kit->pos)->next, struct mptcp_subflow_context, node); if (list_entry_is_head(subflow, &msk->conn_list, node)) return NULL; kit->pos = &subflow->node; return subflow; } __bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct bpf_iter_mptcp_subflow *it) { struct bpf_iter_mptcp_subflow_kern *kit = (void *)it; struct mptcp_sock *msk = kit->msk; spin_unlock_bh(&msk->pm.lock); } __bpf_kfunc_end_defs(); ''' And use "bpf_for_each(mptcp_subflow)" like this in progs/mptcp_bpf_burst.c: ''' i = 0; bpf_rcu_read_lock(); bpf_for_each(mptcp_subflow, subflow, msk, 0) { bool backup = subflow->backup || subflow->request_bkup; if (i++ > MPTCP_SUBFLOWS_MAX) break; ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; nr_active += !backup; pace = subflow->avg_pacing_rate; if (!pace) { /* init pacing rate from socket */ subflow->avg_pacing_rate = ssk->sk_pacing_rate; pace = subflow->avg_pacing_rate; if (!pace) continue; } linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace); if (linger_time < send_info[backup].linger_time) { send_info[backup].subflow_id = i; send_info[backup].linger_time = linger_time; } } bpf_rcu_read_unlock(); ''' With this mptcp_subflow bpf_iter, we can get rid of the subflows array "contexts" in struct mptcp_sched_data: ''' diff --git a/include/net/mptcp.h b/include/net/mptcp.h index c3d0ea07cf0c..a739f917b054 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -104,8 +104,6 @@ struct mptcp_out_options { struct mptcp_sched_data { bool reinject; - u8 subflows; - struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX]; }; struct mptcp_sched_ops { ''' I'll send out these patches for review soon, with Martin's "Suggested- by" tags. Thanks, -Geliang
Hi Geliang, Martin, On 26/08/2024 11:24, Geliang Tang wrote: > Hi Matt, Martin, > > On Mon, 2024-08-26 at 10:44 +0200, Matthieu Baerts wrote: >> Hi Geliang, >> >> On 26/08/2024 04:57, Geliang Tang wrote: >>> On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote: >>>> On 21/08/2024 10:00, Geliang Tang wrote: >>>>> On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote: >>>>>> On 20/08/2024 10:44, Geliang Tang wrote: >> >> (...) >> >>>>>>> + mptcp_for_each_subflow(msk, subflow) { >>>>>> >>>>>> (might be better to use this helper in our WIP MPTCP BPF >>>>>> packets >>>>>> scheduler examples, instead of converting them to a fixed >>>>>> array) >>>>> >>>>> Yes, but there are still some access permission issues that >>>>> need to >>>>> be >>>>> resolved. >>>> >>>> OK, because structures cannot be modified I suppose. >>> >>> No, it seems the subflow cast by bpf_core_cast() can't be passed to >>> a >>> kernel function, regardless of whether this function modifies the >>> subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_" error >>> occurs. >>> >>> For example, mptcp_subflow_active() is a kernel function, and pass >>> the >>> subflow to it in progs/mptcp_bpf_first.c like this: >>> >>> ''' >>> ... ... >>> extern bool mptcp_subflow_active(struct mptcp_subflow_context >>> *subflow) >>> __ksym; >>> >>> ... ... >>> SEC("struct_ops") >>> int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, >>> struct mptcp_sched_data *data) >>> { >>> struct mptcp_subflow_context *subflow, *tmp; >>> >>> mptcp_for_each_subflow(msk, tmp) { >>> subflow = bpf_core_cast(tmp, struct >>> mptcp_subflow_context); >> >> (Why do you need to cast "tmp" (struct mptcp_subflow_context *) in >> the >> same type of pointer? I don't think it changes anything for the error >> you got (see below), but it looks strange.) > > We must do the cast, otherwise, an "access beyond struct list_head" > occurs: > > ; token = subflow->token; @ mptcp_subflow.c:92 > 13: (61) r4 = *(u32 *)(r1 +524) > access beyond struct list_head at off 524 size 4 OK! > > See Martin's comment in [1]. > > [1] > https://patchwork.kernel.org/project/netdevbpf/patch/20240805-upstream-bpf-next-20240506-mptcp-subflow-test-v4-2-2b4ca6994993@kernel.org/ > >> >>> if (!mptcp_subflow_active(subflow)) >>> continue; >>> } >>> return 0; >>> } >>> >>> ''' >>> >>> An "arg#0 is untrusted_ptr_ expected ptr_" error occurs: >>> >>> ''' >>> ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 >>> 21: (e5) may_goto pc+1 >>> 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0 >>> R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0 >>> ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 >>> 22: (05) goto pc-14 >>> 9: (79) r6 = *(u64 *)(r6 +0) ; R6_w=ptr_list_head() >>> 10: (1f) r6 -= r7 ; R6_w=ptr_list_head() R7=0 >>> 11: (bf) r1 = r6 ; R1_w=ptr_list_head() >>> R6_w=ptr_list_head() >>> 12: (0f) r1 += r7 ; R1=ptr_list_head() R7=0 >>> 13: (1d) if r1 == r8 goto pc+9 ; R1=ptr_list_head() >>> R8=trusted_ptr_mptcp_sock(off=2488) >>> ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @ >>> mptcp_bpf_first.c:28 >>> 14: (bf) r1 = r6 ; R1_w=ptr_list_head() >>> R6=ptr_list_head() >>> 15: (18) r2 = 0x6d14 ; R2_w=27924 >>> 17: (85) call bpf_rdonly_cast#159867 ; >>> R0_w=untrusted_ptr_mptcp_subflow_context() >>> ; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29 >>> 18: (bf) r1 = r0 ; >>> R0_w=untrusted_ptr_mptcp_subflow_context() >>> R1_w=untrusted_ptr_mptcp_subflow_context() >>> 19: (85) call mptcp_subflow_active#111397 >>> arg#0 is untrusted_ptr_ expected ptr_ or socket >>> processed 23 insns (limit 1000000) max_states_per_insn 0 >>> total_states 2 >>> peak_states 2 mark_read 2 >>> -- END PROG LOAD LOG -- >>> ''' >>> >>> How can I fix this? I need your advice. >> >> I'm not an expert in this, but I guess it means you cannot use >> 'mptcp_subflow_active()', because it can modify the 'subflow' >> structure >> that you got with bpf_core_cast() for a read-only usage. > > A read-only function will get the same error. > > I added a read-only function mptcp_subflow_get_scheduled() for testing: > > bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context *subflow) Do you have the same issue if '*subflow' is marked as 'const'? > { > return subflow->scheduled; > } > > And invoke it from BPF in mptcp_for_each_subflow() loop: > > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, > struct mptcp_sched_data *data) > { > struct mptcp_subflow_context *subflow, *tmp; > > mptcp_for_each_subflow(msk, tmp) { > subflow = bpf_core_cast(tmp, struct > mptcp_subflow_context); > mptcp_subflow_get_scheduled(subflow); > } > return 0; > } > > The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs. > > Hope Martin can give us a solution for this issue. > > Thanks, > -Geliang > >> >> I guess it means we cannot iterate through the list and modify items >> from the list "directly" with BPF. Except if there is something else >> we >> can use, and I don't know about (which is very likely), we might have >> to >> continue extracting the subflows into an array of a fixed size. >> >> Cheers, >> Matt > Cheers, Matt -- Sponsored by the NGI0 Core fund.
On Mon, 2024-08-26 at 11:49 +0200, Matthieu Baerts wrote: > Hi Geliang, Martin, > > On 26/08/2024 11:24, Geliang Tang wrote: > > Hi Matt, Martin, > > > > On Mon, 2024-08-26 at 10:44 +0200, Matthieu Baerts wrote: > > > Hi Geliang, > > > > > > On 26/08/2024 04:57, Geliang Tang wrote: > > > > On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote: > > > > > On 21/08/2024 10:00, Geliang Tang wrote: > > > > > > On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote: > > > > > > > On 20/08/2024 10:44, Geliang Tang wrote: > > > > > > (...) > > > > > > > > > > > + mptcp_for_each_subflow(msk, subflow) { > > > > > > > > > > > > > > (might be better to use this helper in our WIP MPTCP BPF > > > > > > > packets > > > > > > > scheduler examples, instead of converting them to a fixed > > > > > > > array) > > > > > > > > > > > > Yes, but there are still some access permission issues that > > > > > > need to > > > > > > be > > > > > > resolved. > > > > > > > > > > OK, because structures cannot be modified I suppose. > > > > > > > > No, it seems the subflow cast by bpf_core_cast() can't be > > > > passed to > > > > a > > > > kernel function, regardless of whether this function modifies > > > > the > > > > subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_" > > > > error > > > > occurs. > > > > > > > > For example, mptcp_subflow_active() is a kernel function, and > > > > pass > > > > the > > > > subflow to it in progs/mptcp_bpf_first.c like this: > > > > > > > > ''' > > > > ... ... > > > > extern bool mptcp_subflow_active(struct mptcp_subflow_context > > > > *subflow) > > > > __ksym; > > > > > > > > ... ... > > > > SEC("struct_ops") > > > > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, > > > > struct mptcp_sched_data *data) > > > > { > > > > struct mptcp_subflow_context *subflow, *tmp; > > > > > > > > mptcp_for_each_subflow(msk, tmp) { > > > > subflow = bpf_core_cast(tmp, struct > > > > mptcp_subflow_context); > > > > > > (Why do you need to cast "tmp" (struct mptcp_subflow_context *) > > > in > > > the > > > same type of pointer? I don't think it changes anything for the > > > error > > > you got (see below), but it looks strange.) > > > > We must do the cast, otherwise, an "access beyond struct list_head" > > occurs: > > > > ; token = subflow->token; @ mptcp_subflow.c:92 > > 13: (61) r4 = *(u32 *)(r1 +524) > > access beyond struct list_head at off 524 size 4 > > OK! > > > > > See Martin's comment in [1]. > > > > [1] > > https://patchwork.kernel.org/project/netdevbpf/patch/20240805-upstream-bpf-next-20240506-mptcp-subflow-test-v4-2-2b4ca6994993@kernel.org/ > > > > > > > > > if (!mptcp_subflow_active(subflow)) > > > > continue; > > > > } > > > > return 0; > > > > } > > > > > > > > ''' > > > > > > > > An "arg#0 is untrusted_ptr_ expected ptr_" error occurs: > > > > > > > > ''' > > > > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 > > > > 21: (e5) may_goto pc+1 > > > > 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0 > > > > R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0 > > > > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 > > > > 22: (05) goto pc-14 > > > > 9: (79) r6 = *(u64 *)(r6 +0) ; R6_w=ptr_list_head() > > > > 10: (1f) r6 -= r7 ; R6_w=ptr_list_head() > > > > R7=0 > > > > 11: (bf) r1 = r6 ; R1_w=ptr_list_head() > > > > R6_w=ptr_list_head() > > > > 12: (0f) r1 += r7 ; R1=ptr_list_head() R7=0 > > > > 13: (1d) if r1 == r8 goto pc+9 ; R1=ptr_list_head() > > > > R8=trusted_ptr_mptcp_sock(off=2488) > > > > ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @ > > > > mptcp_bpf_first.c:28 > > > > 14: (bf) r1 = r6 ; R1_w=ptr_list_head() > > > > R6=ptr_list_head() > > > > 15: (18) r2 = 0x6d14 ; R2_w=27924 > > > > 17: (85) call bpf_rdonly_cast#159867 ; > > > > R0_w=untrusted_ptr_mptcp_subflow_context() > > > > ; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29 > > > > 18: (bf) r1 = r0 ; > > > > R0_w=untrusted_ptr_mptcp_subflow_context() > > > > R1_w=untrusted_ptr_mptcp_subflow_context() > > > > 19: (85) call mptcp_subflow_active#111397 > > > > arg#0 is untrusted_ptr_ expected ptr_ or socket > > > > processed 23 insns (limit 1000000) max_states_per_insn 0 > > > > total_states 2 > > > > peak_states 2 mark_read 2 > > > > -- END PROG LOAD LOG -- > > > > ''' > > > > > > > > How can I fix this? I need your advice. > > > > > > I'm not an expert in this, but I guess it means you cannot use > > > 'mptcp_subflow_active()', because it can modify the 'subflow' > > > structure > > > that you got with bpf_core_cast() for a read-only usage. > > > > A read-only function will get the same error. > > > > I added a read-only function mptcp_subflow_get_scheduled() for > > testing: > > > > bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context > > *subflow) > > Do you have the same issue if '*subflow' is marked as 'const'? Yes, the same error occurs: ''' libbpf: prog 'bpf_first_get_subflow': -- BEGIN PROG LOAD LOG -- 0: R1=ctx() R10=fp0 ; int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, @ mptcp_bpf_first.c:22 0: (b7) r7 = 0 ; R7_w=0 1: (b7) r2 = 2488 ; R2_w=2488 2: (79) r8 = *(u64 *)(r1 +0) func 'get_subflow' arg0 has btf_id 27737 type STRUCT 'mptcp_sock' 3: R1=ctx() R8_w=trusted_ptr_mptcp_sock() 3: (bf) r6 = r8 ; R6_w=trusted_ptr_mptcp_sock() R8_w=trusted_ptr_mptcp_sock() 4: (0f) r6 += r2 ; R2_w=2488 R6_w=trusted_ptr_mptcp_sock(off=2488) 5: (b7) r1 = 2488 ; R1_w=2488 6: (0f) r8 += r1 ; R1_w=2488 R8_w=trusted_ptr_mptcp_sock(off=2488) 7: (b7) r9 = 0 ; R9_w=0 8: (05) goto pc+12 ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 21: (e5) may_goto pc+1 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0 R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0 ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27 22: (05) goto pc-14 9: (79) r6 = *(u64 *)(r6 +0) ; R6_w=ptr_list_head() 10: (1f) r6 -= r7 ; R6_w=ptr_list_head() R7=0 11: (bf) r1 = r6 ; R1_w=ptr_list_head() R6_w=ptr_list_head() 12: (0f) r1 += r7 ; R1=ptr_list_head() R7=0 13: (1d) if r1 == r8 goto pc+9 ; R1=ptr_list_head() R8=trusted_ptr_mptcp_sock(off=2488) ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @ mptcp_bpf_first.c:28 14: (bf) r1 = r6 ; R1_w=ptr_list_head() R6=ptr_list_head() 15: (18) r2 = 0x6c55 ; R2_w=27733 17: (85) call bpf_rdonly_cast#122463 ; R0_w=untrusted_ptr_mptcp_subflow_context() ; mptcp_subflow_get_scheduled(subflow); @ mptcp_bpf_first.c:29 18: (bf) r1 = r0 ; R0_w=untrusted_ptr_mptcp_subflow_context() R1_w=untrusted_ptr_mptcp_subflow_context() 19: (85) call mptcp_subflow_get_scheduled#127716 arg#0 is untrusted_ptr_ expected ptr_ or socket processed 23 insns (limit 1000000) max_states_per_insn 0 total_states 2 peak_states 2 mark_read 2 -- END PROG LOAD LOG -- ''' net/mptcp/sched.c: ''' bool mptcp_subflow_get_scheduled(const struct mptcp_subflow_context *subflow) { return subflow->scheduled; } ''' progs/mptcp_bpf_first.c: ''' extern bool mptcp_subflow_get_scheduled(const struct mptcp_subflow_context *subflow) __ksym; ... ... SEC("struct_ops") int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, struct mptcp_sched_data *data) { struct mptcp_subflow_context *subflow, *tmp; mptcp_for_each_subflow(msk, tmp) { subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); mptcp_subflow_get_scheduled(subflow); } return 0; } ''' Thanks, -Geliang > > > { > > return subflow->scheduled; > > } > > > > And invoke it from BPF in mptcp_for_each_subflow() loop: > > > > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, > > struct mptcp_sched_data *data) > > { > > struct mptcp_subflow_context *subflow, *tmp; > > > > mptcp_for_each_subflow(msk, tmp) { > > subflow = bpf_core_cast(tmp, struct > > mptcp_subflow_context); > > mptcp_subflow_get_scheduled(subflow); > > } > > return 0; > > } > > > > The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs. > > > > Hope Martin can give us a solution for this issue. > > > > Thanks, > > -Geliang > > > > > > > > I guess it means we cannot iterate through the list and modify > > > items > > > from the list "directly" with BPF. Except if there is something > > > else > > > we > > > can use, and I don't know about (which is very likely), we might > > > have > > > to > > > continue extracting the subflows into an array of a fixed size. > > > > > > Cheers, > > > Matt > > > > Cheers, > Matt
On 8/21/24 2:37 AM, Matthieu Baerts wrote: > Hi Geliang, > > Thank you for your reply! > > On 21/08/2024 10:00, Geliang Tang wrote: >> On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote: >>> On 20/08/2024 10:44, Geliang Tang wrote: > (...) > >>>> diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c >>>> b/tools/testing/selftests/bpf/prog_tests/mptcp.c >>>> index 69fdcb28249d..2178db94f764 100644 >>>> --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c >>>> +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c >>>> @@ -383,6 +383,7 @@ static void run_subflow(char *new) >>>> { >>>> int server_fd, client_fd, err; >>>> char cc[TCP_CA_NAME_MAX]; >>>> + unsigned int mark; >>>> socklen_t len; >>>> >>>> server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, >>>> 0); >>>> @@ -407,6 +408,10 @@ static void run_subflow(char *new) >>>> ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc"); >>>> ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc"); >>> I guess the next steps is to remove these 'ss_search()', right? >> We can keep it for double checks. > I would not mind, even if 'ss | grep' is used in other BPF tests, but I > understood from Martin he prefers not to depend on external tools as > much as possible. It should indeed ease the maintenance. I would prefer to avoid the 'ss | grep'. The other bpf selftests using this "ss | grep" should be in the .sh which is not in the test_progs and not run by bpf CI. They are there to loop-waiting for something. The ss usage hopefully will be removed when eventually migrating the .sh tests to test_progs. At least the one in test_tc_tunnel.sh should be doable once moved to test_progs. Haven't looked at the test_xdp_features.sh yet. Thanks for trying the getsockopt approach and it turns out working well.
© 2016 - 2024 Red Hat, Inc.