[v1] fixes for "new MPTCP subflow subtest v4"

[PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Geliang Tang 3 months ago

From: Geliang Tang <tanggeliang@kylinos.cn>

This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
mptcp socket.

mptcp_for_each_stubflow() and other helpers related to list_dentry are
added into progs/mptcp_bpf.h.

Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
bpf_rdonly_cast to cast a pointer to tcp_sock for readonly. It will allow
to inspect all the fields in a tcp_sock.

Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 .../testing/selftests/bpf/prog_tests/mptcp.c  | 11 +++++++
 tools/testing/selftests/bpf/progs/mptcp_bpf.h | 26 +++++++++++++++
 .../selftests/bpf/progs/mptcp_subflow.c       | 32 +++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 69fdcb28249d..2178db94f764 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -383,6 +383,7 @@ static void run_subflow(char *new)
 {
 	int server_fd, client_fd, err;
 	char cc[TCP_CA_NAME_MAX];
+	unsigned int mark;
 	socklen_t len;
 
 	server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0);
@@ -407,6 +408,10 @@ static void run_subflow(char *new)
 	ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc");
 	ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc");
 
+	len = sizeof(mark);
+	err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len);
+	ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)");
+
 	close(client_fd);
 fail:
 	close(server_fd);
@@ -417,6 +422,7 @@ static void test_subflow(void)
 	int cgroup_fd, prog_fd, err;
 	struct mptcp_subflow *skel;
 	struct nstoken *nstoken;
+	struct bpf_link *link;
 
 	cgroup_fd = test__join_cgroup("/mptcp_subflow");
 	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: mptcp_subflow"))
@@ -442,6 +448,10 @@ static void test_subflow(void)
 	if (endpoint_init("subflow") < 0)
 		goto close_netns;
 
+	link = bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+	if (!ASSERT_OK_PTR(link, "getsockopt prog"))
+		goto close_netns;
+
 	run_subflow(skel->data->cc);
 
 close_netns:
@@ -450,6 +460,7 @@ static void test_subflow(void)
 	mptcp_subflow__destroy(skel);
 close_cgroup:
 	close(cgroup_fd);
+	bpf_link__destroy(link);
 }
 
 static struct nstoken *sched_init(char *flags, char *sched)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 782f36ed027e..2086170e7379 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -7,6 +7,32 @@
 
 #define MPTCP_SUBFLOWS_MAX 8
 
+static inline int list_is_head(const struct list_head *list,
+			       const struct list_head *head)
+{
+	return list == head;
+}
+
+#define list_entry(ptr, type, member)					\
+	container_of(ptr, type, member)
+
+#define list_first_entry(ptr, type, member)				\
+	list_entry((ptr)->next, type, member)
+
+#define list_next_entry(pos, member)					\
+	list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_entry_is_head(pos, head, member)				\
+	list_is_head(&pos->member, (head))
+
+#define list_for_each_entry(pos, head, member)				\
+	for (pos = list_first_entry(head, typeof(*pos), member);	\
+	     !list_entry_is_head(pos, head, member);			\
+	     pos = list_next_entry(pos, member))
+
+#define mptcp_for_each_subflow(__msk, __subflow)			\
+	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
+
 extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
 					bool scheduled) __ksym;
 
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index bc572e1d6df8..c41418ab8db4 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -4,6 +4,7 @@
 
 /* vmlinux.h, bpf_helpers.h and other 'define' */
 #include "bpf_tracing_net.h"
+#include "mptcp_bpf.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
 
 	return 1;
 }
+
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+	struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct mptcp_sock);
+	struct mptcp_subflow_context *subflow;
+	int i = 0;
+
+	if (!msk || ctx->level != SOL_SOCKET || ctx->optname != SO_MARK)
+		return 1;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		struct inet_connection_sock *icsk;
+		struct sock *ssk;
+
+		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
+							   struct mptcp_subflow_context));
+		icsk = bpf_core_cast(ssk, struct inet_connection_sock);
+
+		if (ssk->sk_mark != i + 1)
+			ctx->retval = -1;
+		if (ssk->sk_mark == 1 &&
+		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX))
+			ctx->retval = -1;
+
+		if (i++ >= MPTCP_SUBFLOWS_MAX)
+			break;
+	}
+
+	return 1;
+}
-- 
2.43.0

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Matthieu Baerts 3 months ago

Hi Geliang,

Thank you for looking at that!

On 20/08/2024 10:44, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> This patch adds a "cgroup/getsockopt" way to inspect the subflows of a
> mptcp socket.
> 
> mptcp_for_each_stubflow() and other helpers related to list_dentry are
> added into progs/mptcp_bpf.h.
> 
> Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list and use
> bpf_rdonly_cast to cast a pointer to tcp_sock for readonly. It will allow

Do you mean bpf_core_cast() instead of bpf_rdonly_cast()? Or is it
indirectly used?

> to inspect all the fields in a tcp_sock.
> 
> Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>  .../testing/selftests/bpf/prog_tests/mptcp.c  | 11 +++++++
>  tools/testing/selftests/bpf/progs/mptcp_bpf.h | 26 +++++++++++++++
>  .../selftests/bpf/progs/mptcp_subflow.c       | 32 +++++++++++++++++++
>  3 files changed, 69 insertions(+)
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> index 69fdcb28249d..2178db94f764 100644
> --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
> +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> @@ -383,6 +383,7 @@ static void run_subflow(char *new)
>  {
>  	int server_fd, client_fd, err;
>  	char cc[TCP_CA_NAME_MAX];
> +	unsigned int mark;
>  	socklen_t len;
>  
>  	server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0);
> @@ -407,6 +408,10 @@ static void run_subflow(char *new)
>  	ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc");
>  	ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc");

I guess the next steps is to remove these 'ss_search()', right?

> +	len = sizeof(mark);
> +	err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len);
> +	ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)");
> +
>  	close(client_fd);
>  fail:
>  	close(server_fd);
> @@ -417,6 +422,7 @@ static void test_subflow(void)
>  	int cgroup_fd, prog_fd, err;
>  	struct mptcp_subflow *skel;
>  	struct nstoken *nstoken;
> +	struct bpf_link *link;
>  
>  	cgroup_fd = test__join_cgroup("/mptcp_subflow");
>  	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup: mptcp_subflow"))
> @@ -442,6 +448,10 @@ static void test_subflow(void)
>  	if (endpoint_init("subflow") < 0)
>  		goto close_netns;
>  
> +	link = bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
> +	if (!ASSERT_OK_PTR(link, "getsockopt prog"))

I don't know how it works: is this going to fail if the getsockopt
program set 'ctx->retval = -1'?

> +		goto close_netns;
> +
>  	run_subflow(skel->data->cc);
>  
>  close_netns:
> @@ -450,6 +460,7 @@ static void test_subflow(void)
>  	mptcp_subflow__destroy(skel);
>  close_cgroup:
>  	close(cgroup_fd);
> +	bpf_link__destroy(link);
>  }
>  
>  static struct nstoken *sched_init(char *flags, char *sched)
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> index 782f36ed027e..2086170e7379 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> @@ -7,6 +7,32 @@
>  
>  #define MPTCP_SUBFLOWS_MAX 8
>  
> +static inline int list_is_head(const struct list_head *list,
> +			       const struct list_head *head)
> +{
> +	return list == head;
> +}
> +
> +#define list_entry(ptr, type, member)					\
> +	container_of(ptr, type, member)
> +
> +#define list_first_entry(ptr, type, member)				\
> +	list_entry((ptr)->next, type, member)
> +
> +#define list_next_entry(pos, member)					\
> +	list_entry((pos)->member.next, typeof(*(pos)), member)
> +
> +#define list_entry_is_head(pos, head, member)				\
> +	list_is_head(&pos->member, (head))
> +
> +#define list_for_each_entry(pos, head, member)				\
> +	for (pos = list_first_entry(head, typeof(*pos), member);	\
> +	     !list_entry_is_head(pos, head, member);			\
> +	     pos = list_next_entry(pos, member))

Out of curiosity, are these generic helpers not defined elsewhere? Is it
OK not to use the '_safe' version where the deletion of elements is
supported?

> +
> +#define mptcp_for_each_subflow(__msk, __subflow)			\
> +	list_for_each_entry(__subflow, &((__msk)->conn_list), node)
> +
>  extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
>  					bool scheduled) __ksym;
>  
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> index bc572e1d6df8..c41418ab8db4 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> @@ -4,6 +4,7 @@
>  
>  /* vmlinux.h, bpf_helpers.h and other 'define' */
>  #include "bpf_tracing_net.h"
> +#include "mptcp_bpf.h"
>  
>  char _license[] SEC("license") = "GPL";
>  
> @@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
>  
>  	return 1;
>  }
> +
> +SEC("cgroup/getsockopt")
> +int _getsockopt(struct bpf_sockopt *ctx)

This name seems too generic while what is done here is specific to the
'subflow' test. Maybe: _check_getsockopt_subflows()?

(or _check_getsockopt_subflows_mark() and _check_getsockopt_subflow_cc()
see below)

> +{
> +	struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct mptcp_sock);

What happens if the 'sk' is not an 'msk'?
Does it check that the sk is indeed an MPTCP one? (how?)

> +	struct mptcp_subflow_context *subflow;
> +	int i = 0;
> +
> +	if (!msk || ctx->level != SOL_SOCKET || ctx->optname != SO_MARK)
> +		return 1;

Would it not be clearer to split the two checks? One dedicated to the
mark, when getsockopt(SO_MARK) is used, and one for the CC, when the
getsockopt(TCP_CONGESTION) is used? By doing that, we would know which
one had an issue, if any, not just "something wrong with getsockopt()"?

> +
> +	mptcp_for_each_subflow(msk, subflow) {

(might be better to use this helper in our WIP MPTCP BPF packets
scheduler examples, instead of converting them to a fixed array)

> +		struct inet_connection_sock *icsk;
> +		struct sock *ssk;
> +
> +		ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> +							   struct mptcp_subflow_context));
> +		icsk = bpf_core_cast(ssk, struct inet_connection_sock);
> +
> +		if (ssk->sk_mark != i + 1)
> +			ctx->retval = -1;
> +		if (ssk->sk_mark == 1 &&
> +		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX))
> +			ctx->retval = -1;
> +
> +		if (i++ >= MPTCP_SUBFLOWS_MAX)
> +			break;

I guess this part is needed for the verifier, right?
Somehow related to "cond_break" explained in this link?

  https://lwn.net/Articles/964381/


> +	}
> +
> +	return 1;
> +}

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Geliang Tang 3 months ago

Hi Matt,

Thanks for the review.

On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote:
> Hi Geliang,
> 
> Thank you for looking at that!
> 
> On 20/08/2024 10:44, Geliang Tang wrote:
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > This patch adds a "cgroup/getsockopt" way to inspect the subflows
> > of a
> > mptcp socket.
> > 
> > mptcp_for_each_stubflow() and other helpers related to list_dentry
> > are
> > added into progs/mptcp_bpf.h.
> > 
> > Add an extra "cgroup/getsockopt" prog to walk the msk->conn_list
> > and use
> > bpf_rdonly_cast to cast a pointer to tcp_sock for readonly. It will
> > allow
> 
> Do you mean bpf_core_cast() instead of bpf_rdonly_cast()? Or is it
> indirectly used?

Yes, it should be "bpf_core_cast". Updated in v2.

> 
> > to inspect all the fields in a tcp_sock.
> > 
> > Suggested-by: Martin KaFai Lau <martin.lau@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> >  .../testing/selftests/bpf/prog_tests/mptcp.c  | 11 +++++++
> >  tools/testing/selftests/bpf/progs/mptcp_bpf.h | 26 +++++++++++++++
> >  .../selftests/bpf/progs/mptcp_subflow.c       | 32
> > +++++++++++++++++++
> >  3 files changed, 69 insertions(+)
> > 
> > diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > index 69fdcb28249d..2178db94f764 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > @@ -383,6 +383,7 @@ static void run_subflow(char *new)
> >  {
> >  	int server_fd, client_fd, err;
> >  	char cc[TCP_CA_NAME_MAX];
> > +	unsigned int mark;
> >  	socklen_t len;
> >  
> >  	server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1,
> > 0);
> > @@ -407,6 +408,10 @@ static void run_subflow(char *new)
> >  	ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc");
> >  	ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc");
> 
> I guess the next steps is to remove these 'ss_search()', right?

We can keep it for double checks.

> 
> > +	len = sizeof(mark);
> > +	err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark,
> > &len);
> > +	ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)");
> > +
> >  	close(client_fd);
> >  fail:
> >  	close(server_fd);
> > @@ -417,6 +422,7 @@ static void test_subflow(void)
> >  	int cgroup_fd, prog_fd, err;
> >  	struct mptcp_subflow *skel;
> >  	struct nstoken *nstoken;
> > +	struct bpf_link *link;
> >  
> >  	cgroup_fd = test__join_cgroup("/mptcp_subflow");
> >  	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup:
> > mptcp_subflow"))
> > @@ -442,6 +448,10 @@ static void test_subflow(void)
> >  	if (endpoint_init("subflow") < 0)
> >  		goto close_netns;
> >  
> > +	link = bpf_program__attach_cgroup(skel->progs._getsockopt,
> > cgroup_fd);
> > +	if (!ASSERT_OK_PTR(link, "getsockopt prog"))
> 
> I don't know how it works: is this going to fail if the getsockopt
> program set 'ctx->retval = -1'?

Yes. ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)") will fail if the
getsockopt program set 'ctx->retval = -1'.
> 
> > +		goto close_netns;
> > +
> >  	run_subflow(skel->data->cc);
> >  
> >  close_netns:
> > @@ -450,6 +460,7 @@ static void test_subflow(void)
> >  	mptcp_subflow__destroy(skel);
> >  close_cgroup:
> >  	close(cgroup_fd);
> > +	bpf_link__destroy(link);
> >  }
> >  
> >  static struct nstoken *sched_init(char *flags, char *sched)
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > index 782f36ed027e..2086170e7379 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > @@ -7,6 +7,32 @@
> >  
> >  #define MPTCP_SUBFLOWS_MAX 8
> >  
> > +static inline int list_is_head(const struct list_head *list,
> > +			       const struct list_head *head)
> > +{
> > +	return list == head;
> > +}
> > +
> > +#define list_entry(ptr, type,
> > member)					\
> > +	container_of(ptr, type, member)
> > +
> > +#define list_first_entry(ptr, type,
> > member)				\
> > +	list_entry((ptr)->next, type, member)
> > +
> > +#define list_next_entry(pos,
> > member)					\
> > +	list_entry((pos)->member.next, typeof(*(pos)), member)
> > +
> > +#define list_entry_is_head(pos, head,
> > member)				\
> > +	list_is_head(&pos->member, (head))
> > +
> > +#define list_for_each_entry(pos, head,
> > member)				\
> > +	for (pos = list_first_entry(head, typeof(*pos),
> > member);	\
> > +	     !list_entry_is_head(pos, head,
> > member);			\
> > +	     pos = list_next_entry(pos, member))
> 
> Out of curiosity, are these generic helpers not defined elsewhere? Is
> it
> OK not to use the '_safe' version where the deletion of elements is
> supported?

It's read only in this test, so list_for_each_entry is enough. I added
the '_safe' version in v2 too for future use.

> 
> > +
> > +#define mptcp_for_each_subflow(__msk,
> > __subflow)			\
> > +	list_for_each_entry(__subflow, &((__msk)->conn_list),
> > node)
> > +
> >  extern void mptcp_subflow_set_scheduled(struct
> > mptcp_subflow_context *subflow,
> >  					bool scheduled) __ksym;
> >  
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > index bc572e1d6df8..c41418ab8db4 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > @@ -4,6 +4,7 @@
> >  
> >  /* vmlinux.h, bpf_helpers.h and other 'define' */
> >  #include "bpf_tracing_net.h"
> > +#include "mptcp_bpf.h"
> >  
> >  char _license[] SEC("license") = "GPL";
> >  
> > @@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
> >  
> >  	return 1;
> >  }
> > +
> > +SEC("cgroup/getsockopt")
> > +int _getsockopt(struct bpf_sockopt *ctx)
> 
> This name seems too generic while what is done here is specific to
> the
> 'subflow' test. Maybe: _check_getsockopt_subflows()?

Changed it to _getsockopt_subflow in v2.

> 
> (or _check_getsockopt_subflows_mark() and
> _check_getsockopt_subflow_cc()
> see below)
> 
> > +{
> > +	struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct
> > mptcp_sock);
> 
> What happens if the 'sk' is not an 'msk'?
> Does it check that the sk is indeed an MPTCP one? (how?)

Also check msk->token in v2.

> 
> > +	struct mptcp_subflow_context *subflow;
> > +	int i = 0;
> > +
> > +	if (!msk || ctx->level != SOL_SOCKET || ctx->optname !=
> > SO_MARK)
> > +		return 1;
> 
> Would it not be clearer to split the two checks? One dedicated to the
> mark, when getsockopt(SO_MARK) is used, and one for the CC, when the
> getsockopt(TCP_CONGESTION) is used? By doing that, we would know
> which
> one had an issue, if any, not just "something wrong with
> getsockopt()"?

Done in v2.

> 
> > +
> > +	mptcp_for_each_subflow(msk, subflow) {
> 
> (might be better to use this helper in our WIP MPTCP BPF packets
> scheduler examples, instead of converting them to a fixed array)

Yes, but there are still some access permission issues that need to be
resolved.

> 
> > +		struct inet_connection_sock *icsk;
> > +		struct sock *ssk;
> > +
> > +		ssk =
> > mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > +							   struct
> > mptcp_subflow_context));
> > +		icsk = bpf_core_cast(ssk, struct
> > inet_connection_sock);
> > +
> > +		if (ssk->sk_mark != i + 1)
> > +			ctx->retval = -1;
> > +		if (ssk->sk_mark == 1 &&
> > +		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc,
> > TCP_CA_NAME_MAX))
> > +			ctx->retval = -1;
> > +
> > +		if (i++ >= MPTCP_SUBFLOWS_MAX)
> > +			break;
> 
> I guess this part is needed for the verifier, right?
> Somehow related to "cond_break" explained in this link?
> 
>   https://lwn.net/Articles/964381/
> 

Change this as "cond_break" in v2.

Regards,
-Geliang

> 
> > +	}
> > +
> > +	return 1;
> > +}
> 
> Cheers,
> Matt

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Matthieu Baerts 3 months ago

Hi Geliang,

Thank you for your reply!

On 21/08/2024 10:00, Geliang Tang wrote:
> On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote:
>> On 20/08/2024 10:44, Geliang Tang wrote:

(...)

>>> diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c
>>> b/tools/testing/selftests/bpf/prog_tests/mptcp.c
>>> index 69fdcb28249d..2178db94f764 100644
>>> --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
>>> +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
>>> @@ -383,6 +383,7 @@ static void run_subflow(char *new)
>>>  {
>>>  	int server_fd, client_fd, err;
>>>  	char cc[TCP_CA_NAME_MAX];
>>> +	unsigned int mark;
>>>  	socklen_t len;
>>>  
>>>  	server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1,
>>> 0);
>>> @@ -407,6 +408,10 @@ static void run_subflow(char *new)
>>>  	ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc");
>>>  	ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc");
>>
>> I guess the next steps is to remove these 'ss_search()', right?
> 
> We can keep it for double checks.

I would not mind, even if 'ss | grep' is used in other BPF tests, but I
understood from Martin he prefers not to depend on external tools as
much as possible. It should indeed ease the maintenance.

>>> +	len = sizeof(mark);
>>> +	err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark,
>>> &len);
>>> +	ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)");
>>> +
>>>  	close(client_fd);
>>>  fail:
>>>  	close(server_fd);
>>> @@ -417,6 +422,7 @@ static void test_subflow(void)
>>>  	int cgroup_fd, prog_fd, err;
>>>  	struct mptcp_subflow *skel;
>>>  	struct nstoken *nstoken;
>>> +	struct bpf_link *link;
>>>  
>>>  	cgroup_fd = test__join_cgroup("/mptcp_subflow");
>>>  	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup:
>>> mptcp_subflow"))
>>> @@ -442,6 +448,10 @@ static void test_subflow(void)
>>>  	if (endpoint_init("subflow") < 0)
>>>  		goto close_netns;
>>>  
>>> +	link = bpf_program__attach_cgroup(skel->progs._getsockopt,
>>> cgroup_fd);
>>> +	if (!ASSERT_OK_PTR(link, "getsockopt prog"))
>>
>> I don't know how it works: is this going to fail if the getsockopt
>> program set 'ctx->retval = -1'?
> 
> Yes. ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)") will fail if the
> getsockopt program set 'ctx->retval = -1'.

Good, thank you!

>>> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>>> b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>>> index 782f36ed027e..2086170e7379 100644
>>> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>>> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
>>> @@ -7,6 +7,32 @@
>>>  
>>>  #define MPTCP_SUBFLOWS_MAX 8
>>>  
>>> +static inline int list_is_head(const struct list_head *list,
>>> +			       const struct list_head *head)
>>> +{
>>> +	return list == head;
>>> +}
>>> +
>>> +#define list_entry(ptr, type,
>>> member)					\
>>> +	container_of(ptr, type, member)
>>> +
>>> +#define list_first_entry(ptr, type,
>>> member)				\
>>> +	list_entry((ptr)->next, type, member)
>>> +
>>> +#define list_next_entry(pos,
>>> member)					\
>>> +	list_entry((pos)->member.next, typeof(*(pos)), member)
>>> +
>>> +#define list_entry_is_head(pos, head,
>>> member)				\
>>> +	list_is_head(&pos->member, (head))
>>> +
>>> +#define list_for_each_entry(pos, head,
>>> member)				\
>>> +	for (pos = list_first_entry(head, typeof(*pos),
>>> member);	\
>>> +	     !list_entry_is_head(pos, head,
>>> member);			\
>>> +	     pos = list_next_entry(pos, member))
>>
>> Out of curiosity, are these generic helpers not defined elsewhere? Is
>> it
>> OK not to use the '_safe' version where the deletion of elements is
>> supported?
> 
> It's read only in this test, so list_for_each_entry is enough. I added
> the '_safe' version in v2 too for future use.

OK! (but I don't think you defined list_for_each_entry_safe(). Or is it
defined elsewhere?)

>>> +
>>> +#define mptcp_for_each_subflow(__msk,
>>> __subflow)			\
>>> +	list_for_each_entry(__subflow, &((__msk)->conn_list),
>>> node)
>>> +
>>>  extern void mptcp_subflow_set_scheduled(struct
>>> mptcp_subflow_context *subflow,
>>>  					bool scheduled) __ksym;
>>>  
>>> diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
>>> b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
>>> index bc572e1d6df8..c41418ab8db4 100644
>>> --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
>>> +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
>>> @@ -4,6 +4,7 @@
>>>  
>>>  /* vmlinux.h, bpf_helpers.h and other 'define' */
>>>  #include "bpf_tracing_net.h"
>>> +#include "mptcp_bpf.h"
>>>  
>>>  char _license[] SEC("license") = "GPL";
>>>  
>>> @@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops *skops)
>>>  
>>>  	return 1;
>>>  }
>>> +
>>> +SEC("cgroup/getsockopt")
>>> +int _getsockopt(struct bpf_sockopt *ctx)
>>
>> This name seems too generic while what is done here is specific to
>> the
>> 'subflow' test. Maybe: _check_getsockopt_subflows()?
> 
> Changed it to _getsockopt_subflow in v2.
> 
>>
>> (or _check_getsockopt_subflows_mark() and
>> _check_getsockopt_subflow_cc()
>> see below)
>>
>>> +{
>>> +	struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct
>>> mptcp_sock);
>>
>> What happens if the 'sk' is not an 'msk'?
>> Does it check that the sk is indeed an MPTCP one? (how?)
> 
> Also check msk->token in v2.

Mmh, I'm not sure if this is enough: the offset of 'token' in the msk
structure could point to something unrelated but non-null in another
socket structure, or out of bound. I guess BPF is doing extra checks not
to crash here. But maybe that's OK because it is only reading stuff?

>>> +	struct mptcp_subflow_context *subflow;
>>> +	int i = 0;
>>> +
>>> +	if (!msk || ctx->level != SOL_SOCKET || ctx->optname !=
>>> SO_MARK)
>>> +		return 1;
>>
>> Would it not be clearer to split the two checks? One dedicated to the
>> mark, when getsockopt(SO_MARK) is used, and one for the CC, when the
>> getsockopt(TCP_CONGESTION) is used? By doing that, we would know
>> which
>> one had an issue, if any, not just "something wrong with
>> getsockopt()"?
> 
> Done in v2.

Thanks!

>>> +
>>> +	mptcp_for_each_subflow(msk, subflow) {
>>
>> (might be better to use this helper in our WIP MPTCP BPF packets
>> scheduler examples, instead of converting them to a fixed array)
> 
> Yes, but there are still some access permission issues that need to be
> resolved.

OK, because structures cannot be modified I suppose.

>>
>>> +		struct inet_connection_sock *icsk;
>>> +		struct sock *ssk;
>>> +
>>> +		ssk =
>>> mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
>>> +							   struct
>>> mptcp_subflow_context));
>>> +		icsk = bpf_core_cast(ssk, struct
>>> inet_connection_sock);
>>> +
>>> +		if (ssk->sk_mark != i + 1)
>>> +			ctx->retval = -1;
>>> +		if (ssk->sk_mark == 1 &&
>>> +		    __builtin_memcmp(icsk->icsk_ca_ops->name, cc,
>>> TCP_CA_NAME_MAX))
>>> +			ctx->retval = -1;
>>> +
>>> +		if (i++ >= MPTCP_SUBFLOWS_MAX)
>>> +			break;
>>
>> I guess this part is needed for the verifier, right?
>> Somehow related to "cond_break" explained in this link?
>>
>>   https://lwn.net/Articles/964381/
>>
> 
> Change this as "cond_break" in v2.

Note that you could also modify list_for_each_entry() to add this
'cond_break' (+bpf_experimental.h?), not to have to deal with that each
time mptcp_for_each_subflow is used.

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Geliang Tang 2 months, 3 weeks ago

Hi Martin, Matt,

On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote:
> Hi Geliang,
> 
> Thank you for your reply!
> 
> On 21/08/2024 10:00, Geliang Tang wrote:
> > On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote:
> > > On 20/08/2024 10:44, Geliang Tang wrote:
> 
> (...)
> 
> > > > diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > > > b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > > > index 69fdcb28249d..2178db94f764 100644
> > > > --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > > > +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> > > > @@ -383,6 +383,7 @@ static void run_subflow(char *new)
> > > >  {
> > > >  	int server_fd, client_fd, err;
> > > >  	char cc[TCP_CA_NAME_MAX];
> > > > +	unsigned int mark;
> > > >  	socklen_t len;
> > > >  
> > > >  	server_fd = start_mptcp_server(AF_INET, ADDR_1,
> > > > PORT_1,
> > > > 0);
> > > > @@ -407,6 +408,10 @@ static void run_subflow(char *new)
> > > >  	ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc");
> > > >  	ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default
> > > > cc");
> > > 
> > > I guess the next steps is to remove these 'ss_search()', right?
> > 
> > We can keep it for double checks.
> 
> I would not mind, even if 'ss | grep' is used in other BPF tests, but
> I
> understood from Martin he prefers not to depend on external tools as
> much as possible. It should indeed ease the maintenance.
> 
> > > > +	len = sizeof(mark);
> > > > +	err = getsockopt(client_fd, SOL_SOCKET, SO_MARK,
> > > > &mark,
> > > > &len);
> > > > +	ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)");
> > > > +
> > > >  	close(client_fd);
> > > >  fail:
> > > >  	close(server_fd);
> > > > @@ -417,6 +422,7 @@ static void test_subflow(void)
> > > >  	int cgroup_fd, prog_fd, err;
> > > >  	struct mptcp_subflow *skel;
> > > >  	struct nstoken *nstoken;
> > > > +	struct bpf_link *link;
> > > >  
> > > >  	cgroup_fd = test__join_cgroup("/mptcp_subflow");
> > > >  	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup:
> > > > mptcp_subflow"))
> > > > @@ -442,6 +448,10 @@ static void test_subflow(void)
> > > >  	if (endpoint_init("subflow") < 0)
> > > >  		goto close_netns;
> > > >  
> > > > +	link = bpf_program__attach_cgroup(skel-
> > > > >progs._getsockopt,
> > > > cgroup_fd);
> > > > +	if (!ASSERT_OK_PTR(link, "getsockopt prog"))
> > > 
> > > I don't know how it works: is this going to fail if the
> > > getsockopt
> > > program set 'ctx->retval = -1'?
> > 
> > Yes. ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)") will fail if
> > the
> > getsockopt program set 'ctx->retval = -1'.
> 
> Good, thank you!
> 
> > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > > > b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > > > index 782f36ed027e..2086170e7379 100644
> > > > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > > > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> > > > @@ -7,6 +7,32 @@
> > > >  
> > > >  #define MPTCP_SUBFLOWS_MAX 8
> > > >  
> > > > +static inline int list_is_head(const struct list_head *list,
> > > > +			       const struct list_head *head)
> > > > +{
> > > > +	return list == head;
> > > > +}
> > > > +
> > > > +#define list_entry(ptr, type,
> > > > member)					\
> > > > +	container_of(ptr, type, member)
> > > > +
> > > > +#define list_first_entry(ptr, type,
> > > > member)				\
> > > > +	list_entry((ptr)->next, type, member)
> > > > +
> > > > +#define list_next_entry(pos,
> > > > member)					\
> > > > +	list_entry((pos)->member.next, typeof(*(pos)), member)
> > > > +
> > > > +#define list_entry_is_head(pos, head,
> > > > member)				\
> > > > +	list_is_head(&pos->member, (head))
> > > > +
> > > > +#define list_for_each_entry(pos, head,
> > > > member)				\
> > > > +	for (pos = list_first_entry(head, typeof(*pos),
> > > > member);	\
> > > > +	     !list_entry_is_head(pos, head,
> > > > member);			\
> > > > +	     pos = list_next_entry(pos, member))
> > > 
> > > Out of curiosity, are these generic helpers not defined
> > > elsewhere? Is
> > > it
> > > OK not to use the '_safe' version where the deletion of elements
> > > is
> > > supported?
> > 
> > It's read only in this test, so list_for_each_entry is enough. I
> > added
> > the '_safe' version in v2 too for future use.
> 
> OK! (but I don't think you defined list_for_each_entry_safe(). Or is
> it
> defined elsewhere?)
> 
> > > > +
> > > > +#define mptcp_for_each_subflow(__msk,
> > > > __subflow)			\
> > > > +	list_for_each_entry(__subflow, &((__msk)->conn_list),
> > > > node)
> > > > +
> > > >  extern void mptcp_subflow_set_scheduled(struct
> > > > mptcp_subflow_context *subflow,
> > > >  					bool scheduled)
> > > > __ksym;
> > > >  
> > > > diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > > > b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > > > index bc572e1d6df8..c41418ab8db4 100644
> > > > --- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > > > +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
> > > > @@ -4,6 +4,7 @@
> > > >  
> > > >  /* vmlinux.h, bpf_helpers.h and other 'define' */
> > > >  #include "bpf_tracing_net.h"
> > > > +#include "mptcp_bpf.h"
> > > >  
> > > >  char _license[] SEC("license") = "GPL";
> > > >  
> > > > @@ -57,3 +58,34 @@ int mptcp_subflow(struct bpf_sock_ops
> > > > *skops)
> > > >  
> > > >  	return 1;
> > > >  }
> > > > +
> > > > +SEC("cgroup/getsockopt")
> > > > +int _getsockopt(struct bpf_sockopt *ctx)
> > > 
> > > This name seems too generic while what is done here is specific
> > > to
> > > the
> > > 'subflow' test. Maybe: _check_getsockopt_subflows()?
> > 
> > Changed it to _getsockopt_subflow in v2.
> > 
> > > 
> > > (or _check_getsockopt_subflows_mark() and
> > > _check_getsockopt_subflow_cc()
> > > see below)
> > > 
> > > > +{
> > > > +	struct mptcp_sock *msk = bpf_core_cast(ctx->sk, struct
> > > > mptcp_sock);
> > > 
> > > What happens if the 'sk' is not an 'msk'?
> > > Does it check that the sk is indeed an MPTCP one? (how?)
> > 
> > Also check msk->token in v2.
> 
> Mmh, I'm not sure if this is enough: the offset of 'token' in the msk
> structure could point to something unrelated but non-null in another
> socket structure, or out of bound. I guess BPF is doing extra checks
> not
> to crash here. But maybe that's OK because it is only reading stuff?
> 
> > > > +	struct mptcp_subflow_context *subflow;
> > > > +	int i = 0;
> > > > +
> > > > +	if (!msk || ctx->level != SOL_SOCKET || ctx->optname
> > > > !=
> > > > SO_MARK)
> > > > +		return 1;
> > > 
> > > Would it not be clearer to split the two checks? One dedicated to
> > > the
> > > mark, when getsockopt(SO_MARK) is used, and one for the CC, when
> > > the
> > > getsockopt(TCP_CONGESTION) is used? By doing that, we would know
> > > which
> > > one had an issue, if any, not just "something wrong with
> > > getsockopt()"?
> > 
> > Done in v2.
> 
> Thanks!
> 
> > > > +
> > > > +	mptcp_for_each_subflow(msk, subflow) {
> > > 
> > > (might be better to use this helper in our WIP MPTCP BPF packets
> > > scheduler examples, instead of converting them to a fixed array)
> > 
> > Yes, but there are still some access permission issues that need to
> > be
> > resolved.
> 
> OK, because structures cannot be modified I suppose.

No, it seems the subflow cast by bpf_core_cast() can't be passed to a
kernel function, regardless of whether this function modifies the
subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_" error
occurs.

For example, mptcp_subflow_active() is a kernel function, and pass the
subflow to it in progs/mptcp_bpf_first.c like this:

'''
... ...
extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
__ksym;

... ...
SEC("struct_ops")
int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
             struct mptcp_sched_data *data)
{
        struct mptcp_subflow_context *subflow, *tmp;

        mptcp_for_each_subflow(msk, tmp) {
                subflow = bpf_core_cast(tmp, struct
mptcp_subflow_context);
                if (!mptcp_subflow_active(subflow))
                        continue;
        }
        return 0;
}

'''

An "arg#0 is untrusted_ptr_ expected ptr_" error occurs:

'''
; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
21: (e5) may_goto pc+1
22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0
R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0
; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
22: (05) goto pc-14
9: (79) r6 = *(u64 *)(r6 +0)          ; R6_w=ptr_list_head()
10: (1f) r6 -= r7                     ; R6_w=ptr_list_head() R7=0
11: (bf) r1 = r6                      ; R1_w=ptr_list_head()
R6_w=ptr_list_head()
12: (0f) r1 += r7                     ; R1=ptr_list_head() R7=0
13: (1d) if r1 == r8 goto pc+9        ; R1=ptr_list_head()
R8=trusted_ptr_mptcp_sock(off=2488)
; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @
mptcp_bpf_first.c:28
14: (bf) r1 = r6                      ; R1_w=ptr_list_head()
R6=ptr_list_head()
15: (18) r2 = 0x6d14                  ; R2_w=27924
17: (85) call bpf_rdonly_cast#159867          ;
R0_w=untrusted_ptr_mptcp_subflow_context()
; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29
18: (bf) r1 = r0                      ;
R0_w=untrusted_ptr_mptcp_subflow_context()
R1_w=untrusted_ptr_mptcp_subflow_context()
19: (85) call mptcp_subflow_active#111397
arg#0 is untrusted_ptr_ expected ptr_ or socket
processed 23 insns (limit 1000000) max_states_per_insn 0 total_states 2
peak_states 2 mark_read 2
-- END PROG LOAD LOG --
'''

How can I fix this? I need your advice.

Thanks,
-Geliang

> 
> > > 
> > > > +		struct inet_connection_sock *icsk;
> > > > +		struct sock *ssk;
> > > > +
> > > > +		ssk =
> > > > mptcp_subflow_tcp_sock(bpf_core_cast(subflow,
> > > > +							  
> > > > struct
> > > > mptcp_subflow_context));
> > > > +		icsk = bpf_core_cast(ssk, struct
> > > > inet_connection_sock);
> > > > +
> > > > +		if (ssk->sk_mark != i + 1)
> > > > +			ctx->retval = -1;
> > > > +		if (ssk->sk_mark == 1 &&
> > > > +		    __builtin_memcmp(icsk->icsk_ca_ops->name,
> > > > cc,
> > > > TCP_CA_NAME_MAX))
> > > > +			ctx->retval = -1;
> > > > +
> > > > +		if (i++ >= MPTCP_SUBFLOWS_MAX)
> > > > +			break;
> > > 
> > > I guess this part is needed for the verifier, right?
> > > Somehow related to "cond_break" explained in this link?
> > > 
> > >   https://lwn.net/Articles/964381/
> > > 
> > 
> > Change this as "cond_break" in v2.
> 
> Note that you could also modify list_for_each_entry() to add this
> 'cond_break' (+bpf_experimental.h?), not to have to deal with that
> each
> time mptcp_for_each_subflow is used.
> 
> Cheers,
> Matt

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Matthieu Baerts 2 months, 3 weeks ago

Hi Geliang,

On 26/08/2024 04:57, Geliang Tang wrote:
> On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote:
>> On 21/08/2024 10:00, Geliang Tang wrote:
>>> On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote:
>>>> On 20/08/2024 10:44, Geliang Tang wrote:

(...)

>>>>> +	mptcp_for_each_subflow(msk, subflow) {
>>>>
>>>> (might be better to use this helper in our WIP MPTCP BPF packets
>>>> scheduler examples, instead of converting them to a fixed array)
>>>
>>> Yes, but there are still some access permission issues that need to
>>> be
>>> resolved.
>>
>> OK, because structures cannot be modified I suppose.
> 
> No, it seems the subflow cast by bpf_core_cast() can't be passed to a
> kernel function, regardless of whether this function modifies the
> subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_" error
> occurs.
> 
> For example, mptcp_subflow_active() is a kernel function, and pass the
> subflow to it in progs/mptcp_bpf_first.c like this:
> 
> '''
> ... ...
> extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow)
> __ksym;
> 
> ... ...
> SEC("struct_ops")
> int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
>              struct mptcp_sched_data *data)
> {
>         struct mptcp_subflow_context *subflow, *tmp;
> 
>         mptcp_for_each_subflow(msk, tmp) {
>                 subflow = bpf_core_cast(tmp, struct
> mptcp_subflow_context);

(Why do you need to cast "tmp" (struct mptcp_subflow_context *) in the
same type of pointer? I don't think it changes anything for the error
you got (see below), but it looks strange.)

>                 if (!mptcp_subflow_active(subflow))
>                         continue;
>         }
>         return 0;
> }
> 
> '''
> 
> An "arg#0 is untrusted_ptr_ expected ptr_" error occurs:
> 
> '''
> ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
> 21: (e5) may_goto pc+1
> 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0
> R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0
> ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
> 22: (05) goto pc-14
> 9: (79) r6 = *(u64 *)(r6 +0)          ; R6_w=ptr_list_head()
> 10: (1f) r6 -= r7                     ; R6_w=ptr_list_head() R7=0
> 11: (bf) r1 = r6                      ; R1_w=ptr_list_head()
> R6_w=ptr_list_head()
> 12: (0f) r1 += r7                     ; R1=ptr_list_head() R7=0
> 13: (1d) if r1 == r8 goto pc+9        ; R1=ptr_list_head()
> R8=trusted_ptr_mptcp_sock(off=2488)
> ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @
> mptcp_bpf_first.c:28
> 14: (bf) r1 = r6                      ; R1_w=ptr_list_head()
> R6=ptr_list_head()
> 15: (18) r2 = 0x6d14                  ; R2_w=27924
> 17: (85) call bpf_rdonly_cast#159867          ;
> R0_w=untrusted_ptr_mptcp_subflow_context()
> ; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29
> 18: (bf) r1 = r0                      ;
> R0_w=untrusted_ptr_mptcp_subflow_context()
> R1_w=untrusted_ptr_mptcp_subflow_context()
> 19: (85) call mptcp_subflow_active#111397
> arg#0 is untrusted_ptr_ expected ptr_ or socket
> processed 23 insns (limit 1000000) max_states_per_insn 0 total_states 2
> peak_states 2 mark_read 2
> -- END PROG LOAD LOG --
> '''
> 
> How can I fix this? I need your advice.

I'm not an expert in this, but I guess it means you cannot use
'mptcp_subflow_active()', because it can modify the 'subflow' structure
that you got with bpf_core_cast() for a read-only usage.

I guess it means we cannot iterate through the list and modify items
from the list "directly" with BPF. Except if there is something else we
can use, and I don't know about (which is very likely), we might have to
continue extracting the subflows into an array of a fixed size.

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Geliang Tang 2 months, 3 weeks ago

Hi Matt, Martin,

On Mon, 2024-08-26 at 10:44 +0200, Matthieu Baerts wrote:
> Hi Geliang,
> 
> On 26/08/2024 04:57, Geliang Tang wrote:
> > On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote:
> > > On 21/08/2024 10:00, Geliang Tang wrote:
> > > > On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote:
> > > > > On 20/08/2024 10:44, Geliang Tang wrote:
> 
> (...)
> 
> > > > > > +	mptcp_for_each_subflow(msk, subflow) {
> > > > > 
> > > > > (might be better to use this helper in our WIP MPTCP BPF
> > > > > packets
> > > > > scheduler examples, instead of converting them to a fixed
> > > > > array)
> > > > 
> > > > Yes, but there are still some access permission issues that
> > > > need to
> > > > be
> > > > resolved.
> > > 
> > > OK, because structures cannot be modified I suppose.
> > 
> > No, it seems the subflow cast by bpf_core_cast() can't be passed to
> > a
> > kernel function, regardless of whether this function modifies the
> > subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_" error
> > occurs.
> > 
> > For example, mptcp_subflow_active() is a kernel function, and pass
> > the
> > subflow to it in progs/mptcp_bpf_first.c like this:
> > 
> > '''
> > ... ...
> > extern bool mptcp_subflow_active(struct mptcp_subflow_context
> > *subflow)
> > __ksym;
> > 
> > ... ...
> > SEC("struct_ops")
> > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
> >              struct mptcp_sched_data *data)
> > {
> >         struct mptcp_subflow_context *subflow, *tmp;
> > 
> >         mptcp_for_each_subflow(msk, tmp) {
> >                 subflow = bpf_core_cast(tmp, struct
> > mptcp_subflow_context);
> 
> (Why do you need to cast "tmp" (struct mptcp_subflow_context *) in
> the
> same type of pointer? I don't think it changes anything for the error
> you got (see below), but it looks strange.)

We must do the cast, otherwise, an "access beyond struct list_head"
occurs:

; token = subflow->token; @ mptcp_subflow.c:92
13: (61) r4 = *(u32 *)(r1 +524)
access beyond struct list_head at off 524 size 4

See Martin's comment in [1].

[1]
https://patchwork.kernel.org/project/netdevbpf/patch/20240805-upstream-bpf-next-20240506-mptcp-subflow-test-v4-2-2b4ca6994993@kernel.org/

> 
> >                 if (!mptcp_subflow_active(subflow))
> >                         continue;
> >         }
> >         return 0;
> > }
> > 
> > '''
> > 
> > An "arg#0 is untrusted_ptr_ expected ptr_" error occurs:
> > 
> > '''
> > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
> > 21: (e5) may_goto pc+1
> > 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0
> > R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0
> > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
> > 22: (05) goto pc-14
> > 9: (79) r6 = *(u64 *)(r6 +0)          ; R6_w=ptr_list_head()
> > 10: (1f) r6 -= r7                     ; R6_w=ptr_list_head() R7=0
> > 11: (bf) r1 = r6                      ; R1_w=ptr_list_head()
> > R6_w=ptr_list_head()
> > 12: (0f) r1 += r7                     ; R1=ptr_list_head() R7=0
> > 13: (1d) if r1 == r8 goto pc+9        ; R1=ptr_list_head()
> > R8=trusted_ptr_mptcp_sock(off=2488)
> > ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @
> > mptcp_bpf_first.c:28
> > 14: (bf) r1 = r6                      ; R1_w=ptr_list_head()
> > R6=ptr_list_head()
> > 15: (18) r2 = 0x6d14                  ; R2_w=27924
> > 17: (85) call bpf_rdonly_cast#159867          ;
> > R0_w=untrusted_ptr_mptcp_subflow_context()
> > ; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29
> > 18: (bf) r1 = r0                      ;
> > R0_w=untrusted_ptr_mptcp_subflow_context()
> > R1_w=untrusted_ptr_mptcp_subflow_context()
> > 19: (85) call mptcp_subflow_active#111397
> > arg#0 is untrusted_ptr_ expected ptr_ or socket
> > processed 23 insns (limit 1000000) max_states_per_insn 0
> > total_states 2
> > peak_states 2 mark_read 2
> > -- END PROG LOAD LOG --
> > '''
> > 
> > How can I fix this? I need your advice.
> 
> I'm not an expert in this, but I guess it means you cannot use
> 'mptcp_subflow_active()', because it can modify the 'subflow'
> structure
> that you got with bpf_core_cast() for a read-only usage.

A read-only function will get the same error.

I added a read-only function mptcp_subflow_get_scheduled() for testing:

bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context *subflow)
{
        return subflow->scheduled;
}

And invoke it from BPF in mptcp_for_each_subflow() loop:

int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
             struct mptcp_sched_data *data)
{
        struct mptcp_subflow_context *subflow, *tmp;

        mptcp_for_each_subflow(msk, tmp) {
                subflow = bpf_core_cast(tmp, struct
mptcp_subflow_context);
                mptcp_subflow_get_scheduled(subflow);
        }
        return 0;
}

The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs.

Hope Martin can give us a solution for this issue.

Thanks,
-Geliang

> 
> I guess it means we cannot iterate through the list and modify items
> from the list "directly" with BPF. Except if there is something else
> we
> can use, and I don't know about (which is very likely), we might have
> to
> continue extracting the subflows into an array of a fixed size.
> 
> Cheers,
> Matt

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Martin KaFai Lau 2 months, 3 weeks ago

On 8/26/24 2:24 AM, Geliang Tang wrote:
>> I'm not an expert in this, but I guess it means you cannot use
>> 'mptcp_subflow_active()', because it can modify the 'subflow'
>> structure
>> that you got with bpf_core_cast() for a read-only usage.
> 
> A read-only function will get the same error.
> 
> I added a read-only function mptcp_subflow_get_scheduled() for testing:
> 
> bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context *subflow)
> {
>          return subflow->scheduled;
> }
> 
> And invoke it from BPF in mptcp_for_each_subflow() loop:
> 
> int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
>               struct mptcp_sched_data *data)
> {
>          struct mptcp_subflow_context *subflow, *tmp;
> 
>          mptcp_for_each_subflow(msk, tmp) {
>                  subflow = bpf_core_cast(tmp, struct
> mptcp_subflow_context);
>                  mptcp_subflow_get_scheduled(subflow);
>          }
>          return 0;
> }
> 
> The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs.
> 
> Hope Martin can give us a solution for this issue.

I don't know the context for this list walking + modify-by-kfunc usage, so the 
following could be a grain of salt.

It seems like fitting the bpf_iter use case. Take a look at some recent bpf_iter 
additions, e.g. bpf_iter_{task,css}_next(). Also the bpf_for_each macro usage in 
selftests. There may be some secondary things that need to consider, e.g. how 
the walked subflow is protected, rcu, refcnt...etc.

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Geliang Tang 2 months, 2 weeks ago

Hi Martin, Matt,

On Mon, 2024-08-26 at 22:22 -0700, Martin KaFai Lau wrote:
> On 8/26/24 2:24 AM, Geliang Tang wrote:
> > > I'm not an expert in this, but I guess it means you cannot use
> > > 'mptcp_subflow_active()', because it can modify the 'subflow'
> > > structure
> > > that you got with bpf_core_cast() for a read-only usage.
> > 
> > A read-only function will get the same error.
> > 
> > I added a read-only function mptcp_subflow_get_scheduled() for
> > testing:
> > 
> > bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context
> > *subflow)
> > {
> >          return subflow->scheduled;
> > }
> > 
> > And invoke it from BPF in mptcp_for_each_subflow() loop:
> > 
> > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
> >               struct mptcp_sched_data *data)
> > {
> >          struct mptcp_subflow_context *subflow, *tmp;
> > 
> >          mptcp_for_each_subflow(msk, tmp) {
> >                  subflow = bpf_core_cast(tmp, struct
> > mptcp_subflow_context);
> >                  mptcp_subflow_get_scheduled(subflow);
> >          }
> >          return 0;
> > }
> > 
> > The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs.
> > 
> > Hope Martin can give us a solution for this issue.
> 
> I don't know the context for this list walking + modify-by-kfunc
> usage, so the 
> following could be a grain of salt.
> 
> It seems like fitting the bpf_iter use case. Take a look at some
> recent bpf_iter 
> additions, e.g. bpf_iter_{task,css}_next(). Also the bpf_for_each
> macro usage in 
> selftests. There may be some secondary things that need to consider,
> e.g. how 
> the walked subflow is protected, rcu, refcnt...etc.

Great! Thanks. bpf_iter works well for MPTCP BPF scheduler.

I added a new bpf_iter type named "mptcp_subflow" in net/mptcp/bpf.c
like this:

'''
__bpf_kfunc_start_defs();

__bpf_kfunc int bpf_iter_mptcp_subflow_new(struct
bpf_iter_mptcp_subflow *it,
                struct mptcp_sock *msk, unsigned int flags)
{
        struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;

        kit->msk = msk;
        kit->pos = &msk->conn_list;
        spin_lock_bh(&msk->pm.lock);

        return 0;
}

__bpf_kfunc struct mptcp_subflow_context *
bpf_iter_mptcp_subflow_next(struct bpf_iter_mptcp_subflow *it)
{
        struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
        struct mptcp_subflow_context *subflow;
        struct mptcp_sock *msk = kit->msk;

        subflow = list_entry((kit->pos)->next, struct
mptcp_subflow_context, node);
        if (list_entry_is_head(subflow, &msk->conn_list, node))
                return NULL;

        kit->pos = &subflow->node;
        return subflow;
}

__bpf_kfunc void bpf_iter_mptcp_subflow_destroy(struct
bpf_iter_mptcp_subflow *it)
{
        struct bpf_iter_mptcp_subflow_kern *kit = (void *)it;
        struct mptcp_sock *msk = kit->msk;

        spin_unlock_bh(&msk->pm.lock);
}

__bpf_kfunc_end_defs();
'''

And use "bpf_for_each(mptcp_subflow)" like this in
progs/mptcp_bpf_burst.c:

'''
        i = 0;
        bpf_rcu_read_lock();
        bpf_for_each(mptcp_subflow, subflow, msk, 0) {
                bool backup = subflow->backup || subflow->request_bkup;

                if (i++ > MPTCP_SUBFLOWS_MAX)
                        break;

                ssk = mptcp_subflow_tcp_sock(subflow);
                if (!mptcp_subflow_active(subflow))
                        continue;

                nr_active += !backup;
                pace = subflow->avg_pacing_rate;
                if (!pace) {
                        /* init pacing rate from socket */
                        subflow->avg_pacing_rate = ssk->sk_pacing_rate;
                        pace = subflow->avg_pacing_rate;
                        if (!pace)
                                continue;
                }

                linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32,
pace);
                if (linger_time < send_info[backup].linger_time) {
                        send_info[backup].subflow_id = i;
                        send_info[backup].linger_time = linger_time;
                }
        }
        bpf_rcu_read_unlock();
'''

With this mptcp_subflow bpf_iter, we can get rid of the subflows array
"contexts" in struct mptcp_sched_data:

'''
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index c3d0ea07cf0c..a739f917b054 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -104,8 +104,6 @@ struct mptcp_out_options {
 
 struct mptcp_sched_data {
        bool    reinject;
-       u8      subflows;
-       struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
 };
 
 struct mptcp_sched_ops {
'''

I'll send out these patches for review soon, with Martin's "Suggested-
by" tags.

Thanks,
-Geliang

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Matthieu Baerts 2 months, 3 weeks ago

Hi Geliang, Martin,

On 26/08/2024 11:24, Geliang Tang wrote:
> Hi Matt, Martin,
> 
> On Mon, 2024-08-26 at 10:44 +0200, Matthieu Baerts wrote:
>> Hi Geliang,
>>
>> On 26/08/2024 04:57, Geliang Tang wrote:
>>> On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote:
>>>> On 21/08/2024 10:00, Geliang Tang wrote:
>>>>> On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote:
>>>>>> On 20/08/2024 10:44, Geliang Tang wrote:
>>
>> (...)
>>
>>>>>>> +	mptcp_for_each_subflow(msk, subflow) {
>>>>>>
>>>>>> (might be better to use this helper in our WIP MPTCP BPF
>>>>>> packets
>>>>>> scheduler examples, instead of converting them to a fixed
>>>>>> array)
>>>>>
>>>>> Yes, but there are still some access permission issues that
>>>>> need to
>>>>> be
>>>>> resolved.
>>>>
>>>> OK, because structures cannot be modified I suppose.
>>>
>>> No, it seems the subflow cast by bpf_core_cast() can't be passed to
>>> a
>>> kernel function, regardless of whether this function modifies the
>>> subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_" error
>>> occurs.
>>>
>>> For example, mptcp_subflow_active() is a kernel function, and pass
>>> the
>>> subflow to it in progs/mptcp_bpf_first.c like this:
>>>
>>> '''
>>> ... ...
>>> extern bool mptcp_subflow_active(struct mptcp_subflow_context
>>> *subflow)
>>> __ksym;
>>>
>>> ... ...
>>> SEC("struct_ops")
>>> int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
>>>              struct mptcp_sched_data *data)
>>> {
>>>         struct mptcp_subflow_context *subflow, *tmp;
>>>
>>>         mptcp_for_each_subflow(msk, tmp) {
>>>                 subflow = bpf_core_cast(tmp, struct
>>> mptcp_subflow_context);
>>
>> (Why do you need to cast "tmp" (struct mptcp_subflow_context *) in
>> the
>> same type of pointer? I don't think it changes anything for the error
>> you got (see below), but it looks strange.)
> 
> We must do the cast, otherwise, an "access beyond struct list_head"
> occurs:
> 
> ; token = subflow->token; @ mptcp_subflow.c:92
> 13: (61) r4 = *(u32 *)(r1 +524)
> access beyond struct list_head at off 524 size 4

OK!

> 
> See Martin's comment in [1].
> 
> [1]
> https://patchwork.kernel.org/project/netdevbpf/patch/20240805-upstream-bpf-next-20240506-mptcp-subflow-test-v4-2-2b4ca6994993@kernel.org/
> 
>>
>>>                 if (!mptcp_subflow_active(subflow))
>>>                         continue;
>>>         }
>>>         return 0;
>>> }
>>>
>>> '''
>>>
>>> An "arg#0 is untrusted_ptr_ expected ptr_" error occurs:
>>>
>>> '''
>>> ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
>>> 21: (e5) may_goto pc+1
>>> 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0
>>> R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0
>>> ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
>>> 22: (05) goto pc-14
>>> 9: (79) r6 = *(u64 *)(r6 +0)          ; R6_w=ptr_list_head()
>>> 10: (1f) r6 -= r7                     ; R6_w=ptr_list_head() R7=0
>>> 11: (bf) r1 = r6                      ; R1_w=ptr_list_head()
>>> R6_w=ptr_list_head()
>>> 12: (0f) r1 += r7                     ; R1=ptr_list_head() R7=0
>>> 13: (1d) if r1 == r8 goto pc+9        ; R1=ptr_list_head()
>>> R8=trusted_ptr_mptcp_sock(off=2488)
>>> ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @
>>> mptcp_bpf_first.c:28
>>> 14: (bf) r1 = r6                      ; R1_w=ptr_list_head()
>>> R6=ptr_list_head()
>>> 15: (18) r2 = 0x6d14                  ; R2_w=27924
>>> 17: (85) call bpf_rdonly_cast#159867          ;
>>> R0_w=untrusted_ptr_mptcp_subflow_context()
>>> ; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29
>>> 18: (bf) r1 = r0                      ;
>>> R0_w=untrusted_ptr_mptcp_subflow_context()
>>> R1_w=untrusted_ptr_mptcp_subflow_context()
>>> 19: (85) call mptcp_subflow_active#111397
>>> arg#0 is untrusted_ptr_ expected ptr_ or socket
>>> processed 23 insns (limit 1000000) max_states_per_insn 0
>>> total_states 2
>>> peak_states 2 mark_read 2
>>> -- END PROG LOAD LOG --
>>> '''
>>>
>>> How can I fix this? I need your advice.
>>
>> I'm not an expert in this, but I guess it means you cannot use
>> 'mptcp_subflow_active()', because it can modify the 'subflow'
>> structure
>> that you got with bpf_core_cast() for a read-only usage.
> 
> A read-only function will get the same error.
> 
> I added a read-only function mptcp_subflow_get_scheduled() for testing:
> 
> bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context *subflow)

Do you have the same issue if '*subflow' is marked as 'const'?

> {
>         return subflow->scheduled;
> }
> 
> And invoke it from BPF in mptcp_for_each_subflow() loop:
> 
> int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
>              struct mptcp_sched_data *data)
> {
>         struct mptcp_subflow_context *subflow, *tmp;
> 
>         mptcp_for_each_subflow(msk, tmp) {
>                 subflow = bpf_core_cast(tmp, struct
> mptcp_subflow_context);
>                 mptcp_subflow_get_scheduled(subflow);
>         }
>         return 0;
> }
> 
> The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs.
> 
> Hope Martin can give us a solution for this issue.
> 
> Thanks,
> -Geliang
> 
>>
>> I guess it means we cannot iterate through the list and modify items
>> from the list "directly" with BPF. Except if there is something else
>> we
>> can use, and I don't know about (which is very likely), we might have
>> to
>> continue extracting the subflows into an array of a fixed size.
>>
>> Cheers,
>> Matt
> 

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Geliang Tang 2 months, 3 weeks ago

On Mon, 2024-08-26 at 11:49 +0200, Matthieu Baerts wrote:
> Hi Geliang, Martin,
> 
> On 26/08/2024 11:24, Geliang Tang wrote:
> > Hi Matt, Martin,
> > 
> > On Mon, 2024-08-26 at 10:44 +0200, Matthieu Baerts wrote:
> > > Hi Geliang,
> > > 
> > > On 26/08/2024 04:57, Geliang Tang wrote:
> > > > On Wed, 2024-08-21 at 11:37 +0200, Matthieu Baerts wrote:
> > > > > On 21/08/2024 10:00, Geliang Tang wrote:
> > > > > > On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote:
> > > > > > > On 20/08/2024 10:44, Geliang Tang wrote:
> > > 
> > > (...)
> > > 
> > > > > > > > +	mptcp_for_each_subflow(msk, subflow) {
> > > > > > > 
> > > > > > > (might be better to use this helper in our WIP MPTCP BPF
> > > > > > > packets
> > > > > > > scheduler examples, instead of converting them to a fixed
> > > > > > > array)
> > > > > > 
> > > > > > Yes, but there are still some access permission issues that
> > > > > > need to
> > > > > > be
> > > > > > resolved.
> > > > > 
> > > > > OK, because structures cannot be modified I suppose.
> > > > 
> > > > No, it seems the subflow cast by bpf_core_cast() can't be
> > > > passed to
> > > > a
> > > > kernel function, regardless of whether this function modifies
> > > > the
> > > > subflow or not. An "arg#0 is untrusted_ptr_ expected ptr_"
> > > > error
> > > > occurs.
> > > > 
> > > > For example, mptcp_subflow_active() is a kernel function, and
> > > > pass
> > > > the
> > > > subflow to it in progs/mptcp_bpf_first.c like this:
> > > > 
> > > > '''
> > > > ... ...
> > > > extern bool mptcp_subflow_active(struct mptcp_subflow_context
> > > > *subflow)
> > > > __ksym;
> > > > 
> > > > ... ...
> > > > SEC("struct_ops")
> > > > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
> > > >              struct mptcp_sched_data *data)
> > > > {
> > > >         struct mptcp_subflow_context *subflow, *tmp;
> > > > 
> > > >         mptcp_for_each_subflow(msk, tmp) {
> > > >                 subflow = bpf_core_cast(tmp, struct
> > > > mptcp_subflow_context);
> > > 
> > > (Why do you need to cast "tmp" (struct mptcp_subflow_context *)
> > > in
> > > the
> > > same type of pointer? I don't think it changes anything for the
> > > error
> > > you got (see below), but it looks strange.)
> > 
> > We must do the cast, otherwise, an "access beyond struct list_head"
> > occurs:
> > 
> > ; token = subflow->token; @ mptcp_subflow.c:92
> > 13: (61) r4 = *(u32 *)(r1 +524)
> > access beyond struct list_head at off 524 size 4
> 
> OK!
> 
> > 
> > See Martin's comment in [1].
> > 
> > [1]
> > https://patchwork.kernel.org/project/netdevbpf/patch/20240805-upstream-bpf-next-20240506-mptcp-subflow-test-v4-2-2b4ca6994993@kernel.org/
> > 
> > > 
> > > >                 if (!mptcp_subflow_active(subflow))
> > > >                         continue;
> > > >         }
> > > >         return 0;
> > > > }
> > > > 
> > > > '''
> > > > 
> > > > An "arg#0 is untrusted_ptr_ expected ptr_" error occurs:
> > > > 
> > > > '''
> > > > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
> > > > 21: (e5) may_goto pc+1
> > > > 22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0
> > > > R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0
> > > > ; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
> > > > 22: (05) goto pc-14
> > > > 9: (79) r6 = *(u64 *)(r6 +0)          ; R6_w=ptr_list_head()
> > > > 10: (1f) r6 -= r7                     ; R6_w=ptr_list_head()
> > > > R7=0
> > > > 11: (bf) r1 = r6                      ; R1_w=ptr_list_head()
> > > > R6_w=ptr_list_head()
> > > > 12: (0f) r1 += r7                     ; R1=ptr_list_head() R7=0
> > > > 13: (1d) if r1 == r8 goto pc+9        ; R1=ptr_list_head()
> > > > R8=trusted_ptr_mptcp_sock(off=2488)
> > > > ; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @
> > > > mptcp_bpf_first.c:28
> > > > 14: (bf) r1 = r6                      ; R1_w=ptr_list_head()
> > > > R6=ptr_list_head()
> > > > 15: (18) r2 = 0x6d14                  ; R2_w=27924
> > > > 17: (85) call bpf_rdonly_cast#159867          ;
> > > > R0_w=untrusted_ptr_mptcp_subflow_context()
> > > > ; if (!mptcp_subflow_active(subflow)) @ mptcp_bpf_first.c:29
> > > > 18: (bf) r1 = r0                      ;
> > > > R0_w=untrusted_ptr_mptcp_subflow_context()
> > > > R1_w=untrusted_ptr_mptcp_subflow_context()
> > > > 19: (85) call mptcp_subflow_active#111397
> > > > arg#0 is untrusted_ptr_ expected ptr_ or socket
> > > > processed 23 insns (limit 1000000) max_states_per_insn 0
> > > > total_states 2
> > > > peak_states 2 mark_read 2
> > > > -- END PROG LOAD LOG --
> > > > '''
> > > > 
> > > > How can I fix this? I need your advice.
> > > 
> > > I'm not an expert in this, but I guess it means you cannot use
> > > 'mptcp_subflow_active()', because it can modify the 'subflow'
> > > structure
> > > that you got with bpf_core_cast() for a read-only usage.
> > 
> > A read-only function will get the same error.
> > 
> > I added a read-only function mptcp_subflow_get_scheduled() for
> > testing:
> > 
> > bool mptcp_subflow_get_scheduled(struct mptcp_subflow_context
> > *subflow)
> 
> Do you have the same issue if '*subflow' is marked as 'const'?

Yes, the same error occurs:

'''
libbpf: prog 'bpf_first_get_subflow': -- BEGIN PROG LOAD LOG --
0: R1=ctx() R10=fp0
; int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk, @
mptcp_bpf_first.c:22
0: (b7) r7 = 0                        ; R7_w=0
1: (b7) r2 = 2488                     ; R2_w=2488
2: (79) r8 = *(u64 *)(r1 +0)
func 'get_subflow' arg0 has btf_id 27737 type STRUCT 'mptcp_sock'
3: R1=ctx() R8_w=trusted_ptr_mptcp_sock()
3: (bf) r6 = r8                       ; R6_w=trusted_ptr_mptcp_sock()
R8_w=trusted_ptr_mptcp_sock()
4: (0f) r6 += r2                      ; R2_w=2488
R6_w=trusted_ptr_mptcp_sock(off=2488)
5: (b7) r1 = 2488                     ; R1_w=2488
6: (0f) r8 += r1                      ; R1_w=2488
R8_w=trusted_ptr_mptcp_sock(off=2488)
7: (b7) r9 = 0                        ; R9_w=0
8: (05) goto pc+12
; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
21: (e5) may_goto pc+1
22: R1=2488 R2=2488 R6=trusted_ptr_mptcp_sock(off=2488) R7=0
R8=trusted_ptr_mptcp_sock(off=2488) R9=0 R10=fp0
; mptcp_for_each_subflow(msk, tmp) { @ mptcp_bpf_first.c:27
22: (05) goto pc-14
9: (79) r6 = *(u64 *)(r6 +0)          ; R6_w=ptr_list_head()
10: (1f) r6 -= r7                     ; R6_w=ptr_list_head() R7=0
11: (bf) r1 = r6                      ; R1_w=ptr_list_head()
R6_w=ptr_list_head()
12: (0f) r1 += r7                     ; R1=ptr_list_head() R7=0
13: (1d) if r1 == r8 goto pc+9        ; R1=ptr_list_head()
R8=trusted_ptr_mptcp_sock(off=2488)
; subflow = bpf_core_cast(tmp, struct mptcp_subflow_context); @
mptcp_bpf_first.c:28
14: (bf) r1 = r6                      ; R1_w=ptr_list_head()
R6=ptr_list_head()
15: (18) r2 = 0x6c55                  ; R2_w=27733
17: (85) call bpf_rdonly_cast#122463          ;
R0_w=untrusted_ptr_mptcp_subflow_context()
; mptcp_subflow_get_scheduled(subflow); @ mptcp_bpf_first.c:29
18: (bf) r1 = r0                      ;
R0_w=untrusted_ptr_mptcp_subflow_context()
R1_w=untrusted_ptr_mptcp_subflow_context()
19: (85) call mptcp_subflow_get_scheduled#127716
arg#0 is untrusted_ptr_ expected ptr_ or socket
processed 23 insns (limit 1000000) max_states_per_insn 0 total_states 2
peak_states 2 mark_read 2
-- END PROG LOAD LOG --
'''

net/mptcp/sched.c:

'''
bool mptcp_subflow_get_scheduled(const struct mptcp_subflow_context
*subflow)
{
       return subflow->scheduled;
}
'''

progs/mptcp_bpf_first.c:

'''
extern bool mptcp_subflow_get_scheduled(const struct
mptcp_subflow_context *subflow) __ksym;

... ...

SEC("struct_ops")
int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
             struct mptcp_sched_data *data)
{
        struct mptcp_subflow_context *subflow, *tmp;

        mptcp_for_each_subflow(msk, tmp) {
                subflow = bpf_core_cast(tmp, struct
mptcp_subflow_context);
                mptcp_subflow_get_scheduled(subflow);
        }
        return 0;
}
'''

Thanks,
-Geliang

> 
> > {
> >         return subflow->scheduled;
> > }
> > 
> > And invoke it from BPF in mptcp_for_each_subflow() loop:
> > 
> > int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
> >              struct mptcp_sched_data *data)
> > {
> >         struct mptcp_subflow_context *subflow, *tmp;
> > 
> >         mptcp_for_each_subflow(msk, tmp) {
> >                 subflow = bpf_core_cast(tmp, struct
> > mptcp_subflow_context);
> >                 mptcp_subflow_get_scheduled(subflow);
> >         }
> >         return 0;
> > }
> > 
> > The same "arg#0 is untrusted_ptr_ expected ptr_ or socket" occurs.
> > 
> > Hope Martin can give us a solution for this issue.
> > 
> > Thanks,
> > -Geliang
> > 
> > > 
> > > I guess it means we cannot iterate through the list and modify
> > > items
> > > from the list "directly" with BPF. Except if there is something
> > > else
> > > we
> > > can use, and I don't know about (which is very likely), we might
> > > have
> > > to
> > > continue extracting the subflows into an array of a fixed size.
> > > 
> > > Cheers,
> > > Matt
> > 
> 
> Cheers,
> Matt

Re: [PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow

Posted by Martin KaFai Lau 3 months ago

On 8/21/24 2:37 AM, Matthieu Baerts wrote:
> Hi Geliang,
> 
> Thank you for your reply!
> 
> On 21/08/2024 10:00, Geliang Tang wrote:
>> On Tue, 2024-08-20 at 11:48 +0200, Matthieu Baerts wrote:
>>> On 20/08/2024 10:44, Geliang Tang wrote:
> (...)
> 
>>>> diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c
>>>> b/tools/testing/selftests/bpf/prog_tests/mptcp.c
>>>> index 69fdcb28249d..2178db94f764 100644
>>>> --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
>>>> +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
>>>> @@ -383,6 +383,7 @@ static void run_subflow(char *new)
>>>>   {
>>>>   	int server_fd, client_fd, err;
>>>>   	char cc[TCP_CA_NAME_MAX];
>>>> +	unsigned int mark;
>>>>   	socklen_t len;
>>>>   
>>>>   	server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1,
>>>> 0);
>>>> @@ -407,6 +408,10 @@ static void run_subflow(char *new)
>>>>   	ASSERT_OK(ss_search(ADDR_1, new), "ss_search new cc");
>>>>   	ASSERT_OK(ss_search(ADDR_2, cc), "ss_search default cc");
>>> I guess the next steps is to remove these 'ss_search()', right?
>> We can keep it for double checks.
> I would not mind, even if 'ss | grep' is used in other BPF tests, but I
> understood from Martin he prefers not to depend on external tools as
> much as possible. It should indeed ease the maintenance.

I would prefer to avoid the 'ss | grep'. The other bpf selftests using this "ss 
| grep" should be in the .sh which is not in the test_progs and not run by bpf 
CI. They are there to loop-waiting for something. The ss usage hopefully will be 
removed when eventually migrating the .sh tests to test_progs. At least the one 
in test_tc_tunnel.sh should be doable once moved to test_progs. Haven't looked 
at the test_xdp_features.sh yet.

Thanks for trying the getsockopt approach and it turns out working well.

[PATCH mptcp-next 1/2] Squash to "selftests/bpf: Add mptcp subflow subtest"
[PATCH mptcp-next 2/2] selftests/bpf: Add getsockopt to inspect mptcp subflow