[PATCH mptcp-next v9 06/13] Squash to "selftests/bpf: Add bpf_first scheduler & test"

Geliang Tang posted 13 patches 3 weeks, 3 days ago
[PATCH mptcp-next v9 06/13] Squash to "selftests/bpf: Add bpf_first scheduler & test"
Posted by Geliang Tang 3 weeks, 3 days ago
From: Geliang Tang <tanggeliang@kylinos.cn>

1. Update sched_init.

2. For drop bpf_object__find_map_by_name in test_bpf_sched(), change the
first parameter of it as bpf_map.

3. Implement mptcp_subflow_set_scheduled in BPF.

4. Drop bpf_mptcp_subflow_ctx_by_pos.

5. Use the newly added bpf_for_each() helper to walk the conn_list.

Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 tools/testing/selftests/bpf/prog_tests/mptcp.c    | 15 +++++++++------
 tools/testing/selftests/bpf/progs/mptcp_bpf.h     | 14 ++++++++------
 .../testing/selftests/bpf/progs/mptcp_bpf_first.c |  4 ++--
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index b00af99c5c9d..a6574a537679 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -645,27 +645,30 @@ static void test_default(void)
 	netns_free(netns);
 }
 
-static void test_bpf_sched(struct bpf_object *obj, char *sched,
+static void test_bpf_sched(struct bpf_map *map, char *sched,
 			   bool addr1, bool addr2)
 {
 	char bpf_sched[MPTCP_SCHED_NAME_MAX] = "bpf_";
 	struct netns_obj *netns;
 	struct bpf_link *link;
-	struct bpf_map *map;
+	int err;
 
 	if (!ASSERT_LT(strlen(bpf_sched) + strlen(sched),
 		       MPTCP_SCHED_NAME_MAX, "Scheduler name too long"))
 		return;
 
-	map = bpf_object__find_map_by_name(obj, sched);
 	link = bpf_map__attach_struct_ops(map);
-	if (CHECK(!link, sched, "attach_struct_ops: %d\n", errno))
+	if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
 		return;
 
-	netns = sched_init("subflow", strcat(bpf_sched, sched));
+	netns = netns_new(NS_TEST, true);
 	if (!netns)
 		goto fail;
 
+	err = sched_init("subflow", strcat(bpf_sched, sched));
+	if (!ASSERT_OK(err, "sched_init"))
+		goto fail;
+
 	send_data_and_verify(sched, addr1, addr2);
 
 fail:
@@ -681,7 +684,7 @@ static void test_first(void)
 	if (!ASSERT_OK_PTR(skel, "open_and_load: first"))
 		return;
 
-	test_bpf_sched(skel->obj, "first", WITH_DATA, WITHOUT_DATA);
+	test_bpf_sched(skel->maps.first, "first", WITH_DATA, WITHOUT_DATA);
 	mptcp_bpf_first__destroy(skel);
 }
 
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
index 3b20cfd44505..f8c917dc2c1c 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
@@ -42,6 +42,14 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
 	return subflow->tcp_sock;
 }
 
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+
+static __always_inline void
+mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled)
+{
+	WRITE_ONCE(subflow->scheduled, scheduled);
+}
+
 /* ksym */
 extern struct mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) __ksym;
 extern void bpf_mptcp_sock_release(struct mptcp_sock *msk) __ksym;
@@ -52,10 +60,4 @@ bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym;
 extern struct sock *
 bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym;
 
-extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
-					bool scheduled) __ksym;
-
-extern struct mptcp_subflow_context *
-bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym;
-
 #endif
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
index d57399b407a7..5d0f89c636f0 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
@@ -20,11 +20,11 @@ SEC("struct_ops")
 int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
 	     struct mptcp_sched_data *data)
 {
-	mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true);
+	mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx(msk->first), true);
 	return 0;
 }
 
-SEC(".struct_ops")
+SEC(".struct_ops.link")
 struct mptcp_sched_ops first = {
 	.init		= (void *)mptcp_sched_first_init,
 	.release	= (void *)mptcp_sched_first_release,
-- 
2.45.2
Re: [PATCH mptcp-next v9 06/13] Squash to "selftests/bpf: Add bpf_first scheduler & test"
Posted by Mat Martineau 1 week, 3 days ago
On Wed, 30 Oct 2024, Geliang Tang wrote:

> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> 1. Update sched_init.
>
> 2. For drop bpf_object__find_map_by_name in test_bpf_sched(), change the
> first parameter of it as bpf_map.
>
> 3. Implement mptcp_subflow_set_scheduled in BPF.
>
> 4. Drop bpf_mptcp_subflow_ctx_by_pos.
>
> 5. Use the newly added bpf_for_each() helper to walk the conn_list.
>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
> tools/testing/selftests/bpf/prog_tests/mptcp.c    | 15 +++++++++------
> tools/testing/selftests/bpf/progs/mptcp_bpf.h     | 14 ++++++++------
> .../testing/selftests/bpf/progs/mptcp_bpf_first.c |  4 ++--
> 3 files changed, 19 insertions(+), 14 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> index b00af99c5c9d..a6574a537679 100644
> --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
> +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
> @@ -645,27 +645,30 @@ static void test_default(void)
> 	netns_free(netns);
> }
>
> -static void test_bpf_sched(struct bpf_object *obj, char *sched,
> +static void test_bpf_sched(struct bpf_map *map, char *sched,
> 			   bool addr1, bool addr2)
> {
> 	char bpf_sched[MPTCP_SCHED_NAME_MAX] = "bpf_";
> 	struct netns_obj *netns;
> 	struct bpf_link *link;
> -	struct bpf_map *map;
> +	int err;
>
> 	if (!ASSERT_LT(strlen(bpf_sched) + strlen(sched),
> 		       MPTCP_SCHED_NAME_MAX, "Scheduler name too long"))
> 		return;
>
> -	map = bpf_object__find_map_by_name(obj, sched);
> 	link = bpf_map__attach_struct_ops(map);
> -	if (CHECK(!link, sched, "attach_struct_ops: %d\n", errno))
> +	if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
> 		return;
>
> -	netns = sched_init("subflow", strcat(bpf_sched, sched));
> +	netns = netns_new(NS_TEST, true);
> 	if (!netns)
> 		goto fail;
>
> +	err = sched_init("subflow", strcat(bpf_sched, sched));
> +	if (!ASSERT_OK(err, "sched_init"))
> +		goto fail;
> +
> 	send_data_and_verify(sched, addr1, addr2);
>
> fail:
> @@ -681,7 +684,7 @@ static void test_first(void)
> 	if (!ASSERT_OK_PTR(skel, "open_and_load: first"))
> 		return;
>
> -	test_bpf_sched(skel->obj, "first", WITH_DATA, WITHOUT_DATA);
> +	test_bpf_sched(skel->maps.first, "first", WITH_DATA, WITHOUT_DATA);
> 	mptcp_bpf_first__destroy(skel);
> }
>
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> index 3b20cfd44505..f8c917dc2c1c 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h
> @@ -42,6 +42,14 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
> 	return subflow->tcp_sock;
> }
>

Hi Geliang -

Thanks for the updates to this series. The change to a BPF-based helper 
does work around the pointer-handling issue, but it's important to be sure 
WRITE_ONCE() is being handled correctly.

> +#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))

We initially added the C version of the mptcp_subflow_set_scheduled() 
helper because it wasn't clear how to handle WRITE_ONCE() in BPF. The only 
other implementation I found is in the cilium code (also GPLv2):

https://github.com/cilium/cilium/blob/main/bpf/include/bpf/compiler.h

It's a lot more complex than the macro added above (using an asm statement 
to set up the memory barrier). I don't know enough about low-level BPF and 
BPF toolchains to know that the cilium WRITE_ONCE() technique works 
everywhere - anyone else have a comment on this?

Another concern I have is that a lot of boilerplate BPF code is needed to 
set the scheduled bit in every BPF scheduler, and it's easy for a 
scheduler author to just skip the WRITE_ONCE() stuff and possibly 
introduce subtle write-ordering bugs. The C helper was easy to use 
correctly.

I'm not sure what the correct solution is yet. Geliang, do you have any 
ideas for an approach that works around the kfunc pointer limitations and 
also prevents WRITE_ONCE() bugs?

- Mat


> +
> +static __always_inline void
> +mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, bool scheduled)
> +{
> +	WRITE_ONCE(subflow->scheduled, scheduled);
> +}
> +
> /* ksym */
> extern struct mptcp_sock *bpf_mptcp_sock_acquire(struct mptcp_sock *msk) __ksym;
> extern void bpf_mptcp_sock_release(struct mptcp_sock *msk) __ksym;
> @@ -52,10 +60,4 @@ bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym;
> extern struct sock *
> bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym;
>
> -extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
> -					bool scheduled) __ksym;
> -
> -extern struct mptcp_subflow_context *
> -bpf_mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym;
> -
> #endif
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
> index d57399b407a7..5d0f89c636f0 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_first.c
> @@ -20,11 +20,11 @@ SEC("struct_ops")
> int BPF_PROG(bpf_first_get_subflow, struct mptcp_sock *msk,
> 	     struct mptcp_sched_data *data)
> {
> -	mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx_by_pos(data, 0), true);
> +	mptcp_subflow_set_scheduled(bpf_mptcp_subflow_ctx(msk->first), true);
> 	return 0;
> }
>
> -SEC(".struct_ops")
> +SEC(".struct_ops.link")
> struct mptcp_sched_ops first = {
> 	.init		= (void *)mptcp_sched_first_init,
> 	.release	= (void *)mptcp_sched_first_release,
> -- 
> 2.45.2
>
>
>