[PATCH mptcp-next v11 10/11] Squash to "selftests/bpf: Add bpf_rr scheduler"

Geliang Tang posted 11 patches 2 years, 7 months ago
Maintainers: Matthieu Baerts <matthieu.baerts@tessares.net>, Mat Martineau <martineau@kernel.org>, "David S. Miller" <davem@davemloft.net>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>, Andrii Nakryiko <andrii@kernel.org>, Mykola Lysenko <mykolal@fb.com>, Alexei Starovoitov <ast@kernel.org>, Daniel Borkmann <daniel@iogearbox.net>, Martin KaFai Lau <martin.lau@linux.dev>, Song Liu <song@kernel.org>, Yonghong Song <yhs@fb.com>, John Fastabend <john.fastabend@gmail.com>, KP Singh <kpsingh@kernel.org>, Stanislav Fomichev <sdf@google.com>, Hao Luo <haoluo@google.com>, Jiri Olsa <jolsa@kernel.org>, Shuah Khan <shuah@kernel.org>
[PATCH mptcp-next v11 10/11] Squash to "selftests/bpf: Add bpf_rr scheduler"
Posted by Geliang Tang 2 years, 7 months ago
Use sk_storage to store last_snd, instead of using msk->last_snd.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 tools/testing/selftests/bpf/bpf_tcp_helpers.h |  6 ++-
 .../selftests/bpf/progs/mptcp_bpf_rr.c        | 44 ++++++++++++++-----
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index b4b766c7a68f..945dd46c98c0 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -259,7 +259,6 @@ struct mptcp_sched_ops {
 struct mptcp_sock {
 	struct inet_connection_sock	sk;
 
-	struct sock	*last_snd;
 	__u32		token;
 	struct sock	*first;
 	char		ca_name[TCP_CA_NAME_MAX];
@@ -271,5 +270,10 @@ extern void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk,
 					  struct mptcp_sched_data *data) __ksym;
 extern struct mptcp_subflow_context *
 mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym;
+static inline struct sock *
+mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
+{
+	return subflow->tcp_sock;
+}
 
 #endif
diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
index e101428e5906..21144e96ba56 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
@@ -6,33 +6,49 @@
 
 char _license[] SEC("license") = "GPL";
 
+struct mptcp_rr_storage {
+	struct sock *last_snd;
+};
+struct sock *last_snd;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct mptcp_rr_storage);
+} mptcp_rr_map SEC(".maps");
+
 SEC("struct_ops/mptcp_sched_rr_init")
-void BPF_PROG(mptcp_sched_rr_init, const struct mptcp_sock *msk)
+void BPF_PROG(mptcp_sched_rr_init, struct mptcp_sock *msk)
 {
 }
 
 SEC("struct_ops/mptcp_sched_rr_release")
-void BPF_PROG(mptcp_sched_rr_release, const struct mptcp_sock *msk)
+void BPF_PROG(mptcp_sched_rr_release, struct mptcp_sock *msk)
 {
+	bpf_sk_storage_delete(&mptcp_rr_map, msk);
 }
 
-void BPF_STRUCT_OPS(bpf_rr_data_init, const struct mptcp_sock *msk,
+void BPF_STRUCT_OPS(bpf_rr_data_init, struct mptcp_sock *msk,
 		    struct mptcp_sched_data *data)
 {
 	mptcp_sched_data_set_contexts(msk, data);
 }
 
-int BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
-		   struct mptcp_sched_data *data)
+int BPF_STRUCT_OPS(bpf_rr_get_subflow, struct mptcp_sock *msk,
+		   const struct mptcp_sched_data *data)
 {
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_rr_storage *ptr;
 	int nr = 0;
 
-	for (int i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
-		if (!msk->last_snd || !data->contexts[i])
+	for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
+		subflow = mptcp_subflow_ctx_by_pos(data, i);
+		if (!last_snd || !subflow)
 			break;
 
-		if (data->contexts[i]->tcp_sock == msk->last_snd) {
-			if (i + 1 == MPTCP_SUBFLOWS_MAX || !data->contexts[i + 1])
+		if (mptcp_subflow_tcp_sock(subflow) == last_snd) {
+			if (i + 1 == MPTCP_SUBFLOWS_MAX || !mptcp_subflow_ctx_by_pos(data, i + 1))
 				break;
 
 			nr = i + 1;
@@ -40,7 +56,15 @@ int BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
 		}
 	}
 
-	mptcp_subflow_set_scheduled(data->contexts[nr], true);
+	subflow = mptcp_subflow_ctx_by_pos(data, nr);
+	if (!subflow)
+		return -1;
+	mptcp_subflow_set_scheduled(subflow, true);
+	last_snd = mptcp_subflow_tcp_sock(subflow);
+	ptr = bpf_sk_storage_get(&mptcp_rr_map, msk, 0,
+				 BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (ptr)
+		ptr->last_snd = last_snd;
 	return 0;
 }
 
-- 
2.35.3
Re: [PATCH mptcp-next v11 10/11] Squash to "selftests/bpf: Add bpf_rr scheduler"
Posted by Paolo Abeni 2 years, 7 months ago
On Tue, 2023-06-27 at 09:06 +0800, Geliang Tang wrote:
> Use sk_storage to store last_snd, instead of using msk->last_snd.
> 
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
>  tools/testing/selftests/bpf/bpf_tcp_helpers.h |  6 ++-
>  .../selftests/bpf/progs/mptcp_bpf_rr.c        | 44 ++++++++++++++-----
>  2 files changed, 39 insertions(+), 11 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
> index b4b766c7a68f..945dd46c98c0 100644
> --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
> +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
> @@ -259,7 +259,6 @@ struct mptcp_sched_ops {
>  struct mptcp_sock {
>  	struct inet_connection_sock	sk;
>  
> -	struct sock	*last_snd;
>  	__u32		token;
>  	struct sock	*first;
>  	char		ca_name[TCP_CA_NAME_MAX];
> @@ -271,5 +270,10 @@ extern void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk,
>  					  struct mptcp_sched_data *data) __ksym;
>  extern struct mptcp_subflow_context *
>  mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym;
> +static inline struct sock *
> +mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
> +{
> +	return subflow->tcp_sock;
> +}
>  
>  #endif
> diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
> index e101428e5906..21144e96ba56 100644
> --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
> +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
> @@ -6,33 +6,49 @@
>  
>  char _license[] SEC("license") = "GPL";
>  
> +struct mptcp_rr_storage {
> +	struct sock *last_snd;
> +};
> +struct sock *last_snd;
> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
> +	__uint(map_flags, BPF_F_NO_PREALLOC);
> +	__type(key, int);
> +	__type(value, struct mptcp_rr_storage);
> +} mptcp_rr_map SEC(".maps");
> +
>  SEC("struct_ops/mptcp_sched_rr_init")
> -void BPF_PROG(mptcp_sched_rr_init, const struct mptcp_sock *msk)
> +void BPF_PROG(mptcp_sched_rr_init, struct mptcp_sock *msk)
>  {
>  }
>  
>  SEC("struct_ops/mptcp_sched_rr_release")
> -void BPF_PROG(mptcp_sched_rr_release, const struct mptcp_sock *msk)
> +void BPF_PROG(mptcp_sched_rr_release, struct mptcp_sock *msk)
>  {
> +	bpf_sk_storage_delete(&mptcp_rr_map, msk);
>  }
>  
> -void BPF_STRUCT_OPS(bpf_rr_data_init, const struct mptcp_sock *msk,
> +void BPF_STRUCT_OPS(bpf_rr_data_init, struct mptcp_sock *msk,
>  		    struct mptcp_sched_data *data)
>  {
>  	mptcp_sched_data_set_contexts(msk, data);
>  }
>  
> -int BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
> -		   struct mptcp_sched_data *data)
> +int BPF_STRUCT_OPS(bpf_rr_get_subflow, struct mptcp_sock *msk,
> +		   const struct mptcp_sched_data *data)
>  {
> +	struct mptcp_subflow_context *subflow;
> +	struct mptcp_rr_storage *ptr;
>  	int nr = 0;
>  
> -	for (int i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> -		if (!msk->last_snd || !data->contexts[i])
> +	for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
> +		subflow = mptcp_subflow_ctx_by_pos(data, i);
> +		if (!last_snd || !subflow)

It looks like this is accessing a global variable ('last_snd'), which
is probably not very safe.

I think you could instead use inet_csk(msk)->icsk_ca_priv - currently
unused.

As this is the only comment I have, no need to send a whole v12. I
think we can merge the series as-is and then squash the needed change
to replace the global variable and map used here with inet_csk(msk)-
>icsk_ca_priv.

Thanks!

Paolo
Re: [PATCH mptcp-next v11 10/11] Squash to "selftests/bpf: Add bpf_rr scheduler"
Posted by Geliang Tang 2 years, 7 months ago
On Mon, Jul 03, 2023 at 05:38:39PM +0200, Paolo Abeni wrote:
> On Tue, 2023-06-27 at 09:06 +0800, Geliang Tang wrote:
> > Use sk_storage to store last_snd, instead of using msk->last_snd.
> > 
> > Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> > ---
> >  tools/testing/selftests/bpf/bpf_tcp_helpers.h |  6 ++-
> >  .../selftests/bpf/progs/mptcp_bpf_rr.c        | 44 ++++++++++++++-----
> >  2 files changed, 39 insertions(+), 11 deletions(-)
> > 
> > diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
> > index b4b766c7a68f..945dd46c98c0 100644
> > --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
> > +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
> > @@ -259,7 +259,6 @@ struct mptcp_sched_ops {
> >  struct mptcp_sock {
> >  	struct inet_connection_sock	sk;
> >  
> > -	struct sock	*last_snd;
> >  	__u32		token;
> >  	struct sock	*first;
> >  	char		ca_name[TCP_CA_NAME_MAX];
> > @@ -271,5 +270,10 @@ extern void mptcp_sched_data_set_contexts(const struct mptcp_sock *msk,
> >  					  struct mptcp_sched_data *data) __ksym;
> >  extern struct mptcp_subflow_context *
> >  mptcp_subflow_ctx_by_pos(const struct mptcp_sched_data *data, unsigned int pos) __ksym;
> > +static inline struct sock *
> > +mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
> > +{
> > +	return subflow->tcp_sock;
> > +}
> >  
> >  #endif
> > diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
> > index e101428e5906..21144e96ba56 100644
> > --- a/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
> > +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf_rr.c
> > @@ -6,33 +6,49 @@
> >  
> >  char _license[] SEC("license") = "GPL";
> >  
> > +struct mptcp_rr_storage {
> > +	struct sock *last_snd;
> > +};
> > +struct sock *last_snd;
> > +
> > +struct {
> > +	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
> > +	__uint(map_flags, BPF_F_NO_PREALLOC);
> > +	__type(key, int);
> > +	__type(value, struct mptcp_rr_storage);
> > +} mptcp_rr_map SEC(".maps");
> > +
> >  SEC("struct_ops/mptcp_sched_rr_init")
> > -void BPF_PROG(mptcp_sched_rr_init, const struct mptcp_sock *msk)
> > +void BPF_PROG(mptcp_sched_rr_init, struct mptcp_sock *msk)
> >  {
> >  }
> >  
> >  SEC("struct_ops/mptcp_sched_rr_release")
> > -void BPF_PROG(mptcp_sched_rr_release, const struct mptcp_sock *msk)
> > +void BPF_PROG(mptcp_sched_rr_release, struct mptcp_sock *msk)
> >  {
> > +	bpf_sk_storage_delete(&mptcp_rr_map, msk);
> >  }
> >  
> > -void BPF_STRUCT_OPS(bpf_rr_data_init, const struct mptcp_sock *msk,
> > +void BPF_STRUCT_OPS(bpf_rr_data_init, struct mptcp_sock *msk,
> >  		    struct mptcp_sched_data *data)
> >  {
> >  	mptcp_sched_data_set_contexts(msk, data);
> >  }
> >  
> > -int BPF_STRUCT_OPS(bpf_rr_get_subflow, const struct mptcp_sock *msk,
> > -		   struct mptcp_sched_data *data)
> > +int BPF_STRUCT_OPS(bpf_rr_get_subflow, struct mptcp_sock *msk,
> > +		   const struct mptcp_sched_data *data)
> >  {
> > +	struct mptcp_subflow_context *subflow;
> > +	struct mptcp_rr_storage *ptr;
> >  	int nr = 0;
> >  
> > -	for (int i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> > -		if (!msk->last_snd || !data->contexts[i])
> > +	for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
> > +		subflow = mptcp_subflow_ctx_by_pos(data, i);
> > +		if (!last_snd || !subflow)
> 
> It looks like this is accessing a global variable ('last_snd'), which
> is probably not very safe.

Yes, it's should be a local variable. I just sent a patch to ML to fix
this:

https://patchwork.kernel.org/project/mptcp/patch/0b57bf87dc30e15782ba1fe2e925a7ce6ef0fc90.1688434547.git.geliang.tang@suse.com/

> 
> I think you could instead use inet_csk(msk)->icsk_ca_priv - currently
> unused.
> 
> As this is the only comment I have, no need to send a whole v12. I
> think we can merge the series as-is and then squash the needed change
> to replace the global variable and map used here with inet_csk(msk)-
> >icsk_ca_priv.

One of the purposes of this test is to demonstrate an example of using a
bpf map to store persistent schedulers data, so I prefer to use the bpf
map to implement it here.

Using inet_csk(msk)->icsk_ca_priv is much more complex. We need to add
the special write accesses for icsk_ca_priv.

Thanks,
-Geliang

> 
> Thanks!
> 
> Paolo
>