[PATCH mptcp-next v4 03/10] Squash to "mptcp: add get_subflow wrappers"

Posted by Geliang Tang 3 years, 3 months ago
Please update the commit log:

'''
This patch defines two new wrappers, mptcp_sched_get_send() and
mptcp_sched_get_retrans(), which invoke the get_subflow() hook of
msk->sched. Use them instead of calling mptcp_subflow_get_send() or
mptcp_subflow_get_retrans() directly.

Populate the subflow pointers array in struct mptcp_sched_data before
invoking get_subflow(), so that the array can be used by get_subflow()
in BPF contexts.

Check each subflow's scheduled flag to determine which subflow or
subflows were picked by the scheduler.
'''
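
For reference, a minimal sketch of the scheduler data passed to
get_subflow(), reconstructed from the diff below (the exact layout and
the value of MPTCP_SUBFLOWS_MAX live in protocol.h and may differ):

'''
#define MPTCP_SUBFLOWS_MAX	8	/* illustrative value */

struct mptcp_sched_data {
	bool	reinject;	/* true on the retransmit path */
	/* filled by mptcp_sched_data_init(), consumed by the BPF
	 * scheduler's get_subflow(); unused slots are NULL
	 */
	struct mptcp_subflow_context *contexts[MPTCP_SUBFLOWS_MAX];
};
'''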

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/sched.c | 54 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 11 deletions(-)

diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
index 3ceb721e6489..613b7005938c 100644
--- a/net/mptcp/sched.c
+++ b/net/mptcp/sched.c
@@ -88,11 +88,25 @@ void mptcp_release_sched(struct mptcp_sock *msk)
 	bpf_module_put(sched, sched->owner);
 }
 
-static int mptcp_sched_data_init(struct mptcp_sock *msk,
+static int mptcp_sched_data_init(struct mptcp_sock *msk, bool reinject,
 				 struct mptcp_sched_data *data)
 {
-	data->sock = NULL;
-	data->call_again = 0;
+	struct mptcp_subflow_context *subflow;
+	int i = 0;
+
+	data->reinject = reinject;
+
+	mptcp_for_each_subflow(msk, subflow) {
+		if (i == MPTCP_SUBFLOWS_MAX) {
+			pr_warn_once("too many subflows");
+			break;
+		}
+		WRITE_ONCE(subflow->scheduled, false);
+		data->contexts[i++] = subflow;
+	}
+
+	for (; i < MPTCP_SUBFLOWS_MAX; i++)
+		data->contexts[i] = NULL;
 
 	return 0;
 }
@@ -100,6 +114,8 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
 struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 {
 	struct mptcp_sched_data data;
+	struct sock *ssk = NULL;
+	int i;
 
 	sock_owned_by_me((struct sock *)msk);
 
@@ -113,16 +129,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
 	if (!msk->sched)
 		return mptcp_subflow_get_send(msk);
 
-	mptcp_sched_data_init(msk, &data);
-	msk->sched->get_subflow(msk, false, &data);
+	mptcp_sched_data_init(msk, false, &data);
+	msk->sched->get_subflow(msk, &data);
+
+	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
+		if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
+			ssk = data.contexts[i]->tcp_sock;
+			msk->last_snd = ssk;
+			break;
+		}
+	}
 
-	msk->last_snd = data.sock;
-	return data.sock;
+	return ssk;
 }
 
 struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
 {
 	struct mptcp_sched_data data;
+	struct sock *ssk = NULL;
+	int i;
 
 	sock_owned_by_me((const struct sock *)msk);
 
@@ -133,9 +158,16 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
 	if (!msk->sched)
 		return mptcp_subflow_get_retrans(msk);
 
-	mptcp_sched_data_init(msk, &data);
-	msk->sched->get_subflow(msk, true, &data);
+	mptcp_sched_data_init(msk, true, &data);
+	msk->sched->get_subflow(msk, &data);
+
+	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
+		if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
+			ssk = data.contexts[i]->tcp_sock;
+			msk->last_snd = ssk;
+			break;
+		}
+	}
 
-	msk->last_snd = data.sock;
-	return data.sock;
+	return ssk;
 }
-- 
2.34.1


Re: [PATCH mptcp-next v4 03/10] Squash to "mptcp: add get_subflow wrappers"
Posted by Mat Martineau 3 years, 3 months ago
On Tue, 31 May 2022, Geliang Tang wrote:

> Please update the commit log:
>
> '''
> This patch defines two new wrappers, mptcp_sched_get_send() and
> mptcp_sched_get_retrans(), which invoke the get_subflow() hook of
> msk->sched. Use them instead of calling mptcp_subflow_get_send() or
> mptcp_subflow_get_retrans() directly.
>
> Populate the subflow pointers array in struct mptcp_sched_data before
> invoking get_subflow(), so that the array can be used by get_subflow()
> in BPF contexts.
>
> Check each subflow's scheduled flag to determine which subflow or
> subflows were picked by the scheduler.
> '''
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/sched.c | 54 +++++++++++++++++++++++++++++++++++++----------
> 1 file changed, 43 insertions(+), 11 deletions(-)
>
> diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c
> index 3ceb721e6489..613b7005938c 100644
> --- a/net/mptcp/sched.c
> +++ b/net/mptcp/sched.c
> @@ -88,11 +88,25 @@ void mptcp_release_sched(struct mptcp_sock *msk)
> 	bpf_module_put(sched, sched->owner);
> }
>
> -static int mptcp_sched_data_init(struct mptcp_sock *msk,
> +static int mptcp_sched_data_init(struct mptcp_sock *msk, bool reinject,
> 				 struct mptcp_sched_data *data)
> {
> -	data->sock = NULL;
> -	data->call_again = 0;
> +	struct mptcp_subflow_context *subflow;
> +	int i = 0;
> +
> +	data->reinject = reinject;
> +
> +	mptcp_for_each_subflow(msk, subflow) {
> +		if (i == MPTCP_SUBFLOWS_MAX) {
> +			pr_warn_once("too many subflows");
> +			break;
> +		}
> +		WRITE_ONCE(subflow->scheduled, false);

If subflow->scheduled is using READ_ONCE/WRITE_ONCE semantics, then 
writing it directly from BPF code is going to be a problem. The code in 
this patch set would work ok since all read and write access is under the 
msk lock, but I think integrating with all of the transmit and retransmit 
code (especially transmission in mptcp_subflow_process_delegated()) would 
make it important to use WRITE_ONCE() to set subflow->scheduled.

I think that requires using a C helper function called from BPF to do 
WRITE_ONCE(subflow->scheduled), or using a lock to order accesses. The 
mptcp_data_lock is already used in mptcp_subflow_process_delegated() but 
we probably don't want to add more locking to mptcp_sendmsg(). That makes 
me think the helper function might be better - unless there's a generic 
BPF technique for using WRITE_ONCE.
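
As an untested sketch, such a helper might look like this (the name and
placement are illustrative only):

'''
/* Called from the BPF scheduler so that the flag is always set with
 * WRITE_ONCE() semantics, pairing with the READ_ONCE() readers in
 * mptcp_sched_get_send()/mptcp_sched_get_retrans().
 */
void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
				 bool scheduled)
{
	WRITE_ONCE(subflow->scheduled, scheduled);
}
'''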

> +		data->contexts[i++] = subflow;
> +	}
> +
> +	for (; i < MPTCP_SUBFLOWS_MAX; i++)
> +		data->contexts[i] = NULL;
>
> 	return 0;
> }
> @@ -100,6 +114,8 @@ static int mptcp_sched_data_init(struct mptcp_sock *msk,
> struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> {
> 	struct mptcp_sched_data data;
> +	struct sock *ssk = NULL;
> +	int i;
>
> 	sock_owned_by_me((struct sock *)msk);
>
> @@ -113,16 +129,25 @@ struct sock *mptcp_sched_get_send(struct mptcp_sock *msk)
> 	if (!msk->sched)
> 		return mptcp_subflow_get_send(msk);
>
> -	mptcp_sched_data_init(msk, &data);
> -	msk->sched->get_subflow(msk, false, &data);
> +	mptcp_sched_data_init(msk, false, &data);
> +	msk->sched->get_subflow(msk, &data);
> +
> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> +		if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
> +			ssk = data.contexts[i]->tcp_sock;
> +			msk->last_snd = ssk;
> +			break;
> +		}
> +	}

I think this is ok for a placeholder until more of the transmit 
integration is done.
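
As an aside, with a helper like the one sketched above, a minimal
"first subflow" BPF scheduler on top of this interface might look
roughly like the following (hypothetical names, following the usual
struct_ops style with vmlinux.h/bpf_tracing.h includes):

'''
SEC("struct_ops/mptcp_sched_first_get_subflow")
void BPF_PROG(bpf_first_get_subflow, const struct mptcp_sock *msk,
	      struct mptcp_sched_data *data)
{
	/* Mark only the first subflow; the loop above then picks
	 * its tcp_sock and stores it in msk->last_snd.
	 */
	if (data->contexts[0])
		mptcp_subflow_set_scheduled(data->contexts[0], true);
}
'''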

>
> -	msk->last_snd = data.sock;
> -	return data.sock;
> +	return ssk;
> }
>
> struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> {
> 	struct mptcp_sched_data data;
> +	struct sock *ssk = NULL;
> +	int i;
>
> 	sock_owned_by_me((const struct sock *)msk);
>
> @@ -133,9 +158,16 @@ struct sock *mptcp_sched_get_retrans(struct mptcp_sock *msk)
> 	if (!msk->sched)
> 		return mptcp_subflow_get_retrans(msk);
>
> -	mptcp_sched_data_init(msk, &data);
> -	msk->sched->get_subflow(msk, true, &data);
> +	mptcp_sched_data_init(msk, true, &data);
> +	msk->sched->get_subflow(msk, &data);
> +
> +	for (i = 0; i < MPTCP_SUBFLOWS_MAX; i++) {
> +		if (data.contexts[i] && READ_ONCE(data.contexts[i]->scheduled)) {
> +			ssk = data.contexts[i]->tcp_sock;
> +			msk->last_snd = ssk;
> +			break;
> +		}
> +	}
>
> -	msk->last_snd = data.sock;
> -	return data.sock;
> +	return ssk;
> }
> -- 
> 2.34.1
>
>
>

--
Mat Martineau
Intel