[PATCH mptcp-next v6 5/9] mptcp: redundant subflows retrans support

Geliang Tang posted 9 patches 3 years, 3 months ago
Maintainers: Martin KaFai Lau <kafai@fb.com>, "David S. Miller" <davem@davemloft.net>, Matthieu Baerts <matthieu.baerts@tessares.net>, Daniel Borkmann <daniel@iogearbox.net>, Shuah Khan <shuah@kernel.org>, KP Singh <kpsingh@kernel.org>, Eric Dumazet <edumazet@google.com>, Andrii Nakryiko <andrii@kernel.org>, John Fastabend <john.fastabend@gmail.com>, Jakub Kicinski <kuba@kernel.org>, Alexei Starovoitov <ast@kernel.org>, Yonghong Song <yhs@fb.com>, Paolo Abeni <pabeni@redhat.com>, Song Liu <songliubraving@fb.com>, Mat Martineau <mathew.j.martineau@linux.intel.com>
There is a newer version of this series
[PATCH mptcp-next v6 5/9] mptcp: redundant subflows retrans support
Posted by Geliang Tang 3 years, 3 months ago
This patch adds redundant subflows support to __mptcp_retrans(). It uses the
mptcp_sched_get_retrans() wrapper instead of mptcp_subflow_get_retrans().

Iterate over each subflow of the msk and check its scheduled flag to see
whether it was picked by the scheduler. If so, use it to send the data.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c | 49 +++++++++++++++++++++++++++++---------------
 1 file changed, 32 insertions(+), 17 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3bb3445c1eaf..14d3637d1e88 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2490,16 +2490,14 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
 static void __mptcp_retrans(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct mptcp_subflow_context *subflow;
 	struct mptcp_sendmsg_info info = {};
 	struct mptcp_data_frag *dfrag;
 	size_t copied = 0;
 	struct sock *ssk;
-	int ret;
 
 	mptcp_clean_una_wakeup(sk);
 
-	/* first check ssk: need to kick "stale" logic */
-	ssk = mptcp_subflow_get_retrans(msk);
 	dfrag = mptcp_rtx_head(sk);
 	if (!dfrag) {
 		if (mptcp_data_fin_enabled(msk)) {
@@ -2518,32 +2516,49 @@ static void __mptcp_retrans(struct sock *sk)
 		goto reset_timer;
 	}
 
-	if (!ssk)
-		goto reset_timer;
-
-	lock_sock(ssk);
-
 	/* limit retransmission to the bytes already sent on some subflows */
 	info.sent = 0;
 	info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
 	while (info.sent < info.limit) {
-		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
-		if (ret <= 0)
-			break;
+		int ret = 0, max = 0, err;
+
+		err = mptcp_sched_get_retrans(msk);
+		if (err)
+			goto reset_timer;
+
+		mptcp_for_each_subflow(msk, subflow) {
+			if (READ_ONCE(subflow->scheduled)) {
+				ssk = mptcp_subflow_tcp_sock(subflow);
+				if (!ssk)
+					goto reset_timer;
 
+				lock_sock(ssk);
+
+				ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
+				if (ret <= 0)
+					break;
+
+				if (ret > max)
+					max = ret;
+
+				tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
+					 info.size_goal);
+
+				release_sock(ssk);
+
+				msk->last_snd = ssk;
+				mptcp_subflow_set_scheduled(subflow, false);
+			}
+		}
 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
-		copied += ret;
-		info.sent += ret;
+		copied += max;
+		info.sent += max;
 	}
 	if (copied) {
 		dfrag->already_sent = max(dfrag->already_sent, info.sent);
-		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
-			 info.size_goal);
 		WRITE_ONCE(msk->allow_infinite_fallback, false);
 	}
 
-	release_sock(ssk);
-
 reset_timer:
 	mptcp_check_and_set_pending(sk);
 
-- 
2.35.3


Re: [PATCH mptcp-next v6 5/9] mptcp: redundant subflows retrans support
Posted by Mat Martineau 3 years, 3 months ago
On Fri, 10 Jun 2022, Geliang Tang wrote:

> This patch adds the redundant subflows support for __mptcp_retrans(). In
> it, use sched_get_retrans() wrapper instead of mptcp_subflow_get_retrans().
>
> Iterate each subflow of msk, check the scheduled flag to test if it is
> picked by the scheduler. If so, use it to send data.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 49 +++++++++++++++++++++++++++++---------------
> 1 file changed, 32 insertions(+), 17 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 3bb3445c1eaf..14d3637d1e88 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -2490,16 +2490,14 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
> static void __mptcp_retrans(struct sock *sk)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> +	struct mptcp_subflow_context *subflow;
> 	struct mptcp_sendmsg_info info = {};
> 	struct mptcp_data_frag *dfrag;
> 	size_t copied = 0;
> 	struct sock *ssk;
> -	int ret;
>
> 	mptcp_clean_una_wakeup(sk);
>
> -	/* first check ssk: need to kick "stale" logic */
> -	ssk = mptcp_subflow_get_retrans(msk);
> 	dfrag = mptcp_rtx_head(sk);
> 	if (!dfrag) {
> 		if (mptcp_data_fin_enabled(msk)) {
> @@ -2518,32 +2516,49 @@ static void __mptcp_retrans(struct sock *sk)
> 		goto reset_timer;
> 	}
>
> -	if (!ssk)
> -		goto reset_timer;
> -
> -	lock_sock(ssk);
> -
> 	/* limit retransmission to the bytes already sent on some subflows */
> 	info.sent = 0;
> 	info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
> 	while (info.sent < info.limit) {
> -		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
> -		if (ret <= 0)
> -			break;
> +		int ret = 0, max = 0, err;
> +
> +		err = mptcp_sched_get_retrans(msk);

The existing code calls the scheduler once and then loops to send data on 
the selected subflow.

I think the redundant scheduler can still work like that: call the 
scheduler once, then try to retransmit on each of the scheduled subflows. 
If we can reduce the number of calls to the scheduler, and also call 
mptcp_sendmsg_frag() multiple times on each subflow without releasing the 
lock, that's much more efficient.

> +		if (err)
> +			goto reset_timer;
> +
> +		mptcp_for_each_subflow(msk, subflow) {
> +			if (READ_ONCE(subflow->scheduled)) {
> +				ssk = mptcp_subflow_tcp_sock(subflow);
> +				if (!ssk)
> +					goto reset_timer;
>
> +				lock_sock(ssk);
> +
> +				ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
> +				if (ret <= 0)
> +					break;
> +
> +				if (ret > max)
> +					max = ret;
> +
> +				tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
> +					 info.size_goal);
> +
> +				release_sock(ssk);
> +
> +				msk->last_snd = ssk;
> +				mptcp_subflow_set_scheduled(subflow, false);
> +			}
> +		}
> 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS);
> -		copied += ret;
> -		info.sent += ret;
> +		copied += max;
> +		info.sent += max;
> 	}
> 	if (copied) {
> 		dfrag->already_sent = max(dfrag->already_sent, info.sent);
> -		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
> -			 info.size_goal);
> 		WRITE_ONCE(msk->allow_infinite_fallback, false);
> 	}
>
> -	release_sock(ssk);
> -
> reset_timer:
> 	mptcp_check_and_set_pending(sk);
>
> -- 
> 2.35.3
>
>
>

--
Mat Martineau
Intel