[PATCH mptcp-next v6 3/9] mptcp: redundant subflows push pending

Geliang Tang posted 9 patches 3 years, 3 months ago
Maintainers: Martin KaFai Lau <kafai@fb.com>, "David S. Miller" <davem@davemloft.net>, Matthieu Baerts <matthieu.baerts@tessares.net>, Daniel Borkmann <daniel@iogearbox.net>, Shuah Khan <shuah@kernel.org>, KP Singh <kpsingh@kernel.org>, Eric Dumazet <edumazet@google.com>, Andrii Nakryiko <andrii@kernel.org>, John Fastabend <john.fastabend@gmail.com>, Jakub Kicinski <kuba@kernel.org>, Alexei Starovoitov <ast@kernel.org>, Yonghong Song <yhs@fb.com>, Paolo Abeni <pabeni@redhat.com>, Song Liu <songliubraving@fb.com>, Mat Martineau <mathew.j.martineau@linux.intel.com>
Posted by Geliang Tang 3 years, 3 months ago
This patch adds redundant subflows support to __mptcp_push_pending(),
using the mptcp_sched_get_send() wrapper instead of
mptcp_subflow_get_send().

Check each subflow's scheduled flag to see which subflow or subflows the
scheduler picked, and use them to send data.

Redundant subflows are not yet supported in __mptcp_subflow_push_pending().
This patch adds a placeholder in mptcp_sched_get_send() that picks the
first subflow in the redundant case.
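
For reference, a rough sketch of what that placeholder could look like.
The helper itself is introduced earlier in this series and is not part of
this diff, so the exact signature, the redundant check and the error value
below are assumptions based on how the wrapper is called in this patch:

	struct sock *mptcp_sched_get_send(struct mptcp_sock *msk, int *err)
	{
		struct mptcp_subflow_context *subflow;

		/* ... default (non-redundant) scheduler handling ... */

		if (msk->sched && msk->sched->redundant) {
			/* placeholder: schedule only the first subflow */
			mptcp_for_each_subflow(msk, subflow) {
				mptcp_subflow_set_scheduled(subflow, true);
				return mptcp_subflow_tcp_sock(subflow);
			}
			*err = -EINVAL;	/* illustrative: no subflow available */
			return NULL;
		}

		/* ... */
		return NULL;
	}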

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 net/mptcp/protocol.c | 73 +++++++++++++++++++++++++++++++++++++++++---
 net/mptcp/subflow.c  |  1 -
 2 files changed, 68 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ab42059143fa..257b04315271 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1549,6 +1549,62 @@ void mptcp_check_and_set_pending(struct sock *sk)
 		mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
 }
 
+static int __mptcp_subflows_push_pending(struct sock *sk, struct mptcp_sendmsg_info *info)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_data_frag *dfrag;
+	int len, copied = 0, err = 0;
+	struct sock *ssk = NULL;
+
+	while ((dfrag = mptcp_send_head(sk))) {
+		info->sent = dfrag->already_sent;
+		info->limit = dfrag->data_len;
+		len = dfrag->data_len - dfrag->already_sent;
+		while (len > 0) {
+			int ret = 0, max = 0;
+
+			mptcp_sched_get_send(msk, &err);
+			if (err)
+				goto out;
+
+			mptcp_for_each_subflow(msk, subflow) {
+				if (READ_ONCE(subflow->scheduled)) {
+					ssk = mptcp_subflow_tcp_sock(subflow);
+					if (!ssk)
+						goto out;
+
+					lock_sock(ssk);
+
+					ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
+					if (ret <= 0) {
+						mptcp_push_release(ssk, info);
+						goto out;
+					}
+
+					if (ret > max)
+						max = ret;
+
+					mptcp_push_release(ssk, info);
+
+					msk->last_snd = ssk;
+					mptcp_subflow_set_scheduled(subflow, false);
+				}
+			}
+
+			info->sent += max;
+			copied += max;
+			len -= max;
+
+			mptcp_update_post_push(msk, dfrag, max);
+		}
+		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
+	}
+
+out:
+	return copied;
+}
+
 void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 {
 	struct sock *prev_ssk = NULL, *ssk = NULL;
@@ -1559,15 +1615,20 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 	struct mptcp_data_frag *dfrag;
 	int len, copied = 0;
 
+	if (unlikely(msk->sched && msk->sched->redundant)) {
+		copied = __mptcp_subflows_push_pending(sk, &info);
+		goto out;
+	}
+
 	while ((dfrag = mptcp_send_head(sk))) {
 		info.sent = dfrag->already_sent;
 		info.limit = dfrag->data_len;
 		len = dfrag->data_len - dfrag->already_sent;
 		while (len > 0) {
-			int ret = 0;
+			int ret = 0, err = 0;
 
 			prev_ssk = ssk;
-			ssk = mptcp_subflow_get_send(msk);
+			ssk = mptcp_sched_get_send(msk, &err);
 
 			/* First check. If the ssk has changed since
 			 * the last round, release prev_ssk
@@ -1628,13 +1689,13 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
 		info.limit = dfrag->data_len;
 		len = dfrag->data_len - dfrag->already_sent;
 		while (len > 0) {
-			int ret = 0;
+			int ret = 0, err = 0;
 
 			/* the caller already invoked the packet scheduler,
 			 * check for a different subflow usage only after
 			 * spooling the first chunk of data
 			 */
-			xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
+			xmit_ssk = first ? ssk : mptcp_sched_get_send(mptcp_sk(sk), &err);
 			if (!xmit_ssk)
 				goto out;
 			if (xmit_ssk != ssk) {
@@ -3093,11 +3154,13 @@ void __mptcp_data_acked(struct sock *sk)
 
 void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 {
+	int err = 0;
+
 	if (!mptcp_send_head(sk))
 		return;
 
 	if (!sock_owned_by_user(sk)) {
-		struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
+		struct sock *xmit_ssk = mptcp_sched_get_send(mptcp_sk(sk), &err);
 
 		if (xmit_ssk == ssk)
 			__mptcp_subflow_push_pending(sk, ssk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 5351d54e514a..021b454640a3 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -881,7 +881,6 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
 				  subflow->map_data_len))) {
 		/* Mapping does covers past subflow data, invalid */
 		dbg_bad_map(subflow, ssn);
-		return false;
 	}
 	return true;
 }
-- 
2.35.3


Re: [PATCH mptcp-next v6 3/9] mptcp: redundant subflows push pending
Posted by Mat Martineau 3 years, 3 months ago
On Fri, 10 Jun 2022, Geliang Tang wrote:

> This patch adds redundant subflows support to __mptcp_push_pending(),
> using the mptcp_sched_get_send() wrapper instead of
> mptcp_subflow_get_send().
>
> Check each subflow's scheduled flag to see which subflow or subflows the
> scheduler picked, and use them to send data.
>
> Redundant subflows are not yet supported in __mptcp_subflow_push_pending().
> This patch adds a placeholder in mptcp_sched_get_send() that picks the
> first subflow in the redundant case.
>
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
> net/mptcp/protocol.c | 73 +++++++++++++++++++++++++++++++++++++++++---
> net/mptcp/subflow.c  |  1 -
> 2 files changed, 68 insertions(+), 6 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index ab42059143fa..257b04315271 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1549,6 +1549,62 @@ void mptcp_check_and_set_pending(struct sock *sk)
> 		mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
> }
>
> +static int __mptcp_subflows_push_pending(struct sock *sk, struct mptcp_sendmsg_info *info)

This separate function is fine for experimenting with the transmit loop. I 
do think it will be easier to try some different approaches to handling 
the redundant transmissions, but before upstreaming I hope we can reduce 
the duplicate code.

I suggest renaming this to __mptcp_redundant_push_pending() for now.

> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	struct mptcp_subflow_context *subflow;
> +	struct mptcp_data_frag *dfrag;
> +	int len, copied = 0, err = 0;
> +	struct sock *ssk = NULL;
> +
> +	while ((dfrag = mptcp_send_head(sk))) {
> +		info->sent = dfrag->already_sent;
> +		info->limit = dfrag->data_len;
> +		len = dfrag->data_len - dfrag->already_sent;
> +		while (len > 0) {
> +			int ret = 0, max = 0;
> +
> +			mptcp_sched_get_send(msk, &err);
> +			if (err)
> +				goto out;
> +
> +			mptcp_for_each_subflow(msk, subflow) {
> +				if (READ_ONCE(subflow->scheduled)) {
> +					ssk = mptcp_subflow_tcp_sock(subflow);
> +					if (!ssk)
> +						goto out;

Wouldn't it be better to 'continue'? Other subflows might not have errors
(see the sketch after the next comment below).

> +
> +					lock_sock(ssk);
> +
> +					ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
> +					if (ret <= 0) {
> +						mptcp_push_release(ssk, info);
> +						goto out;

Same here ('continue' instead); the transmit might still succeed on other
subflows.
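
Roughly something like the following for the inner loop. This is just a
sketch of the suggested change: clearing the scheduled flag up front and
calling mptcp_push_release() unconditionally are my additions, not taken
from the patch.

	mptcp_for_each_subflow(msk, subflow) {
		if (!READ_ONCE(subflow->scheduled))
			continue;

		mptcp_subflow_set_scheduled(subflow, false);

		ssk = mptcp_subflow_tcp_sock(subflow);
		if (!ssk)
			continue;	/* other subflows may still work */

		lock_sock(ssk);
		ret = mptcp_sendmsg_frag(sk, ssk, dfrag, info);
		mptcp_push_release(ssk, info);
		if (ret <= 0)
			continue;	/* the send may succeed elsewhere */

		if (ret > max)
			max = ret;
		msk->last_snd = ssk;
	}

With 'continue', the surrounding 'while (len > 0)' loop also needs to bail
out when max stays 0, otherwise it would spin without making progress.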

> +					}
> +
> +					if (ret > max)
> +						max = ret;
> +
> +					mptcp_push_release(ssk, info);
> +
> +					msk->last_snd = ssk;
> +					mptcp_subflow_set_scheduled(subflow, false);
> +				}
> +			}
> +
> +			info->sent += max;
> +			copied += max;
> +			len -= max;
> +
> +			mptcp_update_post_push(msk, dfrag, max);
> +		}
> +		WRITE_ONCE(msk->first_pending, mptcp_send_next(sk));
> +	}
> +
> +out:
> +	return copied;
> +}
> +
> void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> {
> 	struct sock *prev_ssk = NULL, *ssk = NULL;
> @@ -1559,15 +1615,20 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
> 	struct mptcp_data_frag *dfrag;
> 	int len, copied = 0;
>
> +	if (unlikely(msk->sched && msk->sched->redundant)) {
> +		copied = __mptcp_subflows_push_pending(sk, &info);
> +		goto out;
> +	}
> +
> 	while ((dfrag = mptcp_send_head(sk))) {
> 		info.sent = dfrag->already_sent;
> 		info.limit = dfrag->data_len;
> 		len = dfrag->data_len - dfrag->already_sent;
> 		while (len > 0) {
> -			int ret = 0;
> +			int ret = 0, err = 0;
>
> 			prev_ssk = ssk;
> -			ssk = mptcp_subflow_get_send(msk);
> +			ssk = mptcp_sched_get_send(msk, &err);
>
> 			/* First check. If the ssk has changed since
> 			 * the last round, release prev_ssk
> @@ -1628,13 +1689,13 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
> 		info.limit = dfrag->data_len;
> 		len = dfrag->data_len - dfrag->already_sent;
> 		while (len > 0) {
> -			int ret = 0;
> +			int ret = 0, err = 0;
>
> 			/* the caller already invoked the packet scheduler,
> 			 * check for a different subflow usage only after
> 			 * spooling the first chunk of data
> 			 */
> -			xmit_ssk = first ? ssk : mptcp_subflow_get_send(mptcp_sk(sk));
> +			xmit_ssk = first ? ssk : mptcp_sched_get_send(mptcp_sk(sk), &err);
> 			if (!xmit_ssk)
> 				goto out;
> 			if (xmit_ssk != ssk) {
> @@ -3093,11 +3154,13 @@ void __mptcp_data_acked(struct sock *sk)
>
> void __mptcp_check_push(struct sock *sk, struct sock *ssk)
> {
> +	int err = 0;
> +
> 	if (!mptcp_send_head(sk))
> 		return;
>
> 	if (!sock_owned_by_user(sk)) {
> -		struct sock *xmit_ssk = mptcp_subflow_get_send(mptcp_sk(sk));
> +		struct sock *xmit_ssk = mptcp_sched_get_send(mptcp_sk(sk), &err);
>
> 		if (xmit_ssk == ssk)
> 			__mptcp_subflow_push_pending(sk, ssk);
> diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
> index 5351d54e514a..021b454640a3 100644
> --- a/net/mptcp/subflow.c
> +++ b/net/mptcp/subflow.c
> @@ -881,7 +881,6 @@ static bool validate_mapping(struct sock *ssk, struct sk_buff *skb)
> 				  subflow->map_data_len))) {
> 		/* Mapping does covers past subflow data, invalid */
> 		dbg_bad_map(subflow, ssn);
> -		return false;

Is this change intended? Without the return, a mapping that covers past
subflow data would only be logged via dbg_bad_map() and then still be
treated as valid.

> 	}
> 	return true;
> }
> -- 
> 2.35.3
>
>
>

--
Mat Martineau
Intel