[PATCH mptcp-next v3] mptcp: refactor MP_FAIL response

Geliang Tang posted 1 patch 3 years, 3 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/multipath-tcp/mptcp_net-next tags/patchew/527414fccf0c041908900aadb2f043134a0ec09c.1654694739.git.geliang.tang@suse.com
Maintainers: Jakub Kicinski <kuba@kernel.org>, Mat Martineau <mathew.j.martineau@linux.intel.com>, Eric Dumazet <edumazet@google.com>, Paolo Abeni <pabeni@redhat.com>, Matthieu Baerts <matthieu.baerts@tessares.net>, "David S. Miller" <davem@davemloft.net>
There is a newer version of this series
net/mptcp/pm.c       |  7 ++-----
net/mptcp/protocol.c | 26 ++++++--------------------
net/mptcp/protocol.h |  3 ++-
net/mptcp/subflow.c  | 12 +++---------
4 files changed, 13 insertions(+), 35 deletions(-)
[PATCH mptcp-next v3] mptcp: refactor MP_FAIL response
Posted by Geliang Tang 3 years, 3 months ago
This patch refactors the MP_FAIL response logic.

Add fail_sock in struct mptcp_sock to record the MP_FAIL subflow. Add
fail_timeout in mptcp_sock to record the MP_FAIL timestamp. Check them
in mptcp_mp_fail_no_response() to reset the subflow when MP_FAIL response
timeout occurs.

Drop mp_fail_response_expect flag in struct mptcp_subflow_context and
the code to reuse sk_timer.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/281
Fixes: d9fb797046c5 ("mptcp: Do not traverse the subflow connection list without lock")
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
---
 v3:
 - update as Paolo suggested.
---
 net/mptcp/pm.c       |  7 ++-----
 net/mptcp/protocol.c | 26 ++++++--------------------
 net/mptcp/protocol.h |  3 ++-
 net/mptcp/subflow.c  | 12 +++---------
 4 files changed, 13 insertions(+), 35 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 59a85220edc9..c780e5d11b89 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -299,23 +299,20 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
 	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-	struct sock *s = (struct sock *)msk;
 
 	pr_debug("fail_seq=%llu", fail_seq);
 
 	if (!READ_ONCE(msk->allow_infinite_fallback))
 		return;
 
-	if (!READ_ONCE(subflow->mp_fail_response_expect)) {
+	if (!msk->fail_sock) {
 		pr_debug("send MP_FAIL response and infinite map");
 
 		subflow->send_mp_fail = 1;
 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
 		subflow->send_infinite_map = 1;
-	} else if (!sock_flag(sk, SOCK_DEAD)) {
+	} else {
 		pr_debug("MP_FAIL response received");
-
-		sk_stop_timer(s, &s->sk_timer);
 	}
 }
 
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d6aef4b13b8a..9ffc96ddce01 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2167,21 +2167,6 @@ static void mptcp_retransmit_timer(struct timer_list *t)
 	sock_put(sk);
 }
 
-static struct mptcp_subflow_context *
-mp_fail_response_expect_subflow(struct mptcp_sock *msk)
-{
-	struct mptcp_subflow_context *subflow, *ret = NULL;
-
-	mptcp_for_each_subflow(msk, subflow) {
-		if (READ_ONCE(subflow->mp_fail_response_expect)) {
-			ret = subflow;
-			break;
-		}
-	}
-
-	return ret;
-}
-
 static void mptcp_timeout_timer(struct timer_list *t)
 {
 	struct sock *sk = from_timer(sk, t, sk_timer);
@@ -2507,18 +2492,17 @@ static void __mptcp_retrans(struct sock *sk)
 
 static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
 {
-	struct mptcp_subflow_context *subflow;
-	struct sock *ssk;
+	struct sock *ssk = msk->fail_sock;
 	bool slow;
 
-	subflow = mp_fail_response_expect_subflow(msk);
-	if (subflow) {
+	if (ssk && time_after(jiffies, msk->fail_timeout)) {
 		pr_debug("MP_FAIL doesn't respond, reset the subflow");
 
-		ssk = mptcp_subflow_tcp_sock(subflow);
 		slow = lock_sock_fast(ssk);
 		mptcp_subflow_reset(ssk);
 		unlock_sock_fast(ssk, slow);
+
+		msk->fail_sock = NULL;
 	}
 }
 
@@ -2589,6 +2573,8 @@ static int __mptcp_init_sock(struct sock *sk)
 	WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
 	WRITE_ONCE(msk->allow_infinite_fallback, true);
 	msk->recovery = false;
+	msk->fail_sock = NULL;
+	msk->fail_timeout = 0;
 
 	mptcp_pm_data_init(msk);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d406b5afbee4..f0748870ce3e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -306,6 +306,8 @@ struct mptcp_sock {
 
 	u32 setsockopt_seq;
 	char		ca_name[TCP_CA_NAME_MAX];
+	struct sock	*fail_sock;
+	unsigned long	fail_timeout;
 };
 
 #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -468,7 +470,6 @@ struct mptcp_subflow_context {
 		local_id_valid : 1, /* local_id is correctly initialized */
 		valid_csum_seen : 1;        /* at least one csum validated */
 	enum mptcp_data_avail data_avail;
-	bool	mp_fail_response_expect;
 	bool	scheduled;
 	u32	remote_nonce;
 	u64	thmac;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8841e8cd9ad8..8209301dc8de 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -974,7 +974,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	bool csum_reqd = READ_ONCE(msk->csum_enabled);
-	struct sock *sk = (struct sock *)msk;
 	struct mptcp_ext *mpext;
 	struct sk_buff *skb;
 	u16 data_len;
@@ -1016,9 +1015,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
 		pr_debug("infinite mapping received");
 		MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
 		subflow->map_data_len = 0;
-		if (!sock_flag(ssk, SOCK_DEAD))
-			sk_stop_timer(sk, &sk->sk_timer);
-
 		return MAPPING_INVALID;
 	}
 
@@ -1238,11 +1234,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
 				tcp_send_active_reset(ssk, GFP_ATOMIC);
 				while ((skb = skb_peek(&ssk->sk_receive_queue)))
 					sk_eat_skb(ssk, skb);
-			} else if (!sock_flag(ssk, SOCK_DEAD)) {
-				WRITE_ONCE(subflow->mp_fail_response_expect, true);
-				sk_reset_timer((struct sock *)msk,
-					       &((struct sock *)msk)->sk_timer,
-					       jiffies + TCP_RTO_MAX);
+			} else {
+				msk->fail_sock = ssk;
+				msk->fail_timeout = jiffies + TCP_RTO_MAX;
 			}
 			WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
 			return true;
-- 
2.35.3


Re: [PATCH mptcp-next v3] mptcp: refactor MP_FAIL response
Posted by Paolo Abeni 3 years, 3 months ago
On Wed, 2022-06-08 at 21:26 +0800, Geliang Tang wrote:
> This patch refactors the MP_FAIL response logic.
> 
> Add fail_sock in struct mptcp_sock to record the MP_FAIL subflow. Add
> fail_timeout in mptcp_sock to record the MP_FAIL timestamp. Check them
> in mptcp_mp_fail_no_response() to reset the subflow when MP_FAIL response
> timeout occurs.
> 
> Drop mp_fail_response_expect flag in struct mptcp_subflow_context and
> the code to reuse sk_timer.
> 
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/281
> Fixes: d9fb797046c5 ("mptcp: Do not traverse the subflow connection list without lock")
> Signed-off-by: Geliang Tang <geliang.tang@suse.com>
> ---
>  v3:
>  - update as Paolo suggested.
> ---
>  net/mptcp/pm.c       |  7 ++-----
>  net/mptcp/protocol.c | 26 ++++++--------------------
>  net/mptcp/protocol.h |  3 ++-
>  net/mptcp/subflow.c  | 12 +++---------
>  4 files changed, 13 insertions(+), 35 deletions(-)
> 
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index 59a85220edc9..c780e5d11b89 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -299,23 +299,20 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
>  {
>  	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
>  	struct mptcp_sock *msk = mptcp_sk(subflow->conn);
> -	struct sock *s = (struct sock *)msk;
>  
>  	pr_debug("fail_seq=%llu", fail_seq);
>  
>  	if (!READ_ONCE(msk->allow_infinite_fallback))
>  		return;
>  
> -	if (!READ_ONCE(subflow->mp_fail_response_expect)) {
> +	if (!msk->fail_sock) {
>  		pr_debug("send MP_FAIL response and infinite map");
>  
>  		subflow->send_mp_fail = 1;
>  		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
>  		subflow->send_infinite_map = 1;
> -	} else if (!sock_flag(sk, SOCK_DEAD)) {
> +	} else {
>  		pr_debug("MP_FAIL response received");
> -
> -		sk_stop_timer(s, &s->sk_timer);
>  	}
>  }
>  
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index d6aef4b13b8a..9ffc96ddce01 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -2167,21 +2167,6 @@ static void mptcp_retransmit_timer(struct timer_list *t)
>  	sock_put(sk);
>  }
>  
> -static struct mptcp_subflow_context *
> -mp_fail_response_expect_subflow(struct mptcp_sock *msk)
> -{
> -	struct mptcp_subflow_context *subflow, *ret = NULL;
> -
> -	mptcp_for_each_subflow(msk, subflow) {
> -		if (READ_ONCE(subflow->mp_fail_response_expect)) {
> -			ret = subflow;
> -			break;
> -		}
> -	}
> -
> -	return ret;
> -}
> -
>  static void mptcp_timeout_timer(struct timer_list *t)
>  {
>  	struct sock *sk = from_timer(sk, t, sk_timer);
> @@ -2507,18 +2492,17 @@ static void __mptcp_retrans(struct sock *sk)
>  
>  static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
>  {
> -	struct mptcp_subflow_context *subflow;
> -	struct sock *ssk;
> +	struct sock *ssk = msk->fail_sock;
>  	bool slow;
>  
> -	subflow = mp_fail_response_expect_subflow(msk);
> -	if (subflow) {
> +	if (ssk && time_after(jiffies, msk->fail_timeout)) {

The patch LGTM, thanks! 

Acked-by: Paolo Abeni <pabeni@redhat.com>

As minor nits I *think* that 'fail_sock' could be renamed to
'fail_ssk', for consistency (a 'sock' is usally a 'struct socket') and
I *think* that the above msk->fail_sock/msk->fail_ssk check could be
moved into mptcp_worker() function to make it clear that the
fail_no_response thing is not unconditional.

As said, minor things, could be handled with a squash-to patch, if
there is agreement on them.

Cheers,

Paolo