[PATCH mptcp-next v1 7/9] mptcp: track prune recovery status

Paolo Abeni posted 9 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH mptcp-next v1 7/9] mptcp: track prune recovery status
Posted by Paolo Abeni 1 month, 2 weeks ago
After dropping any data already acked at the TCP level, MPTCP must
avoid inducing TCP-level retransmissions until the pruned data has been
successfully acked at the MPTCP level. Otherwise the subflows could keep
retransmitting skbs carrying OoO MPTCP data, preventing reinjections and
completely stalling the data transfer.

Explicitly keep track of the highest pruned MPTCP-level sequence number
and stop dropping at the TCP level until that sequence has been acked.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/mptcp/options.c  |  7 ++++++-
 net/mptcp/protocol.c | 14 +++++++++++++-
 net/mptcp/protocol.h |  1 +
 net/mptcp/subflow.c  |  1 +
 4 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index a49cb03954e5..941e4ec705fe 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1191,7 +1191,12 @@ static bool mptcp_over_limit(struct sock *sk, struct sk_buff *skb,
 		__set_bit(MPTCP_PRUNE, &msk->cb_flags);
 	}
 	mptcp_data_unlock(sk);
-	return ret;
+
+	/* After pruning any packets ensure that MPTCP-driven drops do not
+	 * cause TCP-level retransmission
+	 */
+	return ret &&
+	       !before(READ_ONCE(msk->ack_seq), READ_ONCE(msk->pruned_seq));
 }
 
 /* Return false when the caller must drop the packet, i.e. in case of error,
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0c57561ee046..44840020e53a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -369,12 +369,14 @@ static void mptcp_prune_ofo_queue(struct sock *sk, u32 seq)
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct rb_node *node, *prev;
 	bool pruned = false;
+	u32 pruned_seq;
 
 	if (RB_EMPTY_ROOT(&msk->out_of_order_queue))
 		return;
 
 	node = &msk->ooo_last_skb->rbnode;
 
+	pruned_seq = msk->pruned_seq;
 	do {
 		struct sk_buff *skb = rb_to_skb(node);
 
@@ -385,16 +387,21 @@ static void mptcp_prune_ofo_queue(struct sock *sk, u32 seq)
 		pruned = true;
 		prev = rb_prev(node);
 		rb_erase(node, &msk->out_of_order_queue);
+		if (after(MPTCP_SKB_CB(skb)->end_seq, pruned_seq))
+			pruned_seq = MPTCP_SKB_CB(skb)->end_seq;
 		mptcp_drop(sk, skb);
 		msk->ooo_last_skb = rb_to_skb(prev);
+
 		if (atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf)
 			break;
 
 		node = prev;
 	} while (node);
 
-	if (pruned)
+	if (pruned) {
+		WRITE_ONCE(msk->pruned_seq, pruned_seq);
 		NET_INC_STATS(sock_net(sk), MPTCP_MIB_OFO_PRUNED);
+	}
 }
 
 bool __mptcp_check_prune(struct sock *sk, u32 seq)
@@ -433,6 +440,8 @@ static bool __mptcp_move_skb(struct sock *sk, struct sk_buff *skb)
 	mptcp_borrow_fwdmem(sk, skb);
 
 	if (__mptcp_check_prune(sk, MPTCP_SKB_CB(skb)->map_seq)) {
+		if (after(MPTCP_SKB_CB(skb)->end_seq, msk->pruned_seq))
+			WRITE_ONCE(msk->pruned_seq, MPTCP_SKB_CB(skb)->end_seq);
 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED);
 		mptcp_drop(sk, skb);
 		return false;
@@ -866,6 +875,8 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 		WRITE_ONCE(msk->ack_seq, msk->ack_seq + seq_delta);
 		moved = true;
 	}
+	if (after(msk->ack_seq, msk->pruned_seq))
+		WRITE_ONCE(msk->pruned_seq, (u32)msk->ack_seq);
 	return moved;
 }
 
@@ -3520,6 +3531,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	/* for fallback's sake */
 	WRITE_ONCE(msk->ack_seq, 0);
 	WRITE_ONCE(msk->copied_seq, 0);
+	WRITE_ONCE(msk->pruned_seq, 0);
 
 	WRITE_ONCE(sk->sk_shutdown, 0);
 	sk_error_report(sk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a6b7eedf36cf..b7b32301e7c4 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -306,6 +306,7 @@ struct mptcp_sock {
 	u64		bytes_acked;
 	u64		snd_una;
 	u64		wnd_end;
+	u32		pruned_seq;		/* if after ack_seq, highest seq pruned */
 	u32		last_data_sent;
 	u32		last_data_recv;
 	u32		last_ack_recv;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 2a8d5da4aaea..70a5c2a08278 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -495,6 +495,7 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
 	WRITE_ONCE(msk->remote_key, subflow->remote_key);
 	WRITE_ONCE(msk->ack_seq, subflow->iasn);
 	WRITE_ONCE(msk->copied_seq, subflow->iasn);
+	WRITE_ONCE(msk->pruned_seq, subflow->iasn);
 	WRITE_ONCE(msk->can_ack, true);
 	atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
 }
-- 
2.53.0