[PATCH mptcp-next] mptcp: drop __mptcp_fastopen_gen_msk_ackseq()

Paolo Abeni posted 1 patch 19 hours ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/multipath-tcp/mptcp_net-next tags/patchew/e96b1f70660ffd750c09c051991e2b65a7d1b492.1737398231.git.pabeni@redhat.com
net/mptcp/fastopen.c | 25 ++-----------------------
net/mptcp/protocol.c |  4 +++-
net/mptcp/protocol.h |  5 ++---
net/mptcp/subflow.c  |  3 ---
4 files changed, 7 insertions(+), 30 deletions(-)
[PATCH mptcp-next] mptcp: drop __mptcp_fastopen_gen_msk_ackseq()
Posted by Paolo Abeni 19 hours ago
When we move the whole RX path under the msk socket lock, updating
the already queued skb for passive fastopen sockets at 3rd ack time will
be extremely painful and race prone.

The map_seq for already enqueued skbs is used only to allow correct
coalescing with later data; by preventing collapsing into the first skb of
a fastopen connect, we can completely remove the
__mptcp_fastopen_gen_msk_ackseq() helper.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/539

Must be applied just before commit:
 "mptcp: move the whole rx path under msk socket lock protection"

To such extent, the patch will require a little mangling, as
__mptcp_fastopen_gen_msk_ackseq() at that history point still lacks
the
	DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
annotation.

I preferred such option to a cleanly applying patch for the target
place in the repo log to trigger the CI at submission time.

Optimistically sharing this only with very little testing, to distract
myself from the net-next PR burden.
---
 net/mptcp/fastopen.c | 25 ++-----------------------
 net/mptcp/protocol.c |  4 +++-
 net/mptcp/protocol.h |  5 ++---
 net/mptcp/subflow.c  |  3 ---
 4 files changed, 7 insertions(+), 30 deletions(-)

diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index b0f1dddfb143..b9e451197902 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -40,13 +40,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
 	tp->copied_seq += skb->len;
 	subflow->ssn_offset += skb->len;
 
-	/* initialize a dummy sequence number, we will update it at MPC
-	 * completion, if needed
-	 */
+	/* Only the sequence delta is relevant */
 	MPTCP_SKB_CB(skb)->map_seq = -skb->len;
 	MPTCP_SKB_CB(skb)->end_seq = 0;
 	MPTCP_SKB_CB(skb)->offset = 0;
 	MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+	MPTCP_SKB_CB(skb)->cant_coalesce = 1;
 
 	mptcp_data_lock(sk);
 	DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
@@ -59,23 +58,3 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
 
 	mptcp_data_unlock(sk);
 }
-
-void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
-				     const struct mptcp_options_received *mp_opt)
-{
-	struct sock *sk = (struct sock *)msk;
-	struct sk_buff *skb;
-
-	DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
-	skb = skb_peek_tail(&sk->sk_receive_queue);
-	if (skb) {
-		WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
-		pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk,
-			 MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
-			 MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
-		MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
-		MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
-	}
-
-	pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq);
-}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5cda189d29f2..5a75a39b9c67 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -124,7 +124,8 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
 	bool fragstolen;
 	int delta;
 
-	if (MPTCP_SKB_CB(from)->offset ||
+	if (unlikely(MPTCP_SKB_CB(to)->cant_coalesce) ||
+	    MPTCP_SKB_CB(from)->offset ||
 	    ((to->len + from->len) > (sk->sk_rcvbuf >> 3)) ||
 	    !skb_try_coalesce(to, from, &fragstolen, &delta))
 		return false;
@@ -299,6 +300,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 	MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len;
 	MPTCP_SKB_CB(skb)->offset = offset;
 	MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
+	MPTCP_SKB_CB(skb)->cant_coalesce = 0;
 
 	if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
 		/* in sequence */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index b1ece280e139..2c1232bfd722 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -130,7 +130,8 @@ struct mptcp_skb_cb {
 	u64 map_seq;
 	u64 end_seq;
 	u32 offset;
-	u8  has_rxtstamp:1;
+	u8  has_rxtstamp;
+	u8  cant_coalesce;
 };
 
 #define MPTCP_SKB_CB(__skb)	((struct mptcp_skb_cb *)&((__skb)->cb[0]))
@@ -1056,8 +1057,6 @@ void mptcp_event_pm_listener(const struct sock *ssk,
 			     enum mptcp_event_type event);
 bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
 
-void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
-				     const struct mptcp_options_received *mp_opt);
 void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
 					      struct request_sock *req);
 int mptcp_nl_fill_addr(struct sk_buff *skb,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 2926bdf88e42..d2caffa56bdd 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -802,9 +802,6 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
 	subflow_set_remote_key(msk, subflow, mp_opt);
 	WRITE_ONCE(subflow->fully_established, true);
 	WRITE_ONCE(msk->fully_established, true);
-
-	if (subflow->is_mptfo)
-		__mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
 }
 
 static struct sock *subflow_syn_recv_sock(const struct sock *sk,
-- 
2.47.1
Re: [PATCH mptcp-next] mptcp: drop __mptcp_fastopen_gen_msk_ackseq()
Posted by Matthieu Baerts 2 hours ago
Hi Paolo,

On 20/01/2025 19:39, Paolo Abeni wrote:
> When we move the whole RX path under the msk socket lock, updating
> the already queued skb for passive fastopen sockets at 3rd ack time will
> be extremely painful and race prone.
> 
> The map_seq for already enqueued skbs is used only to allow correct
> coalescing with later data; by preventing collapsing into the first skb of
> a fastopen connect, we can completely remove the
> __mptcp_fastopen_gen_msk_ackseq() helper.

Thank you for having looked at that!

It looks good to me, just one question below.

> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/539
> 
> Must be applied just before commit:
>  "mptcp: move the whole rx path under msk socket lock protection"
> 
> To such extent, the patch will require a little mangling, as
> __mptcp_fastopen_gen_msk_ackseq() at that history point still lacks
> the
> 	DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
> annotation.
> 
> I preferred such option to a cleanly applying patch for the target
> place in the repo log to trigger the CI at submission time.

I prefer that too, for the same reason, and also because it often feels
easier for me to resolve conflicts when I have the end results.

> Optimistically sharing this only with very little testing, to distract
> myself from the net-next PR burden.

:)

It looks like the queue is progressing well :)

(...)

> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index b1ece280e139..2c1232bfd722 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -130,7 +130,8 @@ struct mptcp_skb_cb {
>  	u64 map_seq;
>  	u64 end_seq;
>  	u32 offset;
> -	u8  has_rxtstamp:1;
> +	u8  has_rxtstamp;
> +	u8  cant_coalesce;

Out of curiosity: why do you prefer to use a "full" u8, instead of one
bit in the previous u8, or a bool? Is it a small optimisation because
this new item is used in the fast path? We have free space there, so
that's fine, but just wondering :)

>  };
>  
>  #define MPTCP_SKB_CB(__skb)	((struct mptcp_skb_cb *)&((__skb)->cb[0]))

(...)

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.