When we move the whole RX path under the msk socket lock, updating
the already queued skb for a passive fastopen socket at 3rd ack time will
be extremely painful and race prone.

The map_seq for already enqueued skbs is used only to allow correct
coalescing with later data; by preventing collapsing into the first skb of
a fastopen connection we can completely remove the
__mptcp_fastopen_gen_msk_ackseq() helper.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/539
Must be applied just before commit:
"mptcp: move the whole rx path under msk socket lock protection"
Because of that, the patch will require a little mangling, as
__mptcp_fastopen_gen_msk_ackseq() at that point in the history still
lacks the
DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
annotation.
I preferred this option to a patch applying cleanly at the target
place in the repo log, so that the CI is triggered at submission time.
Optimistically sharing this only with very little testing, to distract
myself from the net-next PR burden.
---
net/mptcp/fastopen.c | 25 ++-----------------------
net/mptcp/protocol.c | 4 +++-
net/mptcp/protocol.h | 5 ++---
net/mptcp/subflow.c | 3 ---
4 files changed, 7 insertions(+), 30 deletions(-)
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index b0f1dddfb143..b9e451197902 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -40,13 +40,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
tp->copied_seq += skb->len;
subflow->ssn_offset += skb->len;
- /* initialize a dummy sequence number, we will update it at MPC
- * completion, if needed
- */
+ /* Only the sequence delta is relevant */
MPTCP_SKB_CB(skb)->map_seq = -skb->len;
MPTCP_SKB_CB(skb)->end_seq = 0;
MPTCP_SKB_CB(skb)->offset = 0;
MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
+ MPTCP_SKB_CB(skb)->cant_coalesce = 1;
mptcp_data_lock(sk);
DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
@@ -59,23 +58,3 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
mptcp_data_unlock(sk);
}
-
-void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
-{
- struct sock *sk = (struct sock *)msk;
- struct sk_buff *skb;
-
- DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
- skb = skb_peek_tail(&sk->sk_receive_queue);
- if (skb) {
- WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
- pr_debug("msk %p moving seq %llx -> %llx end_seq %llx -> %llx\n", sk,
- MPTCP_SKB_CB(skb)->map_seq, MPTCP_SKB_CB(skb)->map_seq + msk->ack_seq,
- MPTCP_SKB_CB(skb)->end_seq, MPTCP_SKB_CB(skb)->end_seq + msk->ack_seq);
- MPTCP_SKB_CB(skb)->map_seq += msk->ack_seq;
- MPTCP_SKB_CB(skb)->end_seq += msk->ack_seq;
- }
-
- pr_debug("msk=%p ack_seq=%llx\n", msk, msk->ack_seq);
-}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5cda189d29f2..5a75a39b9c67 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -124,7 +124,8 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
bool fragstolen;
int delta;
- if (MPTCP_SKB_CB(from)->offset ||
+ if (unlikely(MPTCP_SKB_CB(to)->cant_coalesce) ||
+ MPTCP_SKB_CB(from)->offset ||
((to->len + from->len) > (sk->sk_rcvbuf >> 3)) ||
!skb_try_coalesce(to, from, &fragstolen, &delta))
return false;
@@ -299,6 +300,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len;
MPTCP_SKB_CB(skb)->offset = offset;
MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
+ MPTCP_SKB_CB(skb)->cant_coalesce = 0;
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
/* in sequence */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index b1ece280e139..2c1232bfd722 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -130,7 +130,8 @@ struct mptcp_skb_cb {
u64 map_seq;
u64 end_seq;
u32 offset;
- u8 has_rxtstamp:1;
+ u8 has_rxtstamp;
+ u8 cant_coalesce;
};
#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
@@ -1056,8 +1057,6 @@ void mptcp_event_pm_listener(const struct sock *ssk,
enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
-void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
int mptcp_nl_fill_addr(struct sk_buff *skb,
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 2926bdf88e42..d2caffa56bdd 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -802,9 +802,6 @@ void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
subflow_set_remote_key(msk, subflow, mp_opt);
WRITE_ONCE(subflow->fully_established, true);
WRITE_ONCE(msk->fully_established, true);
-
- if (subflow->is_mptfo)
- __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
--
2.47.1
Hi Paolo,

On 20/01/2025 19:39, Paolo Abeni wrote:
> When we will move the whole RX path under the msk socket lock, updating
> the already queued skb for passive fastopen socket at 3rd ack time will
> be extremely painful and race prone
>
> The map_seq for already enqueued skbs is used only to allow correct
> coalescing with later data; preventing collapsing to the first skb of
> a fastopen connect we can completely remove the
> __mptcp_fastopen_gen_msk_ackseq() helper.

Now in our tree (feat. for net-next), before commit "mptcp: move the
whole rx path under msk socket lock protection", with a new paragraph
explaining the bitfield conversion:

New patches for t/upstream:
- 0d73e07b64af: mptcp: drop __mptcp_fastopen_gen_msk_ackseq()
- 9d07905af748: conflict in t/mptcp-move-the-whole-rx-path-under-msk-socket-lock-protection
- Results: 0a4d2b745023..a9a670c7f9f9 (export)

Tests are now in progress:
- export: https://github.com/multipath-tcp/mptcp_net-next/commit/ebd784afc6c02b1ba980d99ffe6ffb90a6af53c2/checks

Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
Hi Paolo,
On 20/01/2025 19:39, Paolo Abeni wrote:
> When we will move the whole RX path under the msk socket lock, updating
> the already queued skb for passive fastopen socket at 3rd ack time will
> be extremely painful and race prone
>
> The map_seq for already enqueued skbs is used only to allow correct
> coalescing with later data; preventing collapsing to the first skb of
> a fastopen connect we can completely remove the
> __mptcp_fastopen_gen_msk_ackseq() helper.
Thank you for having looked at that!
It looks good to me, just one question below.
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/539
>
> Must be applied just before commit:
> "mptcp: move the whole rx path under msk socket lock protection"
>
> To such extent, the patch will require a little mangling, as
> __mptcp_fastopen_gen_msk_ackseq() at that history point still lacks
> the
> DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
> annotation.
>
> I preferred such option to a cleanly applying patch for the target
> place in the repo log to trigger the CI at submission time.
I prefer that too, for the same reason, and also because it often feels
easier for me to resolve conflicts when I have the end results.
> Optimistically sharing this only with very little testing, to distract
> myself from the net-next PR burden.
:)
It looks like the queue is progressing well :)
(...)
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index b1ece280e139..2c1232bfd722 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -130,7 +130,8 @@ struct mptcp_skb_cb {
> u64 map_seq;
> u64 end_seq;
> u32 offset;
> - u8 has_rxtstamp:1;
> + u8 has_rxtstamp;
> + u8 cant_coalesce;
Out of curiosity: why do you prefer to use a "full" u8, instead of one
bit in the previous u8, or a bool? Is it a small optimisation because
this new item is used in the fast path? We have free space there, so
that's fine, but just wondering :)
> };
>
> #define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
(...)
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
On 1/21/25 12:55 PM, Matthieu Baerts wrote:
> On 20/01/2025 19:39, Paolo Abeni wrote:
>> When we will move the whole RX path under the msk socket lock, updating
>> the already queued skb for passive fastopen socket at 3rd ack time will
>> be extremely painful and race prone
>>
>> The map_seq for already enqueued skbs is used only to allow correct
>> coalescing with later data; preventing collapsing to the first skb of
>> a fastopen connect we can completely remove the
>> __mptcp_fastopen_gen_msk_ackseq() helper.
>
> Thank you for having looked at that!
>
> It looks good to me, just one question below.
>
>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>> ---
>> Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/539
>>
>> Must be applied just before commit:
>> "mptcp: move the whole rx path under msk socket lock protection"
>>
>> To such extent, the patch will require a little mangling, as
>> __mptcp_fastopen_gen_msk_ackseq() at that history point still lacks
>> the
>> DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
>> annotation.
>>
>> I preferred such option to a cleanly applying patch for the target
>> place in the repo log to trigger the CI at submission time.
>
> I prefer that too, for the same reason, and also because it often feels
> easier for me to resolve conflicts when I have the end results.
>
>> Optimistically sharing this only with very little testing, to distract
>> myself from the net-next PR burden.
>
> :)
>
> It looks like the queue is progressing well :)
>
> (...)
>
>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>> index b1ece280e139..2c1232bfd722 100644
>> --- a/net/mptcp/protocol.h
>> +++ b/net/mptcp/protocol.h
>> @@ -130,7 +130,8 @@ struct mptcp_skb_cb {
>> u64 map_seq;
>> u64 end_seq;
>> u32 offset;
>> - u8 has_rxtstamp:1;
>> + u8 has_rxtstamp;
>> + u8 cant_coalesce;
>
> Out of curiosity: while do you prefer to use a "full" u8, instead of one
> bit in the previous u8, or a bool? Is it a small optimisation because
> this new item is used in the fast path? We have free space there, so
> that's fine, but just wondering :)
This is a minor optimization vs the bitfield usage; I should have mentioned it.
With this patch, the 'cant_coalesce' field will be tested frequently in
the fast path - almost on every packet.
Testing a bitfield generates worse code - the CPU has to mask out the
irrelevant bits. With a u8 (or a bool) the CPU can load the relevant
byte directly with a single operation. In a similar way, single-byte
store operations are faster.
Either u8 or bool could achieve the above. I preferred u8 because 'it was
already there'.
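As a rough standalone illustration (not code from the patch, the struct
and function names below are made up for the example), compiling the two
accessors with gcc -O2 -S shows the extra shift/mask the bitfield needs
compared to the plain byte:

#include <stdbool.h>
#include <stdint.h>

struct cb_bitfield {
	uint8_t has_rxtstamp:1;
	uint8_t cant_coalesce:1;
};

struct cb_byte {
	uint8_t has_rxtstamp;
	uint8_t cant_coalesce;
};

bool skip_coalesce_bitfield(const struct cb_bitfield *cb)
{
	/* byte load followed by a shift/mask to isolate the bit */
	return cb->cant_coalesce;
}

bool skip_coalesce_byte(const struct cb_byte *cb)
{
	/* plain single-byte load and test, no masking needed */
	return cb->cant_coalesce;
}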
/P
Hi Paolo,
On 21/01/2025 15:31, Paolo Abeni wrote:
> On 1/21/25 12:55 PM, Matthieu Baerts wrote:
>> On 20/01/2025 19:39, Paolo Abeni wrote:
>>> When we will move the whole RX path under the msk socket lock, updating
>>> the already queued skb for passive fastopen socket at 3rd ack time will
>>> be extremely painful and race prone
>>>
>>> The map_seq for already enqueued skbs is used only to allow correct
>>> coalescing with later data; preventing collapsing to the first skb of
>>> a fastopen connect we can completely remove the
>>> __mptcp_fastopen_gen_msk_ackseq() helper.
>>
>> Thank you for having looked at that!
>>
>> It looks good to me, just one question below.
(...)
>>> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
>>> index b1ece280e139..2c1232bfd722 100644
>>> --- a/net/mptcp/protocol.h
>>> +++ b/net/mptcp/protocol.h
>>> @@ -130,7 +130,8 @@ struct mptcp_skb_cb {
>>> u64 map_seq;
>>> u64 end_seq;
>>> u32 offset;
>>> - u8 has_rxtstamp:1;
>>> + u8 has_rxtstamp;
>>> + u8 cant_coalesce;
>>
>> Out of curiosity: while do you prefer to use a "full" u8, instead of one
>> bit in the previous u8, or a bool? Is it a small optimisation because
>> this new item is used in the fast path? We have free space there, so
>> that's fine, but just wondering :)
>
> This is a minor optimization vs bitfield usage, I should have mentioned it.
>
> With this patch, the 'cant_coalesce' field will be tested in fast path
> frequently - almost on every packet.
>
> Testing a bitfield generates worse code - the CPU has to mask out the
> unrelevant bits. With u8 (or a bool) the CPU can load directly the
> relevant byte with a single operation. In a similar way, single byte
> store operations are faster.
>
> Both u8 or bool could achieve the above. I preferred u8 because 'it was
> already there'.
Thank you for your reply! OK, that's what I had in mind, and not an
accident.
I can add this in the commit message when applying the patch, if there
is no objection:
To fix this, a new item has been added to the mptcp_skb_cb structure.
Because it will be frequently tested in the fast path -- almost on every
packet -- and because there is free space there, a single byte is used
instead of a bitfield. This micro optimisation reduces the number of CPU
operations needed for the associated check.
Other than that, the modification looks good to me:
Reviewed-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.