skbs will be left waiting in the subflow only in exceptional cases,
we want to avoid messing with the fast path by unintentionally
processing in __mptcp_move_skbs() packets landed into the subflows
after the last check.
Use a separate flag to mark delayed skbs and only process subflow
with such flag set. Also add new mibs to track the exceptional events.
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
net/mptcp/mib.c | 2 ++
net/mptcp/mib.h | 4 ++++
net/mptcp/protocol.c | 40 ++++++++++++----------------------------
net/mptcp/protocol.h | 1 +
4 files changed, 19 insertions(+), 28 deletions(-)
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index cf879c188ca26..7af9d35cde884 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -85,6 +85,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK),
SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK),
SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED),
+ SNMP_MIB_ITEM("RcvDelayed", MPTCP_MIB_RCVDELAYED),
+ SNMP_MIB_ITEM("DelayedProcess", MPTCP_MIB_DELAYED_PROCESS),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 309bac6fea325..f6d0eaea463e5 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -88,6 +88,10 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */
MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */
MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */
+ MPTCP_MIB_RCVDELAYED, /* Data move from subflow is delayed due to msk
+ * receive buffer full
+ */
+ MPTCP_MIB_DELAYED_PROCESS, /* Delayed data moved in slowpath */
__MPTCP_MIB_MAX
};
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 9c3baed948d1d..f211a939daf39 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -665,13 +665,17 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
tp = tcp_sk(ssk);
do {
int mem = own_msk ? sk_rmem_alloc_get(sk) : sk->sk_backlog.len;
+ bool over_limit = mem > READ_ONCE(sk->sk_rcvbuf);
u32 map_remaining, offset;
u32 seq = tp->copied_seq;
struct sk_buff *skb;
bool fin;
- if (mem > READ_ONCE(sk->sk_rcvbuf))
+ WRITE_ONCE(subflow->data_delayed, over_limit);
+ if (subflow->data_delayed) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVDELAYED);
break;
+ }
/* try to move as much data as available */
map_remaining = subflow->map_data_len -
@@ -2081,32 +2085,13 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
msk->rcvq_space.time = mstamp;
}
-static struct mptcp_subflow_context *
-__mptcp_first_ready_from(struct mptcp_sock *msk,
- struct mptcp_subflow_context *subflow)
-{
- struct mptcp_subflow_context *start_subflow = subflow;
-
- while (!READ_ONCE(subflow->data_avail)) {
- subflow = mptcp_next_subflow(msk, subflow);
- if (subflow == start_subflow)
- return NULL;
- }
- return subflow;
-}
-
static bool __mptcp_move_skbs(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
bool ret = false;
- if (list_empty(&msk->conn_list))
- return false;
-
- subflow = list_first_entry(&msk->conn_list,
- struct mptcp_subflow_context, node);
- for (;;) {
+ mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk;
bool slowpath;
@@ -2117,23 +2102,22 @@ static bool __mptcp_move_skbs(struct sock *sk)
if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
break;
- subflow = __mptcp_first_ready_from(msk, subflow);
- if (!subflow)
- break;
+ if (!subflow->data_delayed)
+ continue;
ssk = mptcp_subflow_tcp_sock(subflow);
slowpath = lock_sock_fast(ssk);
- ret = __mptcp_move_skbs_from_subflow(msk, ssk, true) || ret;
+ ret |= __mptcp_move_skbs_from_subflow(msk, ssk, true);
if (unlikely(ssk->sk_err))
__mptcp_error_report(sk);
unlock_sock_fast(ssk, slowpath);
-
- subflow = mptcp_next_subflow(msk, subflow);
}
__mptcp_ofo_queue(msk);
- if (ret)
+ if (ret) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_DELAYED_PROCESS);
mptcp_check_data_fin((struct sock *)msk);
+ }
return ret;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index a6e775d6412e5..2905e4b250070 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -559,6 +559,7 @@ struct mptcp_subflow_context {
u8 reset_transient:1;
u8 reset_reason:4;
u8 stale_count;
+ bool data_delayed;
u32 subflow_id;
--
2.51.0
Hi Paolo,
Thanks for this patch.
On Tue, 2025-09-16 at 18:27 +0200, Paolo Abeni wrote:
> skbs will be left waiting in the subflow only in exceptional cases,
> we want to avoid messing with the fast path by unintentionally
> processing in __mptcp_move_skbs() packets landed into the subflows
> after the last check.
>
> Use a separate flag to mark delayed skbs and only process subflow
> with such flag set. Also add new mibs to track the exceptional
> events.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> net/mptcp/mib.c | 2 ++
> net/mptcp/mib.h | 4 ++++
> net/mptcp/protocol.c | 40 ++++++++++++----------------------------
> net/mptcp/protocol.h | 1 +
> 4 files changed, 19 insertions(+), 28 deletions(-)
>
> diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
> index cf879c188ca26..7af9d35cde884 100644
> --- a/net/mptcp/mib.c
> +++ b/net/mptcp/mib.c
> @@ -85,6 +85,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
> SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK),
> SNMP_MIB_ITEM("SimultConnectFallback",
> MPTCP_MIB_SIMULTCONNFALLBACK),
> SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED),
> + SNMP_MIB_ITEM("RcvDelayed", MPTCP_MIB_RCVDELAYED),
> + SNMP_MIB_ITEM("DelayedProcess", MPTCP_MIB_DELAYED_PROCESS),
> SNMP_MIB_SENTINEL
I guess this line was accidentally pasted, which prevents this patch
from being applied.
Thanks,
-Geliang
> };
>
> diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
> index 309bac6fea325..f6d0eaea463e5 100644
> --- a/net/mptcp/mib.h
> +++ b/net/mptcp/mib.h
> @@ -88,6 +88,10 @@ enum linux_mptcp_mib_field {
> MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */
> MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */
> MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk
> status */
> + MPTCP_MIB_RCVDELAYED, /* Data move from subflow is
> delayed due to msk
> + * receive buffer full
> + */
> + MPTCP_MIB_DELAYED_PROCESS, /* Delayed data moved in
> slowpath */
> __MPTCP_MIB_MAX
> };
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 9c3baed948d1d..f211a939daf39 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -665,13 +665,17 @@ static bool
> __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
> tp = tcp_sk(ssk);
> do {
> int mem = own_msk ? sk_rmem_alloc_get(sk) : sk-
> >sk_backlog.len;
> + bool over_limit = mem > READ_ONCE(sk->sk_rcvbuf);
> u32 map_remaining, offset;
> u32 seq = tp->copied_seq;
> struct sk_buff *skb;
> bool fin;
>
> - if (mem > READ_ONCE(sk->sk_rcvbuf))
> + WRITE_ONCE(subflow->data_delayed, over_limit);
> + if (subflow->data_delayed) {
> + MPTCP_INC_STATS(sock_net(sk),
> MPTCP_MIB_RCVDELAYED);
> break;
> + }
>
> /* try to move as much data as available */
> map_remaining = subflow->map_data_len -
> @@ -2081,32 +2085,13 @@ static void mptcp_rcv_space_adjust(struct
> mptcp_sock *msk, int copied)
> msk->rcvq_space.time = mstamp;
> }
>
> -static struct mptcp_subflow_context *
> -__mptcp_first_ready_from(struct mptcp_sock *msk,
> - struct mptcp_subflow_context *subflow)
> -{
> - struct mptcp_subflow_context *start_subflow = subflow;
> -
> - while (!READ_ONCE(subflow->data_avail)) {
> - subflow = mptcp_next_subflow(msk, subflow);
> - if (subflow == start_subflow)
> - return NULL;
> - }
> - return subflow;
> -}
> -
> static bool __mptcp_move_skbs(struct sock *sk)
> {
> struct mptcp_subflow_context *subflow;
> struct mptcp_sock *msk = mptcp_sk(sk);
> bool ret = false;
>
> - if (list_empty(&msk->conn_list))
> - return false;
> -
> - subflow = list_first_entry(&msk->conn_list,
> - struct mptcp_subflow_context,
> node);
> - for (;;) {
> + mptcp_for_each_subflow(msk, subflow) {
> struct sock *ssk;
> bool slowpath;
>
> @@ -2117,23 +2102,22 @@ static bool __mptcp_move_skbs(struct sock
> *sk)
> if (sk_rmem_alloc_get(sk) > sk->sk_rcvbuf)
> break;
>
> - subflow = __mptcp_first_ready_from(msk, subflow);
> - if (!subflow)
> - break;
> + if (!subflow->data_delayed)
> + continue;
>
> ssk = mptcp_subflow_tcp_sock(subflow);
> slowpath = lock_sock_fast(ssk);
> - ret = __mptcp_move_skbs_from_subflow(msk, ssk, true)
> || ret;
> + ret |= __mptcp_move_skbs_from_subflow(msk, ssk,
> true);
> if (unlikely(ssk->sk_err))
> __mptcp_error_report(sk);
> unlock_sock_fast(ssk, slowpath);
> -
> - subflow = mptcp_next_subflow(msk, subflow);
> }
>
> __mptcp_ofo_queue(msk);
> - if (ret)
> + if (ret) {
> + MPTCP_INC_STATS(sock_net(sk),
> MPTCP_MIB_DELAYED_PROCESS);
> mptcp_check_data_fin((struct sock *)msk);
> + }
> return ret;
> }
>
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index a6e775d6412e5..2905e4b250070 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -559,6 +559,7 @@ struct mptcp_subflow_context {
> u8 reset_transient:1;
> u8 reset_reason:4;
> u8 stale_count;
> + bool data_delayed;
>
> u32 subflow_id;
>
Hi Geliang, Paolo,
On 17/09/2025 05:39, Geliang Tang wrote:
> Hi Paolo,
>
> Thanks for this patch.
>
> On Tue, 2025-09-16 at 18:27 +0200, Paolo Abeni wrote:
>> skbs will be left waiting in the subflow only in exceptional cases,
>> we want to avoid messing with the fast path by unintentionally
>> processing in __mptcp_move_skbs() packets landed into the subflows
>> after the last check.
>>
>> Use a separate flag to mark delayed skbs and only process subflow
>> with such flag set. Also add new mibs to track the exceptional
>> events.
>>
>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>> ---
>> net/mptcp/mib.c | 2 ++
>> net/mptcp/mib.h | 4 ++++
>> net/mptcp/protocol.c | 40 ++++++++++++----------------------------
>> net/mptcp/protocol.h | 1 +
>> 4 files changed, 19 insertions(+), 28 deletions(-)
>>
>> diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
>> index cf879c188ca26..7af9d35cde884 100644
>> --- a/net/mptcp/mib.c
>> +++ b/net/mptcp/mib.c
>> @@ -85,6 +85,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
>> SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK),
>> SNMP_MIB_ITEM("SimultConnectFallback",
>> MPTCP_MIB_SIMULTCONNFALLBACK),
>> SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED),
>> + SNMP_MIB_ITEM("RcvDelayed", MPTCP_MIB_RCVDELAYED),
>> + SNMP_MIB_ITEM("DelayedProcess", MPTCP_MIB_DELAYED_PROCESS),
>> SNMP_MIB_SENTINEL
>
> I guess this line was accidentally pasted, which prevents this patch
> from being applied.
FYI, this line has been removed recently by commit 35cb2da0abaf ("mptcp:
snmp: do not use SNMP_MIB_SENTINEL anymore").
I manually fixed the conflict, and sent the series to the CI. I also
restarted my syzkaller instances to validate your series.
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.
On 9/17/25 2:53 PM, Matthieu Baerts wrote:
> On 17/09/2025 05:39, Geliang Tang wrote:
>> On Tue, 2025-09-16 at 18:27 +0200, Paolo Abeni wrote:
>>> skbs will be left waiting in the subflow only in exceptional cases,
>>> we want to avoid messing with the fast path by unintentionally
>>> processing in __mptcp_move_skbs() packets landed into the subflows
>>> after the last check.
>>>
>>> Use a separate flag to mark delayed skbs and only process subflow
>>> with such flag set. Also add new mibs to track the exceptional
>>> events.
>>>
>>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>>> ---
>>> net/mptcp/mib.c | 2 ++
>>> net/mptcp/mib.h | 4 ++++
>>> net/mptcp/protocol.c | 40 ++++++++++++----------------------------
>>> net/mptcp/protocol.h | 1 +
>>> 4 files changed, 19 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
>>> index cf879c188ca26..7af9d35cde884 100644
>>> --- a/net/mptcp/mib.c
>>> +++ b/net/mptcp/mib.c
>>> @@ -85,6 +85,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
>>> SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK),
>>> SNMP_MIB_ITEM("SimultConnectFallback",
>>> MPTCP_MIB_SIMULTCONNFALLBACK),
>>> SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED),
>>> + SNMP_MIB_ITEM("RcvDelayed", MPTCP_MIB_RCVDELAYED),
>>> + SNMP_MIB_ITEM("DelayedProcess", MPTCP_MIB_DELAYED_PROCESS),
>>> SNMP_MIB_SENTINEL
>>
>> I guess this line was accidentally pasted, which prevents this patch
>> from being applied.
>
> FYI, this line has been removed recently by commit 35cb2da0abaf ("mptcp:
> snmp: do not use SNMP_MIB_SENTINEL anymore").
>
> I manually fixed the conflict, and sent the series to the CI. I also
> restarted my syzkaller instances to validate your series.
Oh wow! many thanks! I though I pulled and rebased the series just
before submitting it, but perhaps PEBKAC hit again.
In any case I'll send a v2 to address the comments on other patches.
/P
© 2016 - 2026 Red Hat, Inc.