On Fri, 2025-09-19 at 17:53 +0200, Paolo Abeni wrote:
> In the MPTCP receive path, we release the subflow-allocated
> fwd memory just to allocate it again shortly after for the msk.
>
> That could increase the chance of failure, especially during
> backlog processing, when other actions could consume the just
> released memory before the msk socket has a chance to do the
> rcv allocation.
>
> Replace the skb_orphan() call with an open-coded variant that
> explicitly borrows, at PAGE_SIZE granularity, the fwd memory
> from the subflow socket instead of releasing it. During backlog
> processing the borrowed memory is accounted at release_cb time.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
LGTM!
Reviewed-by: Geliang Tang <geliang@kernel.org>
Tested-by: Geliang Tang <geliang@kernel.org>
Thanks,
-Geliang
> ---
> v1 -> v2:
> - rebased
> - explain why skb_orphan is removed
> ---
> net/mptcp/protocol.c | 27 +++++++++++++++++++++------
> net/mptcp/protocol.h | 1 +
> 2 files changed, 22 insertions(+), 6 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 2a025c0c4ca0c..7db5adb43d41b 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -338,11 +338,12 @@ static void mptcp_data_queue_ofo(struct
> mptcp_sock *msk, struct sk_buff *skb)
> mptcp_rcvbuf_grow(sk);
> }
>
> -static void mptcp_init_skb(struct sock *ssk,
> - struct sk_buff *skb, int offset, int
> copy_len)
> +static int mptcp_init_skb(struct sock *ssk,
> + struct sk_buff *skb, int offset, int
> copy_len)
> {
> const struct mptcp_subflow_context *subflow =
> mptcp_subflow_ctx(ssk);
> bool has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
> + int borrowed;
>
> /* the skb map_seq accounts for the skb offset:
> * mptcp_subflow_get_mapped_dsn() is based on the current
> tp->copied_seq
> @@ -358,6 +359,15 @@ static void mptcp_init_skb(struct sock *ssk,
>
> skb_ext_reset(skb);
> skb_dst_drop(skb);
> +
> + /* "borrow" the fwd memory from the subflow, instead of
> reclaiming it */
> + skb->destructor = NULL;
> + skb->sk = NULL;
> + atomic_sub(skb->truesize, &ssk->sk_rmem_alloc);
> + borrowed = ssk->sk_forward_alloc -
> sk_unused_reserved_mem(ssk);
> + borrowed &= ~(PAGE_SIZE - 1);
> + sk_forward_alloc_add(ssk, skb->truesize - borrowed);
> + return borrowed;
> }
>
> static void __mptcp_add_backlog(struct sock *sk, struct sock *ssk,
> @@ -717,14 +727,17 @@ static bool
> __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
>
> if (offset < skb->len) {
> size_t len = skb->len - offset;
> + int bmem;
>
> - mptcp_init_skb(ssk, skb, offset, len);
> - skb_orphan(skb);
> + bmem = mptcp_init_skb(ssk, skb, offset,
> len);
>
> - if (own_msk)
> + if (own_msk) {
> + sk_forward_alloc_add(sk, bmem);
> ret |= __mptcp_move_skb(sk, skb);
> - else
> + } else {
> + msk->borrowed_fwd_mem += bmem;
> __mptcp_add_backlog(sk, ssk, skb);
> + }
> seq += len;
>
> if (unlikely(map_remaining < len)) {
> @@ -3514,6 +3527,8 @@ static void mptcp_release_cb(struct sock *sk)
> if (__test_and_clear_bit(MPTCP_SYNC_SNDBUF, &msk-
> >cb_flags))
> __mptcp_sync_sndbuf(sk);
> }
> + sk_forward_alloc_add(sk, msk->borrowed_fwd_mem);
> + msk->borrowed_fwd_mem = 0;
> }
>
> /* MP_JOIN client subflow must wait for 4th ack before sending any
> data:
> diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
> index a295ce11774ea..ff87dd9a0da5a 100644
> --- a/net/mptcp/protocol.h
> +++ b/net/mptcp/protocol.h
> @@ -298,6 +298,7 @@ struct mptcp_sock {
> u32 last_data_sent;
> u32 last_data_recv;
> u32 last_ack_recv;
> + int borrowed_fwd_mem;
> unsigned long timer_ival;
> u32 token;
> unsigned long flags;