net/mptcp/protocol.c | 48 +++++++++++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 14 deletions(-)
Backlog entries may retain references to a subflow socket through
skb->sk. These references are later cleaned up during subflow teardown.
Currently, backlog cleanup is not synchronized with RX-side backlog
enqueue, which is serialized under mptcp_data_lock(). This allows a
race where teardown misses an skb referencing the subflow socket, and
the stale skb->sk pointer survives after the socket is released.
Later backlog purge may then dereference the stale pointer, leading to
memory accounting warnings in inet_sock_destruct() followed by a
use-after-free in mptcp_backlog_purge().
Prevent this by synchronizing backlog cleanup against RX enqueue while
the subflow is being closed, ensuring no stale skb->sk references
survive teardown.
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/621
Signed-off-by: Kalpan Jani <kalpan.jani@mpiricsoftware.com>
Suggested-by: Paolo Abeni <pabeni@redhat.com>
---
v1:
- Detect and clear stale skb->sk references from the MPTCP backlog during subflow teardown to prevent use-after-free in backlog purge.
v2:
- Rework backlog cleanup based on Paolo Abeni's suggestion to serialize cleanup with RX-side backlog enqueue under mptcp_data_lock().
- Move cleanup into __mptcp_close_ssk() while holding the subflow socket lock, ensuring no new backlog skb can retain a reference to the closing subflow after cleanup completes.
- Keep the fallback cleanup path for detached subflows after releasing the subflow socket, preserving correctness for that teardown path.
net/mptcp/protocol.c | 48 +++++++++++++++++++++++++++++++-------------
1 file changed, 34 insertions(+), 14 deletions(-)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 718e910ff..3dcffe4b5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2527,6 +2527,22 @@ static void __mptcp_subflow_disconnect(struct sock *ssk,
}
}
+static void mptcp_cleanup_ssk_backlog(struct sock *sk, struct sock *ssk)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sk_buff *skb;
+
+ mptcp_data_lock(sk);
+ list_for_each_entry(skb, &msk->backlog_list, list) {
+ if (skb->sk != ssk)
+ continue;
+
+ atomic_sub(skb->truesize, &skb->sk->sk_rmem_alloc);
+ skb->sk = NULL;
+ }
+ mptcp_data_unlock(sk);
+}
+
/* subflow sockets can be either outgoing (connect) or incoming
* (accept).
*
@@ -2540,6 +2556,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
unsigned int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sk_buff *skb;
bool dispose_it, need_push = false;
int fwd_remaining;
@@ -2549,6 +2566,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
*/
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
subflow->closing = 1;
+
+ /* Remove any reference from the backlog to this ssk; backlog skbs
+ * consume space in the msk receive queue, no need to touch
+ * sk->sk_rmem_alloc. Serialize with mptcp_data_ready() under
+ * mptcp_data_lock() while the ssk lock is still held, so the
+ * cleanup is exhaustive: no new skb can be enqueued after this point.
+ */
+ mptcp_data_lock(sk);
+ list_for_each_entry(skb, &msk->backlog_list, list) {
+ if (skb->sk != ssk)
+ continue;
+
+ atomic_sub(skb->truesize, &skb->sk->sk_rmem_alloc);
+ skb->sk = NULL;
+ }
+ mptcp_data_unlock(sk);
/* Borrow the fwd allocated page left-over; fwd memory for the subflow
* could be negative at this point, but will be reach zero soon - when
@@ -2587,6 +2620,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
__mptcp_subflow_disconnect(ssk, subflow, msk->fastclosing);
release_sock(ssk);
+ mptcp_cleanup_ssk_backlog(sk, ssk);
goto out;
}
@@ -2641,9 +2675,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
- struct sk_buff *skb;
-
/* The first subflow can already be closed or disconnected */
if (subflow->close_event_done || READ_ONCE(subflow->local_id) < 0)
return;
@@ -2653,17 +2684,6 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
if (sk->sk_state == TCP_ESTABLISHED)
mptcp_event(MPTCP_EVENT_SUB_CLOSED, mptcp_sk(sk), ssk, GFP_KERNEL);
- /* Remove any reference from the backlog to this ssk; backlog skbs consume
- * space in the msk receive queue, no need to touch sk->sk_rmem_alloc
- */
- list_for_each_entry(skb, &msk->backlog_list, list) {
- if (skb->sk != ssk)
- continue;
-
- atomic_sub(skb->truesize, &skb->sk->sk_rmem_alloc);
- skb->sk = NULL;
- }
-
/* subflow aborted before reaching the fully_established status
* attempt the creation of the next subflow
*/
--
2.43.0
On 5/18/26 7:00 AM, Kalpan Jani wrote:
> @@ -2549,6 +2566,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> */
> lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
> subflow->closing = 1;
> +
> + /* Remove any reference from the backlog to this ssk; backlog skbs
> + * consume space in the msk receive queue, no need to touch
> + * sk->sk_rmem_alloc. Serialize with mptcp_data_ready() under
> + * mptcp_data_lock() while the ssk lock is still held, so the
> + * cleanup is exhaustive: no new skb can be enqueued after this point.
> + */
> + mptcp_data_lock(sk);
> + list_for_each_entry(skb, &msk->backlog_list, list) {
> + if (skb->sk != ssk)
> + continue;
> +
> + atomic_sub(skb->truesize, &skb->sk->sk_rmem_alloc);
> + skb->sk = NULL;
> + }
> + mptcp_data_unlock(sk);
Here you should use the just defined helper, and only when `flags &
MPTCP_CF_PUSH`, otherwise the backlog cleanup is already performed by
the caller.
>
> /* Borrow the fwd allocated page left-over; fwd memory for the subflow
> * could be negative at this point, but will be reach zero soon - when
> @@ -2587,6 +2620,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
> __mptcp_subflow_disconnect(ssk, subflow, msk->fastclosing);
> release_sock(ssk);
>
> + mptcp_cleanup_ssk_backlog(sk, ssk);
Why are you cleaning the backlog again?
Why you did not use the code I suggested?
For the next iteration, please go trough the mptcp ML only, to reduce
traffic on more busy places.
/P
/P
© 2016 - 2026 Red Hat, Inc.