[PATCH mptcp-net v5 05/20] mptcp: pm: ADD_ADDR rtx: free sk if last

Matthieu Baerts (NGI0) posted 20 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH mptcp-net v5 05/20] mptcp: pm: ADD_ADDR rtx: free sk if last
Posted by Matthieu Baerts (NGI0) 1 month, 3 weeks ago
When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(),
and released at the end.

If at that moment, it was the last reference being held, the sk would
not be freed. sock_put() should then be called instead of __sock_put().

But that's not enough: if it is the last reference, sock_put() will call
sk_free(), which will end up calling sk_stop_timer_sync() on the same
timer and waiting indefinitely for it to finish. The timer handler must
therefore mark the timer as done before returning when it has not been
rescheduled, so that sk_stop_timer_sync() is not called on "itself".

Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
---
v3: support calling sk_free() from the timer handler. Note: I'm not very
    happy with this patch, it looks too big. Did I miss a simpler way?
v4: init timer_done after 'reset_timer' label to handle cases where the
    sysctl is changed in between.
v5: only set timer_done to true in the timer handler, safer and easier.
---
 net/mptcp/pm.c | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index d3fcf441b208..0ff35f49d451 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -16,6 +16,7 @@ struct mptcp_pm_add_entry {
 	struct list_head	list;
 	struct mptcp_addr_info	addr;
 	u8			retrans_times;
+	bool			timer_done;
 	struct timer_list	add_timer;
 	struct mptcp_sock	*sock;
 	struct rcu_head		rcu;
@@ -327,22 +328,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 							      add_timer);
 	struct mptcp_sock *msk = entry->sock;
 	struct sock *sk = (struct sock *)msk;
-	unsigned int timeout;
+	unsigned int timeout = 0;
 
 	pr_debug("msk=%p\n", msk);
 
-	if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
-		goto exit;
-
 	bh_lock_sock(sk);
+	if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
+		goto out;
+
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
-		sk_reset_timer(sk, timer, jiffies + HZ / 20);
+		timeout = HZ / 20;
 		goto out;
 	}
 
 	if (mptcp_pm_should_add_signal_addr(msk)) {
-		sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
+		timeout = TCP_RTO_MAX / 8;
 		goto out;
 	}
 
@@ -360,8 +361,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 	}
 
 	if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
-		sk_reset_timer(sk, timer,
-			       jiffies + (timeout << entry->retrans_times));
+		timeout <<= entry->retrans_times;
+	else
+		timeout = 0;
 
 	spin_unlock_bh(&msk->pm.lock);
 
@@ -369,9 +371,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 		mptcp_pm_subflow_established(msk);
 
 out:
+	if (timeout)
+		sk_reset_timer(sk, timer, jiffies + timeout);
+	else
+		/* if sock_put calls sk_free: avoid waiting for this timer */
+		entry->timer_done = true;
 	bh_unlock_sock(sk);
-exit:
-	__sock_put(sk);
+	sock_put(sk);
 }
 
 struct mptcp_pm_add_entry *
@@ -434,6 +440,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
 
 	timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
 reset_timer:
+	add_entry->timer_done = false;
 	timeout = mptcp_adjust_add_addr_timeout(msk);
 	if (timeout)
 		sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);
@@ -454,7 +461,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
 	spin_unlock_bh(&msk->pm.lock);
 
 	list_for_each_entry_safe(entry, tmp, &free_list, list) {
-		sk_stop_timer_sync(sk, &entry->add_timer);
+		if (!entry->timer_done)
+			sk_stop_timer_sync(sk, &entry->add_timer);
 		kfree_rcu(entry, rcu);
 	}
 }

-- 
2.53.0
Re: [PATCH mptcp-net v5 05/20] mptcp: pm: ADD_ADDR rtx: free sk if last
Posted by Mat Martineau 1 month, 2 weeks ago
On Wed, 15 Apr 2026, Matthieu Baerts (NGI0) wrote:

> When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(),
> and released at the end.
>
> If at that moment, it was the last reference being held, the sk would
> not be freed. sock_put() should then be called instead of __sock_put().
>
> But that's not enough: if it is the last reference, sock_put() will call
> sk_free(), which will end up calling sk_stop_timer_sync() on the same
> timer, and waiting indefinitely to finish. So it is needed to mark that
> the timer is done at the end of the timer handler when it has not been
> rescheduled, not to call sk_stop_timer_sync() on "itself".
>
> Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
> ---
> v3: support calling sk_free() from the timer handler. Note: I'm not very
>    happy with this patch, it looks too big. Did I miss a simpler way?
> v4: init timer_done after 'reset_timer' label to handle cases where the
>    sysctl is changed in between.
> v5: only set timer_done to true in the timer handler, safer and easier.
> ---
> net/mptcp/pm.c | 30 +++++++++++++++++++-----------
> 1 file changed, 19 insertions(+), 11 deletions(-)
>
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index d3fcf441b208..0ff35f49d451 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -16,6 +16,7 @@ struct mptcp_pm_add_entry {
> 	struct list_head	list;
> 	struct mptcp_addr_info	addr;
> 	u8			retrans_times;
> +	bool			timer_done;
> 	struct timer_list	add_timer;
> 	struct mptcp_sock	*sock;
> 	struct rcu_head		rcu;
> @@ -327,22 +328,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
> 							      add_timer);
> 	struct mptcp_sock *msk = entry->sock;
> 	struct sock *sk = (struct sock *)msk;
> -	unsigned int timeout;
> +	unsigned int timeout = 0;
>
> 	pr_debug("msk=%p\n", msk);
>
> -	if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
> -		goto exit;
> -
> 	bh_lock_sock(sk);
> +	if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
> +		goto out;
> +
> 	if (sock_owned_by_user(sk)) {
> 		/* Try again later. */
> -		sk_reset_timer(sk, timer, jiffies + HZ / 20);
> +		timeout = HZ / 20;
> 		goto out;
> 	}
>
> 	if (mptcp_pm_should_add_signal_addr(msk)) {
> -		sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
> +		timeout = TCP_RTO_MAX / 8;
> 		goto out;
> 	}
>
> @@ -360,8 +361,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
> 	}
>
> 	if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
> -		sk_reset_timer(sk, timer,
> -			       jiffies + (timeout << entry->retrans_times));
> +		timeout <<= entry->retrans_times;
> +	else
> +		timeout = 0;
>
> 	spin_unlock_bh(&msk->pm.lock);
>
> @@ -369,9 +371,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
> 		mptcp_pm_subflow_established(msk);
>
> out:
> +	if (timeout)
> +		sk_reset_timer(sk, timer, jiffies + timeout);
> +	else
> +		/* if sock_put calls sk_free: avoid waiting for this timer */
> +		entry->timer_done = true;
> 	bh_unlock_sock(sk);
> -exit:

This label was just added in the previous patch (which was quite small). I 
suggest squashing these two patches, but not a big problem to leave them 
separate.

- Mat

> -	__sock_put(sk);
> +	sock_put(sk);
> }
>
> struct mptcp_pm_add_entry *
> @@ -434,6 +440,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
>
> 	timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
> reset_timer:
> +	add_entry->timer_done = false;
> 	timeout = mptcp_adjust_add_addr_timeout(msk);
> 	if (timeout)
> 		sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);
> @@ -454,7 +461,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
> 	spin_unlock_bh(&msk->pm.lock);
>
> 	list_for_each_entry_safe(entry, tmp, &free_list, list) {
> -		sk_stop_timer_sync(sk, &entry->add_timer);
> +		if (!entry->timer_done)
> +			sk_stop_timer_sync(sk, &entry->add_timer);
> 		kfree_rcu(entry, rcu);
> 	}
> }
>
> -- 
> 2.53.0
>
>
>
Re: [PATCH mptcp-net v5 05/20] mptcp: pm: ADD_ADDR rtx: free sk if last
Posted by Matthieu Baerts 1 month, 2 weeks ago
Hi Mat,

Thank you very much for this review!

On 18/04/2026 20:00, Mat Martineau wrote:
> On Wed, 15 Apr 2026, Matthieu Baerts (NGI0) wrote:
> 
>> When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(),
>> and released at the end.
>>
>> If at that moment, it was the last reference being held, the sk would
>> not be freed. sock_put() should then be called instead of __sock_put().
>>
>> But that's not enough: if it is the last reference, sock_put() will call
>> sk_free(), which will end up calling sk_stop_timer_sync() on the same
>> timer, and waiting indefinitely to finish. So it is needed to mark that
>> the timer is done at the end of the timer handler when it has not been
>> rescheduled, not to call sk_stop_timer_sync() on "itself".

(...)

>> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
>> index d3fcf441b208..0ff35f49d451 100644
>> --- a/net/mptcp/pm.c
>> +++ b/net/mptcp/pm.c

(...)

>> @@ -369,9 +371,13 @@ static void mptcp_pm_add_timer(struct timer_list
>> *timer)
>>         mptcp_pm_subflow_established(msk);
>>
>> out:
>> +    if (timeout)
>> +        sk_reset_timer(sk, timer, jiffies + timeout);
>> +    else
>> +        /* if sock_put calls sk_free: avoid waiting for this timer */
>> +        entry->timer_done = true;
>>     bh_unlock_sock(sk);
>> -exit:
> 
> This label was just added in the previous patch (which was quite small).
> I suggest squashing these two patches, but not a big problem to leave
> them separate.

I understand, but if that's OK, I would prefer to keep them separate:
even if they are linked, the problems are different. The previous patch
makes sure the refcount is decremented in all cases, while this one
actually frees the socket if it was the last reference.

In fact, I was looking at squashing them, but I didn't know what commit
message to add. Having "while at it, always decrement the refcount"
feels like hiding this important issue. But maybe not. Up to you for the
squashing.

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.

Re: [PATCH mptcp-net v5 05/20] mptcp: pm: ADD_ADDR rtx: free sk if last
Posted by Mat Martineau 1 month, 2 weeks ago
On Mon, 20 Apr 2026, Matthieu Baerts wrote:

> Hi Mat,
>
> Thank you very much for this review!
>
> On 18/04/2026 20:00, Mat Martineau wrote:
>> On Wed, 15 Apr 2026, Matthieu Baerts (NGI0) wrote:
>>
>>> When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(),
>>> and released at the end.
>>>
>>> If at that moment, it was the last reference being held, the sk would
>>> not be freed. sock_put() should then be called instead of __sock_put().
>>>
>>> But that's not enough: if it is the last reference, sock_put() will call
>>> sk_free(), which will end up calling sk_stop_timer_sync() on the same
>>> timer, and waiting indefinitely to finish. So it is needed to mark that
>>> the timer is done at the end of the timer handler when it has not been
>>> rescheduled, not to call sk_stop_timer_sync() on "itself".
>
> (...)
>
>>> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
>>> index d3fcf441b208..0ff35f49d451 100644
>>> --- a/net/mptcp/pm.c
>>> +++ b/net/mptcp/pm.c
>
> (...)
>
>>> @@ -369,9 +371,13 @@ static void mptcp_pm_add_timer(struct timer_list
>>> *timer)
>>>         mptcp_pm_subflow_established(msk);
>>>
>>> out:
>>> +    if (timeout)
>>> +        sk_reset_timer(sk, timer, jiffies + timeout);
>>> +    else
>>> +        /* if sock_put calls sk_free: avoid waiting for this timer */
>>> +        entry->timer_done = true;
>>>     bh_unlock_sock(sk);
>>> -exit:
>>
>> This label was just added in the previous patch (which was quite small).
>> I suggest squashing these two patches, but not a big problem to leave
>> them separate.
>
> I understand, but if that's OK, I would prefer to keep them separated:
> even if they are linked, the problems are different. The previous patch
> makes sure the refcount is decremented in all cases, while this one
> actually free the socket if it was the last one.
>
> In fact, I was looking at squashing them, but I didn't know what commit
> message to add. Having "while at it, always decrement the refcount"
> feels like hiding this important issue. But maybe not. Up to you for the
> squashing.
>

It's fine to leave them separate, thanks for explaining.

- Mat