[PATCH mptcp-net v3 05/19] mptcp: pm: ADD_ADDR rtx: free sk if last

Matthieu Baerts (NGI0) posted 19 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH mptcp-net v3 05/19] mptcp: pm: ADD_ADDR rtx: free sk if last
Posted by Matthieu Baerts (NGI0) 1 month, 3 weeks ago
When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(),
and released at the end.

If at that moment, it was the last reference being held, the sk would
not be freed. sock_put() should then be called instead of __sock_put().

But that's not enough: if it is the last reference, sock_put() will call
sk_free(), which will end up calling sk_stop_timer_sync() on the same
timer, and waiting indefinitely to finish. So it is needed to mark that
the timer is done, and no need to call sk_stop_timer_sync().

Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
---
v3: support calling sk_free() from the timer handler. Note: I'm not very
    happy with this patch, it looks too big. Did I miss a simpler way?
---
 net/mptcp/pm.c | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index d3fcf441b208..adbebf46dcce 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -16,6 +16,7 @@ struct mptcp_pm_add_entry {
 	struct list_head	list;
 	struct mptcp_addr_info	addr;
 	u8			retrans_times;
+	bool			timer_done;
 	struct timer_list	add_timer;
 	struct mptcp_sock	*sock;
 	struct rcu_head		rcu;
@@ -327,22 +328,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 							      add_timer);
 	struct mptcp_sock *msk = entry->sock;
 	struct sock *sk = (struct sock *)msk;
-	unsigned int timeout;
+	unsigned int timeout = 0;
 
 	pr_debug("msk=%p\n", msk);
 
-	if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
-		goto exit;
-
 	bh_lock_sock(sk);
+	if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
+		goto out;
+
 	if (sock_owned_by_user(sk)) {
 		/* Try again later. */
-		sk_reset_timer(sk, timer, jiffies + HZ / 20);
+		timeout = HZ / 20;
 		goto out;
 	}
 
 	if (mptcp_pm_should_add_signal_addr(msk)) {
-		sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
+		timeout = TCP_RTO_MAX / 8;
 		goto out;
 	}
 
@@ -360,8 +361,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 	}
 
 	if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
-		sk_reset_timer(sk, timer,
-			       jiffies + (timeout << entry->retrans_times));
+		timeout <<= entry->retrans_times;
+	else
+		timeout = 0;
 
 	spin_unlock_bh(&msk->pm.lock);
 
@@ -369,9 +371,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
 		mptcp_pm_subflow_established(msk);
 
 out:
+	if (timeout)
+		sk_reset_timer(sk, timer, jiffies + timeout);
+	else
+		/* if sock_put calls sk_free: avoid waiting for this timer */
+		entry->timer_done = true;
 	bh_unlock_sock(sk);
-exit:
-	__sock_put(sk);
+	sock_put(sk);
 }
 
 struct mptcp_pm_add_entry *
@@ -431,12 +437,15 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
 	add_entry->addr = *addr;
 	add_entry->sock = msk;
 	add_entry->retrans_times = 0;
+	add_entry->timer_done = false;
 
 	timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
 reset_timer:
 	timeout = mptcp_adjust_add_addr_timeout(msk);
 	if (timeout)
 		sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);
+	else
+		add_entry->timer_done = true;
 
 	return true;
 }
@@ -454,7 +463,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
 	spin_unlock_bh(&msk->pm.lock);
 
 	list_for_each_entry_safe(entry, tmp, &free_list, list) {
-		sk_stop_timer_sync(sk, &entry->add_timer);
+		if (!entry->timer_done)
+			sk_stop_timer_sync(sk, &entry->add_timer);
 		kfree_rcu(entry, rcu);
 	}
 }

-- 
2.53.0
Re: [PATCH mptcp-net v3 05/19] mptcp: pm: ADD_ADDR rtx: free sk if last
Posted by Matthieu Baerts 1 month, 3 weeks ago
Hello,

On 13/04/2026 16:11, Matthieu Baerts (NGI0) wrote:
> When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(),
> and released at the end.
> 
> If at that moment, it was the last reference being held, the sk would
> not be freed. sock_put() should then be called instead of __sock_put().
> 
> But that's not enough: if it is the last reference, sock_put() will call
> sk_free(), which will end up calling sk_stop_timer_sync() on the same
> timer, and waiting indefinitely to finish. So it is needed to mark that
> the timer is done, and no need to call sk_stop_timer_sync().
> 
> Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
> ---
> v3: support calling sk_free() from the timer handler. Note: I'm not very
>     happy with this patch, it looks too big. Did I miss a simpler way?
> ---
>  net/mptcp/pm.c | 32 +++++++++++++++++++++-----------
>  1 file changed, 21 insertions(+), 11 deletions(-)
> 
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index d3fcf441b208..adbebf46dcce 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -16,6 +16,7 @@ struct mptcp_pm_add_entry {
>  	struct list_head	list;
>  	struct mptcp_addr_info	addr;
>  	u8			retrans_times;
> +	bool			timer_done;
>  	struct timer_list	add_timer;
>  	struct mptcp_sock	*sock;
>  	struct rcu_head		rcu;
> @@ -327,22 +328,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
>  							      add_timer);
>  	struct mptcp_sock *msk = entry->sock;
>  	struct sock *sk = (struct sock *)msk;
> -	unsigned int timeout;
> +	unsigned int timeout = 0;
>  
>  	pr_debug("msk=%p\n", msk);
>  
> -	if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
> -		goto exit;
> -
>  	bh_lock_sock(sk);
> +	if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
> +		goto out;
> +
>  	if (sock_owned_by_user(sk)) {
>  		/* Try again later. */
> -		sk_reset_timer(sk, timer, jiffies + HZ / 20);
> +		timeout = HZ / 20;
>  		goto out;
>  	}
>  
>  	if (mptcp_pm_should_add_signal_addr(msk)) {
> -		sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
> +		timeout = TCP_RTO_MAX / 8;
>  		goto out;
>  	}
>  
> @@ -360,8 +361,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
>  	}
>  
>  	if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
> -		sk_reset_timer(sk, timer,
> -			       jiffies + (timeout << entry->retrans_times));
> +		timeout <<= entry->retrans_times;
> +	else
> +		timeout = 0;
>  
>  	spin_unlock_bh(&msk->pm.lock);
>  
> @@ -369,9 +371,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
>  		mptcp_pm_subflow_established(msk);
>  
>  out:
> +	if (timeout)
> +		sk_reset_timer(sk, timer, jiffies + timeout);
> +	else
> +		/* if sock_put calls sk_free: avoid waiting for this timer */
> +		entry->timer_done = true;
>  	bh_unlock_sock(sk);
> -exit:
> -	__sock_put(sk);
> +	sock_put(sk);
>  }
>  
>  struct mptcp_pm_add_entry *
> @@ -431,12 +437,15 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
>  	add_entry->addr = *addr;
>  	add_entry->sock = msk;
>  	add_entry->retrans_times = 0;
> +	add_entry->timer_done = false;

As recommended by sashiko, this should be moved under the "reset_timer"
label just in case the sysctl value has been changed in between.

Also, I don't think we need to protect that with READ/WRITE_ONCE: this
variable is only useful when 'sock_put()' is called from the timer
handler, and it removes the last socket reference: when it is the last
consumer, alone, no concurrency then.

>  
>  	timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
>  reset_timer:
>  	timeout = mptcp_adjust_add_addr_timeout(msk);
>  	if (timeout)
>  		sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);
> +	else
> +		add_entry->timer_done = true;
>  
>  	return true;
>  }
> @@ -454,7 +463,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
>  	spin_unlock_bh(&msk->pm.lock);
>  
>  	list_for_each_entry_safe(entry, tmp, &free_list, list) {
> -		sk_stop_timer_sync(sk, &entry->add_timer);
> +		if (!entry->timer_done)
> +			sk_stop_timer_sync(sk, &entry->add_timer);
>  		kfree_rcu(entry, rcu);
>  	}
>  }
> 

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.