Hello,
On 13/04/2026 16:11, Matthieu Baerts (NGI0) wrote:
> When an ADD_ADDR is retransmitted, the sk is held in sk_reset_timer(),
> and released at the end.
>
> If at that moment, it was the last reference being held, the sk would
> not be freed. sock_put() should then be called instead of __sock_put().
>
> But that's not enough: if it is the last reference, sock_put() will call
> sk_free(), which will end up calling sk_stop_timer_sync() on the same
> timer, and waiting indefinitely to finish. So it is needed to mark that
> the timer is done, and no need to call sk_stop_timer_sync().
>
> Fixes: 00cfd77b9063 ("mptcp: retransmit ADD_ADDR when timeout")
> Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
> ---
> v3: support calling sk_free() from the timer handler. Note: I'm not very
> happy with this patch, it looks too big. Did I miss a simpler way?
> ---
> net/mptcp/pm.c | 32 +++++++++++++++++++++-----------
> 1 file changed, 21 insertions(+), 11 deletions(-)
>
> diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
> index d3fcf441b208..adbebf46dcce 100644
> --- a/net/mptcp/pm.c
> +++ b/net/mptcp/pm.c
> @@ -16,6 +16,7 @@ struct mptcp_pm_add_entry {
> struct list_head list;
> struct mptcp_addr_info addr;
> u8 retrans_times;
> + bool timer_done;
> struct timer_list add_timer;
> struct mptcp_sock *sock;
> struct rcu_head rcu;
> @@ -327,22 +328,22 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
> add_timer);
> struct mptcp_sock *msk = entry->sock;
> struct sock *sk = (struct sock *)msk;
> - unsigned int timeout;
> + unsigned int timeout = 0;
>
> pr_debug("msk=%p\n", msk);
>
> - if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
> - goto exit;
> -
> bh_lock_sock(sk);
> + if (unlikely(inet_sk_state_load(sk) == TCP_CLOSE))
> + goto out;
> +
> if (sock_owned_by_user(sk)) {
> /* Try again later. */
> - sk_reset_timer(sk, timer, jiffies + HZ / 20);
> + timeout = HZ / 20;
> goto out;
> }
>
> if (mptcp_pm_should_add_signal_addr(msk)) {
> - sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
> + timeout = TCP_RTO_MAX / 8;
> goto out;
> }
>
> @@ -360,8 +361,9 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
> }
>
> if (entry->retrans_times < ADD_ADDR_RETRANS_MAX)
> - sk_reset_timer(sk, timer,
> - jiffies + (timeout << entry->retrans_times));
> + timeout <<= entry->retrans_times;
> + else
> + timeout = 0;
>
> spin_unlock_bh(&msk->pm.lock);
>
> @@ -369,9 +371,13 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
> mptcp_pm_subflow_established(msk);
>
> out:
> + if (timeout)
> + sk_reset_timer(sk, timer, jiffies + timeout);
> + else
> + /* if sock_put calls sk_free: avoid waiting for this timer */
> + entry->timer_done = true;
> bh_unlock_sock(sk);
> -exit:
> - __sock_put(sk);
> + sock_put(sk);
> }
>
> struct mptcp_pm_add_entry *
> @@ -431,12 +437,15 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
> add_entry->addr = *addr;
> add_entry->sock = msk;
> add_entry->retrans_times = 0;
> + add_entry->timer_done = false;
As recommended by sashiko, this should be moved under the "reset_timer"
label just in case the sysctl value has been changed in between.
Also, I don't think we need to protect that with READ/WRITE_ONCE: this
variable is only useful when 'sock_put()' is called from the timer
handler, and it removes the last socket reference: when it is the last
consumer, alone, no concurrency then.
>
> timer_setup(&add_entry->add_timer, mptcp_pm_add_timer, 0);
> reset_timer:
> timeout = mptcp_adjust_add_addr_timeout(msk);
> if (timeout)
> sk_reset_timer(sk, &add_entry->add_timer, jiffies + timeout);
> + else
> + add_entry->timer_done = true;
>
> return true;
> }
> @@ -454,7 +463,8 @@ static void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
> spin_unlock_bh(&msk->pm.lock);
>
> list_for_each_entry_safe(entry, tmp, &free_list, list) {
> - sk_stop_timer_sync(sk, &entry->add_timer);
> + if (!entry->timer_done)
> + sk_stop_timer_sync(sk, &entry->add_timer);
> kfree_rcu(entry, rcu);
> }
> }
>
Cheers,
Matt
--
Sponsored by the NGI0 Core fund.