The tasklet_unlock_spin_wait() via tasklet_disable_in_atomic() is
provided for a few legacy tasklet users. The interface is used from
atomic context (which is either softirq or disabled preemption) on
non-PREEMPT_RT and relies on spinning until the tasklet callback
completes.
On PREEMPT_RT the context is never atomic but the busy polling logic
remains. It is possible that the thread invoking tasklet_unlock_spin_wait()
has higher priority than the tasklet. If both run on the same CPU the
tasklet makes no progress and the thread trying to cancel the
tasklet will live-lock the system.
To avoid the lockup tasklet_unlock_spin_wait() uses local_bh_disable()/
enable() which utilizes the local_lock_t for synchronisation. This lock
is a central per-CPU BKL and about to be removed.
Acquire a lock in tasklet_action_common() which is held while the
tasklet's callback is invoked. This lock will be acquired from
tasklet_unlock_spin_wait() via tasklet_callback_cancel_wait_running().
After the tasklet has completed, tasklet_callback_sync_wait_running() drops
the lock and acquires it again. In order to avoid unlocking the lock
even if there is no cancel request, there is a cb_waiters counter which
is incremented during a cancel request.
Blocking on the lock will PI-boost the tasklet if needed, ensuring
progress is made.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
kernel/softirq.c | 62 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 57 insertions(+), 5 deletions(-)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 513b1945987cc..4e2c980e7712e 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -805,6 +805,58 @@ static bool tasklet_clear_sched(struct tasklet_struct *t)
return false;
}
+#ifdef CONFIG_PREEMPT_RT
+struct tasklet_sync_callback {
+ spinlock_t cb_lock;
+ atomic_t cb_waiters;
+};
+
+static DEFINE_PER_CPU(struct tasklet_sync_callback, tasklet_sync_callback) = {
+ .cb_lock = __SPIN_LOCK_UNLOCKED(tasklet_sync_callback.cb_lock),
+ .cb_waiters = ATOMIC_INIT(0),
+};
+
+static void tasklet_lock_callback(void)
+{
+ spin_lock(this_cpu_ptr(&tasklet_sync_callback.cb_lock));
+}
+
+static void tasklet_unlock_callback(void)
+{
+ spin_unlock(this_cpu_ptr(&tasklet_sync_callback.cb_lock));
+}
+
+static void tasklet_callback_cancel_wait_running(void)
+{
+ struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback);
+
+ atomic_inc(&sync_cb->cb_waiters);
+ spin_lock(&sync_cb->cb_lock);
+ atomic_dec(&sync_cb->cb_waiters);
+ spin_unlock(&sync_cb->cb_lock);
+}
+
+static void tasklet_callback_sync_wait_running(void)
+{
+ struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback);
+
+ if (atomic_read(&sync_cb->cb_waiters)) {
+ spin_unlock(&sync_cb->cb_lock);
+ spin_lock(&sync_cb->cb_lock);
+ }
+}
+
+#else /* !CONFIG_PREEMPT_RT: */
+
+static void tasklet_lock_callback(void) { }
+static void tasklet_unlock_callback(void) { }
+static void tasklet_callback_sync_wait_running(void) { }
+
+#ifdef CONFIG_SMP
+static void tasklet_callback_cancel_wait_running(void) { }
+#endif
+#endif /* !CONFIG_PREEMPT_RT */
+
static void tasklet_action_common(struct tasklet_head *tl_head,
unsigned int softirq_nr)
{
@@ -816,6 +868,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head,
tl_head->tail = &tl_head->head;
local_irq_enable();
+ tasklet_lock_callback();
while (list) {
struct tasklet_struct *t = list;
@@ -835,6 +888,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head,
}
}
tasklet_unlock(t);
+ tasklet_callback_sync_wait_running();
continue;
}
tasklet_unlock(t);
@@ -847,6 +901,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head,
__raise_softirq_irqoff(softirq_nr);
local_irq_enable();
}
+ tasklet_unlock_callback();
}
static __latent_entropy void tasklet_action(void)
@@ -897,12 +952,9 @@ void tasklet_unlock_spin_wait(struct tasklet_struct *t)
/*
* Prevent a live lock when current preempted soft
* interrupt processing or prevents ksoftirqd from
- * running. If the tasklet runs on a different CPU
- * then this has no effect other than doing the BH
- * disable/enable dance for nothing.
+ * running.
*/
- local_bh_disable();
- local_bh_enable();
+ tasklet_callback_cancel_wait_running();
} else {
cpu_relax();
}
--
2.51.0
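A minimal sketch of the legacy pattern that tasklet_disable_in_atomic() serves,
with a hypothetical "foo" driver standing in for the real users. Everything
here (foo_dev, foo_init, foo_stop, the locking around shutting_down) is
illustrative only; the comments mark where the spin-wait on !PREEMPT_RT,
respectively the per-CPU cb_lock handshake added by this patch on PREEMPT_RT,
comes into play.

#include <linux/interrupt.h>
#include <linux/spinlock.h>

struct foo_dev {
	spinlock_t lock;
	struct tasklet_struct tasklet;
	bool shutting_down;
};

static void foo_tasklet_fn(struct tasklet_struct *t)
{
	struct foo_dev *foo = from_tasklet(foo, t, tasklet);

	/* Bottom-half work for the device. Must not take foo->lock,
	 * otherwise foo_stop() below would deadlock. */
}

static void foo_init(struct foo_dev *foo)
{
	spin_lock_init(&foo->lock);
	tasklet_setup(&foo->tasklet, foo_tasklet_fn);
}

static void foo_stop(struct foo_dev *foo)
{
	unsigned long flags;

	/* Atomic context on !PREEMPT_RT, sleepable on PREEMPT_RT. */
	spin_lock_irqsave(&foo->lock, flags);
	foo->shutting_down = true;
	/*
	 * Waits for an already running foo_tasklet_fn(): by spinning on
	 * !PREEMPT_RT, and with this patch by blocking on the per-CPU
	 * cb_lock (and thereby PI-boosting the tasklet) on PREEMPT_RT.
	 */
	tasklet_disable_in_atomic(&foo->tasklet);
	spin_unlock_irqrestore(&foo->lock, flags);
}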
On Thu, 4 Sep 2025 16:25:24 +0200 Sebastian Andrzej Siewior wrote:
[...]
> +static void tasklet_callback_cancel_wait_running(void)
> +{
> + struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback);
> +
> + atomic_inc(&sync_cb->cb_waiters);
> + spin_lock(&sync_cb->cb_lock);
> + atomic_dec(&sync_cb->cb_waiters);
> + spin_unlock(&sync_cb->cb_lock);
> +}
> +

CPU0		CPU1
----		----
lock A
tasklet C callback
lock A
cancel tasklet B
DEADLOCK-01

After this work could DEADLOCK-01 be triggered, given no chance for DEADLOCK-02?

CPU2		CPU3
----		----
lock A
timer C callback
lock A
timer_delete_sync(timer B)
DEADLOCK-02

[...]
On 2025-09-05 18:15:01 [+0800], Hillf Danton wrote:
> CPU0		CPU1
> ----		----
> lock A
> tasklet C callback
> lock A
> cancel tasklet B
> DEADLOCK-01
>
> After this work could DEADLOCK-01 be triggered, given no chance for DEADLOCK-02?
>
> CPU2		CPU3
> ----		----
> lock A
> timer C callback
> lock A
> timer_delete_sync(timer B)
> DEADLOCK-02

You are not supposed to acquire the lock, that is also acquired in the
callback, while canceling the timer/ tasklet.
Tell me please, how is this relevant?

If lock A is acquired on CPU0/ 2 then tasklet/ timer on CPU1/ 3 can't
make progress. Now CPU0/ 2 waits for the callback to complete. This
deadlocks as of today regardless of PREEMPT_RT and this change.

The difference is that !RT requires two CPU for this to happen while RT
is efficient and can trigger this with just one CPU.

Sebastian
On Mon, 15 Sep 2025 09:39:33 +0200 Sebastian Andrzej Siewior wrote:
> On 2025-09-05 18:15:01 [+0800], Hillf Danton wrote:
> > [...]
>
> You are not supposed to acquire the lock, that is also acquired in the
> callback, while canceling the timer/ tasklet.
> Tell me please, how is this relevant?
>
> If lock A is acquired on CPU0/ 2 then tasklet/ timer on CPU1/ 3 can't
> make progress. Now CPU0/ 2 waits for the callback to complete. This
> deadlocks as of today regardless of PREEMPT_RT and this change.
>
In case of !RT, the chance for DEADLOCK-02 is zero because deadlock is
detected based on per-timer instead of per-cpu.

> The difference is that !RT requires two CPU for this to happen while RT
> is efficient and can trigger this with just one CPU.

In case of RT OTOH, false positive deadlock could be triggered because
canceling taskletB has nothing to do with the callback of taskletC.

In short I am highlighting the gap between per-timer/tasklet and per-cpu.
On 2025-09-18 21:47:52 [+0800], Hillf Danton wrote:
> On Mon, 15 Sep 2025 09:39:33 +0200 Sebastian Andrzej Siewior wrote:
> > On 2025-09-05 18:15:01 [+0800], Hillf Danton wrote:
> > > [...]
> >
> > You are not supposed to acquire the lock, that is also acquired in the
> > callback, while canceling the timer/ tasklet.
> > Tell me please, how is this relevant?
> >
> > If lock A is acquired on CPU0/ 2 then tasklet/ timer on CPU1/ 3 can't
> > make progress. Now CPU0/ 2 waits for the callback to complete. This
> > deadlocks as of today regardless of PREEMPT_RT and this change.
> >
> In case of !RT, the chance for DEADLOCK-02 is zero because deadlock is
> detected based on per-timer instead of per-cpu.

But your "lock A" is global, isn't it?

> > The difference is that !RT requires two CPU for this to happen while RT
> > is efficient and can trigger this with just one CPU.
>
> In case of RT OTOH, false positive deadlock could be triggered because
> canceling taskletB has nothing to do with the callback of taskletC.
>
> In short I am highlighting the gap between per-timer/tasklet and per-cpu.

I don't see a problem here.

Sebastian
On Thu, 18 Sep 2025 17:49:37 +0200 Sebastian Andrzej Siewior wrote:
> On 2025-09-18 21:47:52 [+0800], Hillf Danton wrote:
> > On Mon, 15 Sep 2025 09:39:33 +0200 Sebastian Andrzej Siewior wrote:
> > > [...]
> >
> > In case of !RT, the chance for DEADLOCK-02 is zero because deadlock is
> > detected based on per-timer instead of per-cpu.
>
> But your "lock A" is global, isn't it?
>
IIUC whether lockA is global can be safely ignored here. DEADLOCK-02 cannot
be detected with !RT because, by definition, the callback of timerB has
nothing to do with timerC, in addition to the current per-timer detecting
mechanism.

This work however adds a per-cpu detecting mechanism that fails to tell the
difference between timerB and timerC, thus a false positive result comes.
For example the callback of timerB does not acquire lockA.
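To make the scenario in the diagrams concrete, here is a minimal sketch with a
hypothetical lock_a, tasklet_b and tasklet_c; tasklet_b's callback never takes
lock_a, and tasklet_c is assumed to be scheduled elsewhere (e.g. from an
interrupt handler). The comment in the cancel path states the open question
from the exchange above rather than an answer.

#include <linux/interrupt.h>
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(lock_a);

static void tasklet_b_fn(struct tasklet_struct *t)
{
	/* Does not touch lock_a at all. */
}

static void tasklet_c_fn(struct tasklet_struct *t)
{
	spin_lock(&lock_a);	/* C's callback needs lock A */
	/* ... */
	spin_unlock(&lock_a);
}

static DECLARE_TASKLET(tasklet_b, tasklet_b_fn);
static DECLARE_TASKLET(tasklet_c, tasklet_c_fn);

static void cancel_b_while_holding_a(void)
{
	spin_lock(&lock_a);
	/*
	 * Waits for a running tasklet_b_fn(). The question raised above is
	 * whether a tasklet_c_fn() blocked on lock_a can now stall this wait
	 * through the shared per-CPU cb_lock on PREEMPT_RT, even though
	 * tasklet_b itself never takes lock_a.
	 */
	tasklet_disable_in_atomic(&tasklet_b);
	spin_unlock(&lock_a);
	tasklet_enable(&tasklet_b);
}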
The following commit has been merged into the irq/core branch of tip:
Commit-ID: fd4e876f59b7e70283b4025c717cad8948397be1
Gitweb: https://git.kernel.org/tip/fd4e876f59b7e70283b4025c717cad8948397be1
Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
AuthorDate: Thu, 04 Sep 2025 16:25:24 +02:00
Committer: Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Wed, 17 Sep 2025 16:25:41 +02:00
softirq: Provide a handshake for canceling tasklets via polling
The tasklet_unlock_spin_wait() via tasklet_disable_in_atomic() is
provided for a few legacy tasklet users. The interface is used from
atomic context (which is either softirq or disabled preemption) on
non-PREEMPT_RT and relies on spinning until the tasklet callback
completes.
On PREEMPT_RT the context is never atomic but the busy polling logic
remains. It is possible that the thread invoking tasklet_unlock_spin_wait()
has higher priority than the tasklet. If both run on the same CPU the
tasklet makes no progress and the thread trying to cancel the tasklet will
live-lock the system.
To avoid the lockup tasklet_unlock_spin_wait() uses local_bh_disable()/
enable() which utilizes the local_lock_t for synchronisation. This lock is
a central per-CPU BKL and about to be removed.
Solve this by acquiring a lock in tasklet_action_common() which is held while
the tasklet's callback is invoked. This lock will be acquired from
tasklet_unlock_spin_wait() via tasklet_callback_cancel_wait_running().
After the tasklet has completed, tasklet_callback_sync_wait_running() drops the
lock and acquires it again. In order to avoid unlocking the lock even if
there is no cancel request, there is a cb_waiters counter which is
incremented during a cancel request. Blocking on the lock will PI-boost
the tasklet if needed, ensuring progress is made.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/softirq.c | 62 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 57 insertions(+), 5 deletions(-)
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 513b194..4e2c980 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -805,6 +805,58 @@ static bool tasklet_clear_sched(struct tasklet_struct *t)
return false;
}
+#ifdef CONFIG_PREEMPT_RT
+struct tasklet_sync_callback {
+ spinlock_t cb_lock;
+ atomic_t cb_waiters;
+};
+
+static DEFINE_PER_CPU(struct tasklet_sync_callback, tasklet_sync_callback) = {
+ .cb_lock = __SPIN_LOCK_UNLOCKED(tasklet_sync_callback.cb_lock),
+ .cb_waiters = ATOMIC_INIT(0),
+};
+
+static void tasklet_lock_callback(void)
+{
+ spin_lock(this_cpu_ptr(&tasklet_sync_callback.cb_lock));
+}
+
+static void tasklet_unlock_callback(void)
+{
+ spin_unlock(this_cpu_ptr(&tasklet_sync_callback.cb_lock));
+}
+
+static void tasklet_callback_cancel_wait_running(void)
+{
+ struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback);
+
+ atomic_inc(&sync_cb->cb_waiters);
+ spin_lock(&sync_cb->cb_lock);
+ atomic_dec(&sync_cb->cb_waiters);
+ spin_unlock(&sync_cb->cb_lock);
+}
+
+static void tasklet_callback_sync_wait_running(void)
+{
+ struct tasklet_sync_callback *sync_cb = this_cpu_ptr(&tasklet_sync_callback);
+
+ if (atomic_read(&sync_cb->cb_waiters)) {
+ spin_unlock(&sync_cb->cb_lock);
+ spin_lock(&sync_cb->cb_lock);
+ }
+}
+
+#else /* !CONFIG_PREEMPT_RT: */
+
+static void tasklet_lock_callback(void) { }
+static void tasklet_unlock_callback(void) { }
+static void tasklet_callback_sync_wait_running(void) { }
+
+#ifdef CONFIG_SMP
+static void tasklet_callback_cancel_wait_running(void) { }
+#endif
+#endif /* !CONFIG_PREEMPT_RT */
+
static void tasklet_action_common(struct tasklet_head *tl_head,
unsigned int softirq_nr)
{
@@ -816,6 +868,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head,
tl_head->tail = &tl_head->head;
local_irq_enable();
+ tasklet_lock_callback();
while (list) {
struct tasklet_struct *t = list;
@@ -835,6 +888,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head,
}
}
tasklet_unlock(t);
+ tasklet_callback_sync_wait_running();
continue;
}
tasklet_unlock(t);
@@ -847,6 +901,7 @@ static void tasklet_action_common(struct tasklet_head *tl_head,
__raise_softirq_irqoff(softirq_nr);
local_irq_enable();
}
+ tasklet_unlock_callback();
}
static __latent_entropy void tasklet_action(void)
@@ -897,12 +952,9 @@ void tasklet_unlock_spin_wait(struct tasklet_struct *t)
/*
* Prevent a live lock when current preempted soft
* interrupt processing or prevents ksoftirqd from
- * running. If the tasklet runs on a different CPU
- * then this has no effect other than doing the BH
- * disable/enable dance for nothing.
+ * running.
*/
- local_bh_disable();
- local_bh_enable();
+ tasklet_callback_cancel_wait_running();
} else {
cpu_relax();
}
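As a side note, the hand-over performed by tasklet_callback_sync_wait_running()
can be modelled in user space. The following self-contained sketch uses
pthreads to stand in for the softirq thread and the canceller; all names are
illustrative, and a plain pthread mutex lacks the PI boosting that spinlock_t
provides on PREEMPT_RT, so it only demonstrates the cb_waiters fast path and
the unlock/relock hand-over, not the priority inheritance. Build with
"cc -pthread" and the canceller prints once the modelled callback has finished.

/* Illustrative user-space model of the cb_lock/cb_waiters handshake. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t cb_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int cb_waiters;
static atomic_int tasklet_running = 1;	/* models TASKLET_STATE_RUN */

/* Models tasklet_action_common(): cb_lock is held across the callback. */
static void *softirq_thread(void *arg)
{
	pthread_mutex_lock(&cb_lock);		/* tasklet_lock_callback() */

	usleep(100 * 1000);			/* the tasklet callback body */
	atomic_store(&tasklet_running, 0);	/* tasklet_unlock() */

	/* tasklet_callback_sync_wait_running(): hand the lock over only
	 * if a canceller is actually blocked on it. */
	if (atomic_load(&cb_waiters)) {
		pthread_mutex_unlock(&cb_lock);
		pthread_mutex_lock(&cb_lock);
	}

	pthread_mutex_unlock(&cb_lock);		/* tasklet_unlock_callback() */
	return NULL;
}

/* Models tasklet_unlock_spin_wait() on PREEMPT_RT: block instead of spinning. */
static void *canceller_thread(void *arg)
{
	while (atomic_load(&tasklet_running)) {
		/* tasklet_callback_cancel_wait_running() */
		atomic_fetch_add(&cb_waiters, 1);
		pthread_mutex_lock(&cb_lock);	/* sleeps until the callback is done */
		atomic_fetch_sub(&cb_waiters, 1);
		pthread_mutex_unlock(&cb_lock);
	}
	printf("callback finished, cancel can proceed\n");
	return NULL;
}

int main(void)
{
	pthread_t softirq, cancel;

	pthread_create(&softirq, NULL, softirq_thread, NULL);
	usleep(10 * 1000);	/* let the modelled callback start first */
	pthread_create(&cancel, NULL, canceller_thread, NULL);

	pthread_join(cancel, NULL);
	pthread_join(softirq, NULL);
	return 0;
}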