include/linux/sched.h | 12 ++++++++++++ include/linux/sched/task.h | 10 +++++++--- 2 files changed, 19 insertions(+), 3 deletions(-)
With PREEMPT_RT enabled, some of the calls to put_task_struct() coming
from rt_mutex_adjust_prio_chain() could happen in preemptible context and
with a mutex enqueued. That could lead to this sequence:
rt_mutex_adjust_prio_chain()
put_task_struct()
__put_task_struct()
sched_ext_free()
spin_lock_irqsave()
rtlock_lock() ---> TRIGGERS
lockdep_assert(!current->pi_blocked_on);
Adjust the check in put_task_struct() to also consider pi_blocked_on before
calling __put_task_struct(), resorting to the deferred call in case it is
set.
v2: Rostedt suggested removing the #ifdef from put_task_struct() and
creating tsk_is_pi_blocked_on() in sched.h to make the change cleaner.
Suggested-by: Crystal Wood <crwood@redhat.com>
Signed-off-by: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
---
include/linux/sched.h | 12 ++++++++++++
include/linux/sched/task.h | 10 +++++++---
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ec93e5ba53a9..9fbfa7f55a83d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2148,6 +2148,18 @@ static inline bool task_is_runnable(struct task_struct *p)
return p->on_rq && !p->se.sched_delayed;
}
+#ifdef CONFIG_RT_MUTEXES
+static inline bool tsk_is_pi_blocked_on(struct task_struct *tsk)
+{
+ return tsk->pi_blocked_on != NULL;
+}
+#else /* !CONFIG_RT_MUTEXES: report the task as never PI-blocked */
+static inline bool tsk_is_pi_blocked_on(struct task_struct *tsk)
+{
+ return false;
+}
+#endif /* CONFIG_RT_MUTEXES */
+
extern bool sched_task_on_rq(struct task_struct *p);
extern unsigned long get_wchan(struct task_struct *p);
extern struct task_struct *cpu_curr_snapshot(int cpu);
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 0f2aeb37bbb04..1f17a3dd51774 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -135,9 +135,11 @@ static inline void put_task_struct(struct task_struct *t)
/*
* In !RT, it is always safe to call __put_task_struct().
- * Under RT, we can only call it in preemptible context.
+ * Under RT, we can only call it in preemptible context,
+ * when not blocked on a PI chain.
*/
- if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT) ||
+ (preemptible() && !tsk_is_pi_blocked_on(current))) {
static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP);
lock_map_acquire_try(&put_task_map);
@@ -149,7 +151,9 @@ static inline void put_task_struct(struct task_struct *t)
/*
* under PREEMPT_RT, we can't call put_task_struct
* in atomic context because it will indirectly
- * acquire sleeping locks.
+ * acquire sleeping locks. The same is true if the
+ * current process has a mutex enqueued (blocked on
+ * a PI chain).
*
* call_rcu() will schedule delayed_put_task_struct_rcu()
* to be called in process context.
--
2.49.0
+ sched folks.
On 2025-04-09 15:58:32 [-0300], Luis Claudio R. Goncalves wrote:
> With PREEMPT_RT enabled, some of the calls to put_task_struct() coming
> from rt_mutex_adjust_prio_chain() could happen in preemptible context and
> with a mutex enqueued. That could lead to this sequence:
>
> rt_mutex_adjust_prio_chain()
> put_task_struct()
> __put_task_struct()
> sched_ext_free()
> spin_lock_irqsave()
> rtlock_lock() ---> TRIGGERS
> lockdep_assert(!current->pi_blocked_on);
>
> Adjust the check in put_task_struct() to also consider pi_blocked_on before
> calling __put_task_struct(), resorting to the deferred call in case it is
> set.
>
> v2: Rostedt suggested removing the #ifdef from put_task_struct() and
> creating tsk_is_pi_blocked_on() in sched.h to make the change cleaner.
I complained about this special RT case in put_task_struct() when it was
first introduced. Couldn't we just unconditionally do the RCU
put?
> Suggested-by: Crystal Wood <crwood@redhat.com>
> Signed-off-by: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
> ---
> include/linux/sched.h | 12 ++++++++++++
> include/linux/sched/task.h | 10 +++++++---
> 2 files changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 5ec93e5ba53a9..9fbfa7f55a83d 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -2148,6 +2148,18 @@ static inline bool task_is_runnable(struct task_struct *p)
> return p->on_rq && !p->se.sched_delayed;
> }
>
> +#ifdef CONFIG_RT_MUTEXES
> +static inline bool tsk_is_pi_blocked_on(struct task_struct *tsk)
> +{
> + return tsk->pi_blocked_on != NULL;
> +}
> +#else /* !CONFIG_RT_MUTEXES: report the task as never PI-blocked */
> +static inline bool tsk_is_pi_blocked_on(struct task_struct *tsk)
> +{
> + return false;
> +}
> +#endif /* CONFIG_RT_MUTEXES */
> +
> extern bool sched_task_on_rq(struct task_struct *p);
> extern unsigned long get_wchan(struct task_struct *p);
> extern struct task_struct *cpu_curr_snapshot(int cpu);
> diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
> index 0f2aeb37bbb04..1f17a3dd51774 100644
> --- a/include/linux/sched/task.h
> +++ b/include/linux/sched/task.h
> @@ -135,9 +135,11 @@ static inline void put_task_struct(struct task_struct *t)
>
> /*
> * In !RT, it is always safe to call __put_task_struct().
> - * Under RT, we can only call it in preemptible context.
> + * Under RT, we can only call it in preemptible context,
> + * when not blocked on a PI chain.
> */
> - if (!IS_ENABLED(CONFIG_PREEMPT_RT) || preemptible()) {
> + if (!IS_ENABLED(CONFIG_PREEMPT_RT) ||
> + (preemptible() && !tsk_is_pi_blocked_on(current))) {
> static DEFINE_WAIT_OVERRIDE_MAP(put_task_map, LD_WAIT_SLEEP);
>
> lock_map_acquire_try(&put_task_map);
> @@ -149,7 +151,9 @@ static inline void put_task_struct(struct task_struct *t)
> /*
> * under PREEMPT_RT, we can't call put_task_struct
> * in atomic context because it will indirectly
> - * acquire sleeping locks.
> + * acquire sleeping locks. The same is true if the
> + * current process has a mutex enqueued (blocked on
> + * a PI chain).
> *
> * call_rcu() will schedule delayed_put_task_struct_rcu()
> * to be called in process context.
Sebastian
On Thu, Apr 10, 2025 at 08:48:44AM +0200, Sebastian Andrzej Siewior wrote: > + sched folks. > > On 2025-04-09 15:58:32 [-0300], Luis Claudio R. Goncalves wrote: > > With PREEMPT_RT enabled, some of the calls to put_task_struct() coming > > from rt_mutex_adjust_prio_chain() could happen in preemptible context and > > with a mutex enqueued. That could lead to this sequence: > > > > rt_mutex_adjust_prio_chain() > > put_task_struct() > > __put_task_struct() > > sched_ext_free() > > spin_lock_irqsave() > > rtlock_lock() ---> TRIGGERS > > lockdep_assert(!current->pi_blocked_on); > > > > Adjust the check in put_task_struct() to also consider pi_blocked_on before > > calling __put_task_struct(), resorting to the deferred call in case it is > > set. > > > > v2: Rostedt suggested removing the #ifdef from put_task_struct() and > > creating tsk_is_pi_blocked_on() in sched.h to make the change cleaner. Oh gawd, this patch makes a sad situation worse. > I complained about this special RT case in put_task_struct() when it was > first got introduced. Couldn't we just just unconditionally do the RCU > put? Yeah, please make it simpler, not more complex.
On 2025-04-10 09:51:03 [+0200], Peter Zijlstra wrote: > > I complained about this special RT case in put_task_struct() when it was > > first got introduced. Couldn't we just just unconditionally do the RCU > > put? > > Yeah, please make it simpler, not more complex. Just so we're clear: simpler as in everyone does call_rcu(), or RT always does call_rcu() and everyone else __put_task_struct()? I mean we would end up with one call chain; I am just not sure how expensive it gets for !RT. Sebastian
On Thu, Apr 10, 2025 at 05:32:05PM +0200, Sebastian Andrzej Siewior wrote: > On 2025-04-10 09:51:03 [+0200], Peter Zijlstra wrote: > > > I complained about this special RT case in put_task_struct() when it was > > > first got introduced. Couldn't we just just unconditionally do the RCU > > > put? > > > > Yeah, please make it simpler, not more complex. > > Just so we clear: simpler as in everyone does call_rcu() or RT does > always call_rcu() and everyone else __put_task_struct()? I mean we would > end up with one call chain I am just not sure how expensive it gets for > !RT. Sebastian, I implemented the change where put_task_struct() unconditionally resorts to: call_rcu(&t->rcu, __put_task_struct_rcu_cb); I submitted the kernels I built with that change and a pristine upstream kernel to LTP and stress-ng and also ran 'perf bench all'. I built kernels with and without lockdep and extra debug. All kernels survived the tests without a scratch and I haven't observed differences in behavior or timings (for the tests that had that information). What would be a good benchmark to compare the kernels with and without the put_task_struct() change? I would like to observe whether there is a penalty or added overhead with the change in place. Best, Luis > Sebastian > ---end quoted text---
© 2016 - 2026 Red Hat, Inc.