Add trace_contended_release() calls to the slowpath unlock paths of
sleepable locks: mutex, rtmutex, semaphore, rwsem, percpu-rwsem, and
RT-specific rwbase locks. Each call site fires only when there are
blocked waiters being woken, except percpu_up_write() which always wakes
via __wake_up().
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
kernel/locking/mutex.c | 1 +
kernel/locking/percpu-rwsem.c | 3 +++
kernel/locking/rtmutex.c | 1 +
kernel/locking/rwbase_rt.c | 8 +++++++-
kernel/locking/rwsem.c | 9 +++++++--
kernel/locking/semaphore.c | 4 +++-
6 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index c867f6c15530..54ca045987a2 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -970,6 +970,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
next = waiter->task;
+ trace_contended_release(lock);
debug_mutex_wake_waiter(lock, waiter);
__clear_task_blocked_on(next, lock);
wake_q_add(&wake_q, next);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 4190635458da..0f2e8e63d252 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -263,6 +263,8 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
{
rwsem_release(&sem->dep_map, _RET_IP_);
+ trace_contended_release(sem);
+
/*
* Signal the writer is done, no fast path yet.
*
@@ -297,6 +299,7 @@ void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem)
* writer.
*/
smp_mb(); /* B matches C */
+ trace_contended_release(sem);
/*
* In other words, if they see our decrement (presumably to
* aggregate zero, as that is the only time it matters) they
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index c80902eacd79..e0873f0ed982 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1457,6 +1457,7 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
raw_spin_lock_irqsave(&lock->wait_lock, flags);
}
+ trace_contended_release(lock);
/*
* The wakeup next waiter path does not suffer from the above
* race. See the comments there.
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 9f4322c07486..42f3658c0059 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -162,8 +162,10 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
* worst case which can happen is a spurious wakeup.
*/
owner = rt_mutex_owner(rtm);
- if (owner)
+ if (owner) {
+ trace_contended_release(rwb);
rt_mutex_wake_q_add_task(&wqh, owner, state);
+ }
/* Pairs with the preempt_enable in rt_mutex_wake_up_q() */
preempt_disable();
@@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (rt_mutex_has_waiters(rtm))
+ trace_contended_release(rwb);
__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}
@@ -213,6 +217,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
unsigned long flags;
raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+ if (rt_mutex_has_waiters(rtm))
+ trace_contended_release(rwb);
/* Release it and account current as reader */
__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 24df4d98f7d2..4e61dc0bb045 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1360,6 +1360,7 @@ static inline void __up_read(struct rw_semaphore *sem)
if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
RWSEM_FLAG_WAITERS)) {
clear_nonspinnable(sem);
+ trace_contended_release(sem);
rwsem_wake(sem);
}
preempt_enable();
@@ -1383,8 +1384,10 @@ static inline void __up_write(struct rw_semaphore *sem)
preempt_disable();
rwsem_clear_owner(sem);
tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
- if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+ if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
+ trace_contended_release(sem);
rwsem_wake(sem);
+ }
preempt_enable();
}
@@ -1407,8 +1410,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
tmp = atomic_long_fetch_add_release(
-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
rwsem_set_reader_owned(sem);
- if (tmp & RWSEM_FLAG_WAITERS)
+ if (tmp & RWSEM_FLAG_WAITERS) {
+ trace_contended_release(sem);
rwsem_downgrade_wake(sem);
+ }
preempt_enable();
}
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 3ef032e22f7e..3cef5ba88f7e 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -231,8 +231,10 @@ void __sched up(struct semaphore *sem)
else
__up(sem, &wake_q);
raw_spin_unlock_irqrestore(&sem->lock, flags);
- if (!wake_q_empty(&wake_q))
+ if (!wake_q_empty(&wake_q)) {
+ trace_contended_release(sem);
wake_up_q(&wake_q);
+ }
}
EXPORT_SYMBOL(up);
--
2.47.3
On Wed, 4 Mar 2026 16:56:17 +0000
Dmitry Ilvokhin <d@ilvokhin.com> wrote:
> @@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
> unsigned long flags;
>
> raw_spin_lock_irqsave(&rtm->wait_lock, flags);
> + if (rt_mutex_has_waiters(rtm))
> + trace_contended_release(rwb);
Hmm, if statements should never be used just for tracepoints without a
static branch. The above should be:
if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
trace_contended_release(rwb);
The above "trace_contended_release_enabled()" is a static_branch where it
turns the if statement into a nop when the tracepoint is not enabled, and a
jmp when it is.
> __rwbase_write_unlock(rwb, WRITER_BIAS, flags);
> }
>
> @@ -213,6 +217,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
> unsigned long flags;
>
> raw_spin_lock_irqsave(&rtm->wait_lock, flags);
> + if (rt_mutex_has_waiters(rtm))
> + trace_contended_release(rwb);
Same here.
-- Steve
> /* Release it and account current as reader */
> __rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
> }
> diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
> index 24df4d98f7d2..4e61dc0bb045 100644
> --- a/kernel/locking/rwsem.c
> +++ b/kernel/locking/rwsem.c
> @@ -1360,6 +1360,7 @@ static inline void __up_read(struct rw_semaphore *sem)
> if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
> RWSEM_FLAG_WAITERS)) {
> clear_nonspinnable(sem);
> + trace_contended_release(sem);
> rwsem_wake(sem);
> }
> preempt_enable();
> @@ -1383,8 +1384,10 @@ static inline void __up_write(struct rw_semaphore *sem)
> preempt_disable();
> rwsem_clear_owner(sem);
> tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
> - if (unlikely(tmp & RWSEM_FLAG_WAITERS))
> + if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
> + trace_contended_release(sem);
> rwsem_wake(sem);
> + }
> preempt_enable();
> }
>
> @@ -1407,8 +1410,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
> tmp = atomic_long_fetch_add_release(
> -RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
> rwsem_set_reader_owned(sem);
> - if (tmp & RWSEM_FLAG_WAITERS)
> + if (tmp & RWSEM_FLAG_WAITERS) {
> + trace_contended_release(sem);
> rwsem_downgrade_wake(sem);
> + }
> preempt_enable();
> }
>
> diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
> index 3ef032e22f7e..3cef5ba88f7e 100644
> --- a/kernel/locking/semaphore.c
> +++ b/kernel/locking/semaphore.c
> @@ -231,8 +231,10 @@ void __sched up(struct semaphore *sem)
> else
> __up(sem, &wake_q);
> raw_spin_unlock_irqrestore(&sem->lock, flags);
> - if (!wake_q_empty(&wake_q))
> + if (!wake_q_empty(&wake_q)) {
> + trace_contended_release(sem);
> wake_up_q(&wake_q);
> + }
> }
> EXPORT_SYMBOL(up);
>
On Thu, Mar 05, 2026 at 10:59:24AM -0500, Steven Rostedt wrote:
> On Wed, 4 Mar 2026 16:56:17 +0000
> Dmitry Ilvokhin <d@ilvokhin.com> wrote:
>
> > @@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
> > 	unsigned long flags;
> >
> > 	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
> > +	if (rt_mutex_has_waiters(rtm))
> > +		trace_contended_release(rwb);
>
> Hmm, if statements should never be used just for tracepoints without a
> static branch. The above should be:
>
> 	if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
> 		trace_contended_release(rwb);
>
> The above "trace_contended_release_enabled()" is a static_branch where it
> turns the if statement into a nop when the tracepoint is not enabled, and a
> jmp when it is.

Thanks for catching this, Steven. Fixed locally.
On Thu, Mar 05, 2026 at 10:59:24AM -0500, Steven Rostedt wrote:
> On Wed, 4 Mar 2026 16:56:17 +0000
> Dmitry Ilvokhin <d@ilvokhin.com> wrote:
>
> > @@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
> > 	unsigned long flags;
> >
> > 	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
> > +	if (rt_mutex_has_waiters(rtm))
> > +		trace_contended_release(rwb);
>
> Hmm, if statements should never be used just for tracepoints without a
> static branch. The above should be:
>
> 	if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
> 		trace_contended_release(rwb);
>

I still wish you would accept:

	if (trace_foo_enabled() && foo)
		__do_trace_foo();

The compilers can't optimize the static branches and thus you'll get it
twice for no reason.

I really wish they would just accept __pure, but alas.
On Thu, 5 Mar 2026 18:42:23 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> I still wish you would accept:
>
> 	if (trace_foo_enabled() && foo)
> 		__do_trace_foo();
>
> The compilers can't optimize the static branches and thus you'll get it
> twice for no reason.
>
> I really wish they would just accept __pure, but alas.

Makes sense, and that could probably be done. It shouldn't be too hard to
do. If I find some time I could look at it, or perhaps someone lurking on
this thread could possibly give it a try! (I may even Cc some people that
want to learn this code).

-- Steve
© 2016 - 2026 Red Hat, Inc.