[PATCH RFC 3/3] locking: Wire up contended_release tracepoint

Posted by Dmitry Ilvokhin 1 month ago
Add trace_contended_release() calls to the slowpath unlock paths of
sleepable locks: mutex, rtmutex, semaphore, rwsem, percpu-rwsem, and
the RT-specific rwbase locks. Each call site fires only when blocked
waiters are actually being woken, except percpu_up_write(), which
always wakes via __wake_up().
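
For reference, the tracepoint wired up here is introduced earlier in
the series; a declaration along these lines would be typical (a minimal
sketch only, with a hypothetical single lock-address field; the real
definition from patch 1/3 may differ):

	/*
	 * Hypothetical sketch of the event declaration; omits the usual
	 * TRACE_SYSTEM/header boilerplate. The actual event is defined
	 * in an earlier patch of this series and may carry more fields.
	 */
	TRACE_EVENT(contended_release,

		TP_PROTO(void *lock),

		TP_ARGS(lock),

		TP_STRUCT__entry(
			__field(void *, lock)
		),

		TP_fast_assign(
			__entry->lock = lock;
		),

		TP_printk("lock=%p", __entry->lock)
	);

Once defined, the event shows up under /sys/kernel/tracing/events/ and
can be enabled like any other tracepoint.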

Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
 kernel/locking/mutex.c        | 1 +
 kernel/locking/percpu-rwsem.c | 3 +++
 kernel/locking/rtmutex.c      | 1 +
 kernel/locking/rwbase_rt.c    | 8 +++++++-
 kernel/locking/rwsem.c        | 9 +++++++--
 kernel/locking/semaphore.c    | 4 +++-
 6 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index c867f6c15530..54ca045987a2 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -970,6 +970,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 
 		next = waiter->task;
 
+		trace_contended_release(lock);
 		debug_mutex_wake_waiter(lock, waiter);
 		__clear_task_blocked_on(next, lock);
 		wake_q_add(&wake_q, next);
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 4190635458da..0f2e8e63d252 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -263,6 +263,8 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
 {
 	rwsem_release(&sem->dep_map, _RET_IP_);
 
+	trace_contended_release(sem);
+
 	/*
 	 * Signal the writer is done, no fast path yet.
 	 *
@@ -297,6 +299,7 @@ void __percpu_up_read_slowpath(struct percpu_rw_semaphore *sem)
 	 * writer.
 	 */
 	smp_mb(); /* B matches C */
+	trace_contended_release(sem);
 	/*
 	 * In other words, if they see our decrement (presumably to
 	 * aggregate zero, as that is the only time it matters) they
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index c80902eacd79..e0873f0ed982 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1457,6 +1457,7 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
 		raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	}
 
+	trace_contended_release(lock);
 	/*
 	 * The wakeup next waiter path does not suffer from the above
 	 * race. See the comments there.
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 9f4322c07486..42f3658c0059 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -162,8 +162,10 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
 	 * worst case which can happen is a spurious wakeup.
 	 */
 	owner = rt_mutex_owner(rtm);
-	if (owner)
+	if (owner) {
+		trace_contended_release(rwb);
 		rt_mutex_wake_q_add_task(&wqh, owner, state);
+	}
 
 	/* Pairs with the preempt_enable in rt_mutex_wake_up_q() */
 	preempt_disable();
@@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+	if (rt_mutex_has_waiters(rtm))
+		trace_contended_release(rwb);
 	__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
 }
 
@@ -213,6 +217,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+	if (rt_mutex_has_waiters(rtm))
+		trace_contended_release(rwb);
 	/* Release it and account current as reader */
 	__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
 }
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index 24df4d98f7d2..4e61dc0bb045 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1360,6 +1360,7 @@ static inline void __up_read(struct rw_semaphore *sem)
 	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
 		      RWSEM_FLAG_WAITERS)) {
 		clear_nonspinnable(sem);
+		trace_contended_release(sem);
 		rwsem_wake(sem);
 	}
 	preempt_enable();
@@ -1383,8 +1384,10 @@ static inline void __up_write(struct rw_semaphore *sem)
 	preempt_disable();
 	rwsem_clear_owner(sem);
 	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
-	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+	if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
+		trace_contended_release(sem);
 		rwsem_wake(sem);
+	}
 	preempt_enable();
 }
 
@@ -1407,8 +1410,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
 	tmp = atomic_long_fetch_add_release(
 		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
 	rwsem_set_reader_owned(sem);
-	if (tmp & RWSEM_FLAG_WAITERS)
+	if (tmp & RWSEM_FLAG_WAITERS) {
+		trace_contended_release(sem);
 		rwsem_downgrade_wake(sem);
+	}
 	preempt_enable();
 }
 
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 3ef032e22f7e..3cef5ba88f7e 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -231,8 +231,10 @@ void __sched up(struct semaphore *sem)
 	else
 		__up(sem, &wake_q);
 	raw_spin_unlock_irqrestore(&sem->lock, flags);
-	if (!wake_q_empty(&wake_q))
+	if (!wake_q_empty(&wake_q)) {
+		trace_contended_release(sem);
 		wake_up_q(&wake_q);
+	}
 }
 EXPORT_SYMBOL(up);
 
-- 
2.47.3
Re: [PATCH RFC 3/3] locking: Wire up contended_release tracepoint
Posted by Steven Rostedt 1 month ago
On Wed,  4 Mar 2026 16:56:17 +0000
Dmitry Ilvokhin <d@ilvokhin.com> wrote:

> @@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
>  	unsigned long flags;
>  
>  	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
> +	if (rt_mutex_has_waiters(rtm))
> +		trace_contended_release(rwb);

Hmm, if statements should never be used just for tracepoints without a
static branch. The above should be:

	if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
		trace_contended_release(rwb);

The above "trace_contened_release_enabled()" is a static_branch where it
turns the if statement into a nop when the tracepoint is not enabled, and a
jmp when it is.
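
Conceptually, what the tracepoint macros generate for that helper looks
something like this (a simplified sketch, not the actual expansion in
include/linux/tracepoint.h):

	/* Simplified sketch; hypothetical key name. */
	DEFINE_STATIC_KEY_FALSE(__tracepoint_contended_release_key);

	static inline bool trace_contended_release_enabled(void)
	{
		/*
		 * static_branch_unlikely() compiles to a nop that gets
		 * patched into a jmp when the key is enabled, so the
		 * disabled case costs nothing on the unlock path.
		 */
		return static_branch_unlikely(&__tracepoint_contended_release_key);
	}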


>  	__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
>  }
>  
> @@ -213,6 +217,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
>  	unsigned long flags;
>  
>  	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
> +	if (rt_mutex_has_waiters(rtm))
> +		trace_contended_release(rwb);

Same here.

-- Steve

>  	/* Release it and account current as reader */
>  	__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
>  }
> diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
> index 24df4d98f7d2..4e61dc0bb045 100644
> --- a/kernel/locking/rwsem.c
> +++ b/kernel/locking/rwsem.c
> @@ -1360,6 +1360,7 @@ static inline void __up_read(struct rw_semaphore *sem)
>  	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
>  		      RWSEM_FLAG_WAITERS)) {
>  		clear_nonspinnable(sem);
> +		trace_contended_release(sem);
>  		rwsem_wake(sem);
>  	}
>  	preempt_enable();
> @@ -1383,8 +1384,10 @@ static inline void __up_write(struct rw_semaphore *sem)
>  	preempt_disable();
>  	rwsem_clear_owner(sem);
>  	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
> -	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
> +	if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
> +		trace_contended_release(sem);
>  		rwsem_wake(sem);
> +	}
>  	preempt_enable();
>  }
>  
> @@ -1407,8 +1410,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
>  	tmp = atomic_long_fetch_add_release(
>  		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
>  	rwsem_set_reader_owned(sem);
> -	if (tmp & RWSEM_FLAG_WAITERS)
> +	if (tmp & RWSEM_FLAG_WAITERS) {
> +		trace_contended_release(sem);
>  		rwsem_downgrade_wake(sem);
> +	}
>  	preempt_enable();
>  }
>  
> diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
> index 3ef032e22f7e..3cef5ba88f7e 100644
> --- a/kernel/locking/semaphore.c
> +++ b/kernel/locking/semaphore.c
> @@ -231,8 +231,10 @@ void __sched up(struct semaphore *sem)
>  	else
>  		__up(sem, &wake_q);
>  	raw_spin_unlock_irqrestore(&sem->lock, flags);
> -	if (!wake_q_empty(&wake_q))
> +	if (!wake_q_empty(&wake_q)) {
> +		trace_contended_release(sem);
>  		wake_up_q(&wake_q);
> +	}
>  }
>  EXPORT_SYMBOL(up);
>
Re: [PATCH RFC 3/3] locking: Wire up contended_release tracepoint
Posted by Dmitry Ilvokhin 1 month ago
On Thu, Mar 05, 2026 at 10:59:24AM -0500, Steven Rostedt wrote:
> On Wed,  4 Mar 2026 16:56:17 +0000
> Dmitry Ilvokhin <d@ilvokhin.com> wrote:
> 
> > @@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
> >  	unsigned long flags;
> >  
> >  	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
> > +	if (rt_mutex_has_waiters(rtm))
> > +		trace_contended_release(rwb);
> 
> Hmm, if statements should never be used just for tracepoints without a
> static branch. The above should be:
> 
> 	if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
> 		trace_contended_release(rwb);
> 
> The above "trace_contended_release_enabled()" is backed by a static branch,
> which turns the if statement into a nop when the tracepoint is not enabled,
> and into a jmp when it is.

Thanks for catching this, Steven. Fixed locally.
Re: [PATCH RFC 3/3] locking: Wire up contended_release tracepoint
Posted by Peter Zijlstra 1 month ago
On Thu, Mar 05, 2026 at 10:59:24AM -0500, Steven Rostedt wrote:
> On Wed,  4 Mar 2026 16:56:17 +0000
> Dmitry Ilvokhin <d@ilvokhin.com> wrote:
> 
> > @@ -204,6 +206,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
> >  	unsigned long flags;
> >  
> >  	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
> > +	if (rt_mutex_has_waiters(rtm))
> > +		trace_contended_release(rwb);
> 
> Hmm, if statements should never be used just for tracepoints without a
> static branch. The above should be:
> 
> 	if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
> 		trace_contended_release(rwb);
> 

I still wish you would accept:

	if (trace_foo_enabled() && foo)
		__do_trace_foo();

The compiler can't optimize the duplicated static branches away, and thus
you'll get the key test twice for no reason.

I really wish they would just accept __pure, but alas.
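
(To spell out the double test: trace_foo() itself expands to another
static-branch check of the same key, so the two shapes roughly become,
using hypothetical names:)

	/*
	 * Recommended pattern: the key is tested twice, once in
	 * trace_foo_enabled() and once more inside trace_foo().
	 */
	if (trace_foo_enabled() && foo)
		trace_foo(bar);		/* ~ if (key enabled) __do_trace_foo(bar); */

	/*
	 * Proposed pattern: a single test, then the raw helper is
	 * called directly.
	 */
	if (trace_foo_enabled() && foo)
		__do_trace_foo(bar);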
Re: [PATCH RFC 3/3] locking: Wire up contended_release tracepoint
Posted by Steven Rostedt 1 month ago
On Thu, 5 Mar 2026 18:42:23 +0100
Peter Zijlstra <peterz@infradead.org> wrote:

> I still wish you would accept:
> 
> 	if (trace_foo_enabled() && foo)
> 		__do_trace_foo();
> 
> The compilers can't optimize the static branches and thus you'll get it
> twice for no reason.
> 
> I really wish they would just accept __pure, but alas.

Makes sense, and it shouldn't be too hard to do. If I find some time I
could look at it, or perhaps someone lurking on this thread could give it
a try! (I may even Cc some people who want to learn this code.)

-- Steve