[PATCH v4 3/5] locking: Add contended_release tracepoint to sleepable locks
Posted by Dmitry Ilvokhin 1 week ago
Add the contended_release trace event. This tracepoint fires on the
holder side when a contended lock is released, complementing the
existing contention_begin/contention_end tracepoints, which fire on the
waiter side.

This enables correlating lock hold time under contention with waiter
events by lock address.
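
For example, hold time under contention can be estimated by pairing a
waiter's successful contention_end (ret == 0) with the holder's later
contended_release on the same lock address. A minimal BPF sketch
(illustrative only, not part of this series; assumes libbpf and a
generated vmlinux.h):

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  struct {
          __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 10240);
          __type(key, u64);   /* lock address */
          __type(value, u64); /* acquisition timestamp, ns */
  } acq SEC(".maps");

  SEC("tp_btf/contention_end")
  int BPF_PROG(on_end, void *lock, int ret)
  {
          u64 key = (u64)lock, now = bpf_ktime_get_ns();

          if (!ret) /* lock acquired: this waiter is now the holder */
                  bpf_map_update_elem(&acq, &key, &now, BPF_ANY);
          return 0;
  }

  SEC("tp_btf/contended_release")
  int BPF_PROG(on_release, void *lock)
  {
          u64 key = (u64)lock, *ts = bpf_map_lookup_elem(&acq, &key);

          if (ts) {
                  bpf_printk("lock %llx held %llu ns under contention",
                             key, bpf_ktime_get_ns() - *ts);
                  bpf_map_delete_elem(&acq, &key);
          }
          return 0;
  }

  char LICENSE[] SEC("license") = "GPL";

The pairing is approximate: a holder that took the lock without ever
contending leaves no contention_end timestamp, so only hold times of
previously contended holders are reported.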

Add trace_contended_release() calls to the slowpath unlock paths of
sleepable locks: mutex, rtmutex, semaphore, rwsem, percpu-rwsem, and
RT-specific rwbase locks.

Where possible, trace_contended_release() fires before the lock is
released and before the waiter is woken. For some lock types, the
tracepoint fires after the release but before the wake. Making the
placement consistent across all lock types is not worth the added
complexity.

For reader/writer locks, the tracepoint fires for every reader that
releases while a writer is waiting, not only for the last reader.

Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
 include/trace/events/lock.h   | 17 +++++++++++++++++
 kernel/locking/mutex.c        |  4 ++++
 kernel/locking/percpu-rwsem.c | 11 +++++++++++
 kernel/locking/rtmutex.c      |  1 +
 kernel/locking/rwbase_rt.c    |  6 ++++++
 kernel/locking/rwsem.c        | 10 ++++++++--
 kernel/locking/semaphore.c    |  4 ++++
 7 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/include/trace/events/lock.h b/include/trace/events/lock.h
index da978f2afb45..1ded869cd619 100644
--- a/include/trace/events/lock.h
+++ b/include/trace/events/lock.h
@@ -137,6 +137,23 @@ TRACE_EVENT(contention_end,
 	TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret)
 );
 
+TRACE_EVENT(contended_release,
+
+	TP_PROTO(void *lock),
+
+	TP_ARGS(lock),
+
+	TP_STRUCT__entry(
+		__field(void *, lock_addr)
+	),
+
+	TP_fast_assign(
+		__entry->lock_addr = lock;
+	),
+
+	TP_printk("%p", __entry->lock_addr)
+);
+
 #endif /* _TRACE_LOCK_H */
 
 /* This part must be outside protection */
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 427187ff02db..6c2c9312eb8f 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -997,6 +997,9 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
 		wake_q_add(&wake_q, next);
 	}
 
+	if (trace_contended_release_enabled() && waiter)
+		trace_contended_release(lock);
+
 	if (owner & MUTEX_FLAG_HANDOFF)
 		__mutex_handoff(lock, next);
 
@@ -1194,6 +1197,7 @@ EXPORT_SYMBOL(ww_mutex_lock_interruptible);
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(contention_begin);
 EXPORT_TRACEPOINT_SYMBOL_GPL(contention_end);
+EXPORT_TRACEPOINT_SYMBOL_GPL(contended_release);
 
 /**
  * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index f3ee7a0d6047..46b5903989b8 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -263,6 +263,9 @@ void percpu_up_write(struct percpu_rw_semaphore *sem)
 {
 	rwsem_release(&sem->dep_map, _RET_IP_);
 
+	if (trace_contended_release_enabled() && wq_has_sleeper(&sem->waiters))
+		trace_contended_release(sem);
+
 	/*
 	 * Signal the writer is done, no fast path yet.
 	 *
@@ -292,6 +295,14 @@ EXPORT_SYMBOL_GPL(percpu_up_write);
 void __percpu_up_read(struct percpu_rw_semaphore *sem)
 {
 	lockdep_assert_preemption_disabled();
+	/*
+	 * After percpu_up_write() completes, rcu_sync_is_idle() can still
+	 * return false during the grace period, forcing readers into this
+	 * slowpath. Only trace when a writer is actually waiting for
+	 * readers to drain.
+	 */
+	if (trace_contended_release_enabled() && rcuwait_active(&sem->writer))
+		trace_contended_release(sem);
 	/*
 	 * slowpath; reader will only ever wake a single blocked
 	 * writer.
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index ccaba6148b61..3db8a840b4e8 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1466,6 +1466,7 @@ static void __sched rt_mutex_slowunlock(struct rt_mutex_base *lock)
 		raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	}
 
+	trace_contended_release(lock);
 	/*
 	 * The wakeup next waiter path does not suffer from the above
 	 * race. See the comments there.
diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c
index 82e078c0665a..74da5601018f 100644
--- a/kernel/locking/rwbase_rt.c
+++ b/kernel/locking/rwbase_rt.c
@@ -174,6 +174,8 @@ static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
 static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
 					       unsigned int state)
 {
+	if (trace_contended_release_enabled() && rt_mutex_owner(&rwb->rtmutex))
+		trace_contended_release(rwb);
 	/*
 	 * rwb->readers can only hit 0 when a writer is waiting for the
 	 * active readers to leave the critical section.
@@ -205,6 +207,8 @@ static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+	if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+		trace_contended_release(rwb);
 	__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
 }
 
@@ -214,6 +218,8 @@ static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
+	if (trace_contended_release_enabled() && rt_mutex_has_waiters(rtm))
+		trace_contended_release(rwb);
 	/* Release it and account current as reader */
 	__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
 }
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index bf647097369c..602d5fd3c91a 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1387,6 +1387,8 @@ static inline void __up_read(struct rw_semaphore *sem)
 	rwsem_clear_reader_owned(sem);
 	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
 	DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
+	if (trace_contended_release_enabled() && (tmp & RWSEM_FLAG_WAITERS))
+		trace_contended_release(sem);
 	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
 		      RWSEM_FLAG_WAITERS)) {
 		clear_nonspinnable(sem);
@@ -1413,8 +1415,10 @@ static inline void __up_write(struct rw_semaphore *sem)
 	preempt_disable();
 	rwsem_clear_owner(sem);
 	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
-	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
+	if (unlikely(tmp & RWSEM_FLAG_WAITERS)) {
+		trace_contended_release(sem);
 		rwsem_wake(sem);
+	}
 	preempt_enable();
 }
 
@@ -1437,8 +1441,10 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
 	tmp = atomic_long_fetch_add_release(
 		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
 	rwsem_set_reader_owned(sem);
-	if (tmp & RWSEM_FLAG_WAITERS)
+	if (tmp & RWSEM_FLAG_WAITERS) {
+		trace_contended_release(sem);
 		rwsem_downgrade_wake(sem);
+	}
 	preempt_enable();
 }
 
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index 74d41433ba13..35ac3498dca5 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -230,6 +230,10 @@ void __sched up(struct semaphore *sem)
 		sem->count++;
 	else
 		__up(sem, &wake_q);
+
+	if (trace_contended_release_enabled() && !wake_q_empty(&wake_q))
+		trace_contended_release(sem);
+
 	raw_spin_unlock_irqrestore(&sem->lock, flags);
 	if (!wake_q_empty(&wake_q))
 		wake_up_q(&wake_q);
-- 
2.52.0
Re: [PATCH v4 3/5] locking: Add contended_release tracepoint to sleepable locks
Posted by Usama Arif 2 days, 9 hours ago
On Thu, 26 Mar 2026 15:10:02 +0000 Dmitry Ilvokhin <d@ilvokhin.com> wrote:

> [...]
> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> index 427187ff02db..6c2c9312eb8f 100644
> --- a/kernel/locking/mutex.c
> +++ b/kernel/locking/mutex.c
> @@ -997,6 +997,9 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
>  		wake_q_add(&wake_q, next);
>  	}
>  
> +	if (trace_contended_release_enabled() && waiter)
> +		trace_contended_release(lock);
> +

This won't compile? waiter is declared in the if block, so you are using
it outside scope here.
Re: [PATCH v4 3/5] locking: Add contended_release tracepoint to sleepable locks
Posted by Dmitry Ilvokhin 2 days, 8 hours ago
On Tue, Mar 31, 2026 at 03:34:50AM -0700, Usama Arif wrote:
> On Thu, 26 Mar 2026 15:10:02 +0000 Dmitry Ilvokhin <d@ilvokhin.com> wrote:
> 
> > [...]
> > diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
> > index 427187ff02db..6c2c9312eb8f 100644
> > --- a/kernel/locking/mutex.c
> > +++ b/kernel/locking/mutex.c
> > @@ -997,6 +997,9 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
> >  		wake_q_add(&wake_q, next);
> >  	}
> >  
> > +	if (trace_contended_release_enabled() && waiter)
> > +		trace_contended_release(lock);
> > +
> 
> This won't compile? waiter is declared in the if block, so you are using
> it outside scope here.
>

Thanks for the feedback, Usama.

waiter is declared at function scope, right on top. It's also assigned
before the if block, so it's still in scope at the tracepoint.
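
In other words, the shape is (a stand-alone toy of the scoping only,
not the kernel code):

  #include <stdbool.h>
  #include <stdio.h>

  struct waiter { int task; };

  static struct waiter first_waiter = { .task = 42 };

  static bool wait_list_empty(void) { return false; }

  static void unlock_slowpath(void)
  {
          struct waiter *waiter = NULL; /* declared at function scope */

          if (!wait_list_empty()) {
                  waiter = &first_waiter; /* assigned before the check */
                  printf("wake task %d\n", waiter->task);
          }

          if (waiter) /* still in scope; NULL only if no waiters */
                  printf("contended release\n");
  }

  int main(void)
  {
          unlock_slowpath();
          return 0;
  }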
Re: [PATCH v4 3/5] locking: Add contended_release tracepoint to sleepable locks
Posted by Usama Arif 2 days, 6 hours ago

On 31/03/2026 15:16, Dmitry Ilvokhin wrote:
> On Tue, Mar 31, 2026 at 03:34:50AM -0700, Usama Arif wrote:
>> On Thu, 26 Mar 2026 15:10:02 +0000 Dmitry Ilvokhin <d@ilvokhin.com> wrote:
>>
>>> [...]
>>> diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
>>> index 427187ff02db..6c2c9312eb8f 100644
>>> --- a/kernel/locking/mutex.c
>>> +++ b/kernel/locking/mutex.c
>>> @@ -997,6 +997,9 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
>>>  		wake_q_add(&wake_q, next);
>>>  	}
>>>  
>>> +	if (trace_contended_release_enabled() && waiter)
>>> +		trace_contended_release(lock);
>>> +
>>
>> This won't compile? waiter is declared in the if block, so you are using
>> it outside scope here.
>>
> 
> Thanks for the feedback, Usama.
> 
> waiter is declared at function scope, right on top. It's also assigned
> before the if block, so it's still in scope at the tracepoint.

Ah ok, I was reviewing on top of today's mm-new branch, where waiter
is declared in the if block. Probably something changed related to
locking/tracing that is not in mm-new yet.