Extend the contended_release tracepoint to queued spinlocks and queued
rwlocks.
Use the arch-overridable queued_spin_release(), introduced in the
previous commit, to ensure the tracepoint works correctly across all
architectures, including those with custom unlock implementations (e.g.
x86 paravirt).
When the tracepoint is disabled, the only addition to the hot path is a
single NOP instruction (the static branch). When enabled, the contention
check, trace call, and unlock are combined in an out-of-line function to
minimize hot path impact, which avoids forcing the compiler to preserve
the lock pointer in a callee-saved register across the trace call.
Binary size impact (x86_64, defconfig):
uninlined unlock (common case): +983 bytes (+0.00%)
inlined unlock (worst case): +58165 bytes (+0.24%)
The inlined unlock case could not be achieved through Kconfig options,
as PREEMPT_BUILD unconditionally selects UNINLINE_SPIN_UNLOCK on
x86_64. The UNINLINE_SPIN_UNLOCK guards were manually inverted to force
inline the unlock path and estimate the worst case binary size increase.
Architectures with fully custom qspinlock implementations (e.g.
PowerPC) are not covered by this change.
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
include/asm-generic/qrwlock.h | 48 +++++++++++++++++++++++++++------
include/asm-generic/qspinlock.h | 18 +++++++++++++
kernel/locking/qrwlock.c | 16 +++++++++++
kernel/locking/qspinlock.c | 8 ++++++
4 files changed, 82 insertions(+), 8 deletions(-)
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 75b8f4601b28..e24dc537fd66 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -14,6 +14,7 @@
#define __ASM_GENERIC_QRWLOCK_H
#include <linux/atomic.h>
+#include <linux/tracepoint-defs.h>
#include <asm/barrier.h>
#include <asm/processor.h>
@@ -35,6 +36,10 @@
*/
extern void queued_read_lock_slowpath(struct qrwlock *lock);
extern void queued_write_lock_slowpath(struct qrwlock *lock);
+extern void queued_read_unlock_traced(struct qrwlock *lock);
+extern void queued_write_unlock_traced(struct qrwlock *lock);
+
+DECLARE_TRACEPOINT(contended_release);
/**
* queued_read_trylock - try to acquire read lock of a queued rwlock
@@ -102,10 +107,16 @@ static inline void queued_write_lock(struct qrwlock *lock)
}
/**
- * queued_read_unlock - release read lock of a queued rwlock
+ * queued_rwlock_is_contended - check if the lock is contended
* @lock : Pointer to queued rwlock structure
+ * Return: 1 if lock contended, 0 otherwise
*/
-static inline void queued_read_unlock(struct qrwlock *lock)
+static inline int queued_rwlock_is_contended(struct qrwlock *lock)
+{
+ return arch_spin_is_locked(&lock->wait_lock);
+}
+
+static __always_inline void __queued_read_unlock(struct qrwlock *lock)
{
/*
* Atomically decrement the reader count
@@ -114,22 +125,43 @@ static inline void queued_read_unlock(struct qrwlock *lock)
}
/**
- * queued_write_unlock - release write lock of a queued rwlock
+ * queued_read_unlock - release read lock of a queued rwlock
* @lock : Pointer to queued rwlock structure
*/
-static inline void queued_write_unlock(struct qrwlock *lock)
+static inline void queued_read_unlock(struct qrwlock *lock)
+{
+ /*
+ * Trace and unlock are combined in the traced unlock variant so
+ * the compiler does not need to preserve the lock pointer across
+ * the function call, avoiding callee-saved register save/restore
+ * on the hot path.
+ */
+ if (tracepoint_enabled(contended_release)) {
+ queued_read_unlock_traced(lock);
+ return;
+ }
+
+ __queued_read_unlock(lock);
+}
+
+static __always_inline void __queued_write_unlock(struct qrwlock *lock)
{
smp_store_release(&lock->wlocked, 0);
}
/**
- * queued_rwlock_is_contended - check if the lock is contended
+ * queued_write_unlock - release write lock of a queued rwlock
* @lock : Pointer to queued rwlock structure
- * Return: 1 if lock contended, 0 otherwise
*/
-static inline int queued_rwlock_is_contended(struct qrwlock *lock)
+static inline void queued_write_unlock(struct qrwlock *lock)
{
- return arch_spin_is_locked(&lock->wait_lock);
+ /* See comment in queued_read_unlock(). */
+ if (tracepoint_enabled(contended_release)) {
+ queued_write_unlock_traced(lock);
+ return;
+ }
+
+ __queued_write_unlock(lock);
}
/*
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index df76f34645a0..915a4c2777f6 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -41,6 +41,7 @@
#include <asm-generic/qspinlock_types.h>
#include <linux/atomic.h>
+#include <linux/tracepoint-defs.h>
#ifndef queued_spin_is_locked
/**
@@ -129,12 +130,29 @@ static __always_inline void queued_spin_release(struct qspinlock *lock)
}
#endif
+DECLARE_TRACEPOINT(contended_release);
+
+extern void queued_spin_release_traced(struct qspinlock *lock);
+
/**
* queued_spin_unlock - unlock a queued spinlock
* @lock : Pointer to queued spinlock structure
+ *
+ * Generic tracing wrapper around the arch-overridable
+ * queued_spin_release().
*/
static __always_inline void queued_spin_unlock(struct qspinlock *lock)
{
+ /*
+ * Trace and release are combined in queued_spin_release_traced() so
+ * the compiler does not need to preserve the lock pointer across the
+ * function call, avoiding callee-saved register save/restore on the
+ * hot path.
+ */
+ if (tracepoint_enabled(contended_release)) {
+ queued_spin_release_traced(lock);
+ return;
+ }
queued_spin_release(lock);
}
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index d2ef312a8611..5f7a0fc2b27a 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -90,3 +90,19 @@ void __lockfunc queued_write_lock_slowpath(struct qrwlock *lock)
trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_write_lock_slowpath);
+
+void __lockfunc queued_read_unlock_traced(struct qrwlock *lock)
+{
+ if (queued_rwlock_is_contended(lock))
+ trace_contended_release(lock);
+ __queued_read_unlock(lock);
+}
+EXPORT_SYMBOL(queued_read_unlock_traced);
+
+void __lockfunc queued_write_unlock_traced(struct qrwlock *lock)
+{
+ if (queued_rwlock_is_contended(lock))
+ trace_contended_release(lock);
+ __queued_write_unlock(lock);
+}
+EXPORT_SYMBOL(queued_write_unlock_traced);
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index af8d122bb649..c72610980ec7 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -104,6 +104,14 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
#endif
+void __lockfunc queued_spin_release_traced(struct qspinlock *lock)
+{
+ if (queued_spin_is_contended(lock))
+ trace_contended_release(lock);
+ queued_spin_release(lock);
+}
+EXPORT_SYMBOL(queued_spin_release_traced);
+
#endif /* _GEN_PV_LOCK_SLOWPATH */
/**
--
2.52.0
© 2016 - 2026 Red Hat, Inc.