Add a lock contention tracepoint in the queued spinlock slowpath.
Also add the __lockfunc annotation so that in_lock_functions()
works as expected.
Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
---
arch/powerpc/lib/qspinlock.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index bcc7e4dff8c3..622e7f45c2ce 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -9,6 +9,7 @@
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>
+#include <trace/events/lock.h>
#define MAX_NODES 4
@@ -708,8 +709,9 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
qnodesp->count--;
}
-void queued_spin_lock_slowpath(struct qspinlock *lock)
+void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
{
+ trace_contention_begin(lock, LCB_F_SPIN);
/*
* This looks funny, but it induces the compiler to inline both
* sides of the branch rather than share code as when the condition
@@ -718,16 +720,17 @@ void queued_spin_lock_slowpath(struct qspinlock *lock)
if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
if (try_to_steal_lock(lock, true)) {
spec_barrier();
- return;
+ } else {
+ queued_spin_lock_mcs_queue(lock, true);
}
- queued_spin_lock_mcs_queue(lock, true);
} else {
if (try_to_steal_lock(lock, false)) {
spec_barrier();
- return;
+ } else {
+ queued_spin_lock_mcs_queue(lock, false);
}
- queued_spin_lock_mcs_queue(lock, false);
}
+ trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);
--
2.47.0
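As background on the second part of the change: in_lock_functions() simply checks whether an address falls inside the .spinlock.text section, and __lockfunc is the annotation that places a function there. A rough sketch of the generic definitions involved (paraphrased from include/linux/linkage.h and kernel/locking/spinlock.c, not part of this patch):

/*
 * Paraphrased sketch; the authoritative definitions live in
 * include/linux/linkage.h and kernel/locking/spinlock.c.
 */
#define __lockfunc __section(".spinlock.text")

/* Section bounds provided by the linker script. */
extern char __lock_text_start[], __lock_text_end[];

int in_lock_functions(unsigned long addr)
{
	/* True when addr lies inside a __lockfunc-annotated function. */
	return addr >= (unsigned long)__lock_text_start &&
	       addr < (unsigned long)__lock_text_end;
}

Without the annotation, queued_spin_lock_slowpath() lands in ordinary .text and profilers that rely on in_lock_functions() would not attribute samples in it to lock handling.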
On 2025-07-25 13:44, Nysal Jan K.A. wrote:
> Add a lock contention tracepoint in the queued spinlock slowpath.
> Also add the __lockfunc annotation so that in_lock_functions()
> works as expected.
>
> Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
> [...]

Hello,

I have verified the patch with the latest upstream Linux kernel, and here
are my findings:

———Kernel Version———
6.16.0-rc7-160000.11-default+

———perf --version———
perf version 6.16.rc7.g5f33ebd2018c

To test this patch, I used the Lockstorm benchmark, which rigorously
exercises spinlocks from kernel space.
Benchmark repository: https://github.com/lop-devops/lockstorm

To capture all events related to the Lockstorm benchmark, I used the
following command:

cmd: perf lock record -a insmod lockstorm.ko

After generating the perf.data, I analyzed the results using:

cmd: perf lock contention -a -i perf.data

————Logs————
 contended   total wait    max wait    avg wait        type  caller
   6187241      12.50 m     2.30 ms   121.22 us    spinlock  kthread+0x160
        78      8.23 ms   209.87 us   105.47 us    rwlock:W  do_exit+0x378
        71      7.97 ms   208.07 us   112.24 us    spinlock  do_exit+0x378
        68      4.18 ms   210.04 us    61.43 us    rwlock:W  release_task+0xe0
        63      3.96 ms   204.02 us    62.90 us    spinlock  release_task+0xe0
       115    477.15 us    19.69 us     4.15 us    spinlock  rcu_report_qs_rdp+0x40
       250    437.34 us     5.34 us     1.75 us    spinlock  raw_spin_rq_lock_nested+0x24
        32    156.32 us    13.56 us     4.88 us    spinlock  cgroup_exit+0x34
        19     88.12 us    12.20 us     4.64 us    spinlock  exit_fs+0x44
        12     23.25 us     3.09 us     1.94 us    spinlock  lock_hrtimer_base+0x4c
         1     18.83 us    18.83 us    18.83 us     rwsem:R  btrfs_tree_read_lock_nested+0x38
         1     17.84 us    17.84 us    17.84 us     rwsem:W  btrfs_tree_lock_nested+0x38
        10     15.75 us     5.72 us     1.58 us    spinlock  raw_spin_rq_lock_nested+0x24
         5     15.08 us     5.59 us     3.02 us    spinlock  mix_interrupt_randomness+0xb4
         2     12.78 us     9.50 us     4.26 us    spinlock  raw_spin_rq_lock_nested+0x24
         1     11.13 us    11.13 us    11.13 us    spinlock  __queue_work+0x338
         3     10.79 us     7.04 us     3.60 us    spinlock  raw_spin_rq_lock_nested+0x24
         3      8.17 us     4.58 us     2.72 us    spinlock  raw_spin_rq_lock_nested+0x24
         3      7.99 us     3.13 us     2.66 us    spinlock  lock_hrtimer_base+0x4c
         2      6.66 us     4.57 us     3.33 us    spinlock  free_pcppages_bulk+0x50
         3      5.34 us     2.19 us     1.78 us    spinlock  ibmvscsi_handle_crq+0x1e4
         2      3.71 us     2.32 us     1.85 us    spinlock  __hrtimer_run_queues+0x1b8
         2      2.98 us     2.19 us     1.49 us    spinlock  raw_spin_rq_lock_nested+0x24
         1      2.85 us     2.85 us     2.85 us    spinlock  raw_spin_rq_lock_nested+0x24
         2      2.15 us     1.09 us     1.07 us    spinlock  raw_spin_rq_lock_nested+0x24
         2      2.06 us     1.06 us     1.03 us    spinlock  raw_spin_rq_lock_nested+0x24
         1      1.69 us     1.69 us     1.69 us    spinlock  raw_spin_rq_lock_nested+0x24
         1      1.53 us     1.53 us     1.53 us    spinlock  __queue_work+0xd8
         1      1.27 us     1.27 us     1.27 us    spinlock  pull_rt_task+0xa0
         1      1.16 us     1.16 us     1.16 us    spinlock  raw_spin_rq_lock_nested+0x24
         1       740 ns      740 ns      740 ns    spinlock  add_device_randomness+0x5c
         1       566 ns      566 ns      566 ns    spinlock  raw_spin_rq_lock_nested+0x24

From the results, we were able to observe lock contention specifically on
spinlocks. The patch works as expected.

Thank you for the patch!

Tested-by: Samir Mulani <samir@linux.ibm.com>
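For context on the workload: Lockstorm is an out-of-tree module that spawns kernel threads which repeatedly acquire and release a spinlock. The sketch below is a hypothetical, minimal exerciser in that spirit; the module name, thread count, and hold delay are made up for illustration, and it is not the actual Lockstorm code.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/err.h>

static DEFINE_SPINLOCK(storm_lock);		/* the lock being hammered */
static struct task_struct *workers[8];		/* arbitrary worker count */

static int storm_thread(void *data)
{
	while (!kthread_should_stop()) {
		spin_lock(&storm_lock);
		udelay(10);		/* hold briefly so other CPUs pile up in the slowpath */
		spin_unlock(&storm_lock);
		cond_resched();
	}
	return 0;
}

static int __init storm_init(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(workers); i++)
		workers[i] = kthread_run(storm_thread, NULL, "lockstorm/%d", i);
	return 0;
}

static void __exit storm_exit(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(workers); i++)
		if (!IS_ERR_OR_NULL(workers[i]))
			kthread_stop(workers[i]);
}

module_init(storm_init);
module_exit(storm_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Hypothetical spinlock contention exerciser (illustration only)");

Any module that forces several CPUs through the qspinlock slowpath like this will show up in perf lock contention once the tracepoint from this patch is in place.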
On 25/07/2025 at 10:14, Nysal Jan K.A. wrote:
> Add a lock contention tracepoint in the queued spinlock slowpath.
> Also add the __lockfunc annotation so that in_lock_functions()
> works as expected.
>
> Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
> ---
> arch/powerpc/lib/qspinlock.c | 13 ++++++++-----
> 1 file changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index bcc7e4dff8c3..622e7f45c2ce 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -9,6 +9,7 @@
> #include <linux/sched/clock.h>
> #include <asm/qspinlock.h>
> #include <asm/paravirt.h>
> +#include <trace/events/lock.h>
>
> #define MAX_NODES 4
>
> @@ -708,8 +709,9 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
> qnodesp->count--;
> }
>
> -void queued_spin_lock_slowpath(struct qspinlock *lock)
> +void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
> {
> + trace_contention_begin(lock, LCB_F_SPIN);
> /*
> * This looks funny, but it induces the compiler to inline both
> * sides of the branch rather than share code as when the condition
> @@ -718,16 +720,17 @@ void queued_spin_lock_slowpath(struct qspinlock *lock)
> if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
> if (try_to_steal_lock(lock, true)) {
> spec_barrier();
> - return;
> + } else {
> + queued_spin_lock_mcs_queue(lock, true);

If I read correctly, now all this is a single line so you have to drop the
braces, see
https://docs.kernel.org/process/coding-style.html#placing-braces-and-spaces

> }
> - queued_spin_lock_mcs_queue(lock, true);
> } else {
> if (try_to_steal_lock(lock, false)) {
> spec_barrier();
> - return;
> + } else {
> + queued_spin_lock_mcs_queue(lock, false);
> }

Same here.

> - queued_spin_lock_mcs_queue(lock, false);
> }
> + trace_contention_end(lock, 0);
> }
> EXPORT_SYMBOL(queued_spin_lock_slowpath);
>
On Wed, Jul 30, 2025 at 08:46:28AM +0200, Christophe Leroy wrote:
>
>
> On 25/07/2025 at 10:14, Nysal Jan K.A. wrote:
> > @@ -718,16 +720,17 @@ void queued_spin_lock_slowpath(struct qspinlock *lock)
> > if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
> > if (try_to_steal_lock(lock, true)) {
> > spec_barrier();
> > - return;
> > + } else {
> > + queued_spin_lock_mcs_queue(lock, true);
>
> If I read correctly, now all this is a single line so you have to drop the
> braces, see
> https://docs.kernel.org/process/coding-style.html#placing-braces-and-spaces

Will fix the coding style in v2. Thanks for the review.

--Nysal
Add a lock contention tracepoint in the queued spinlock slowpath.
Also add the __lockfunc annotation so that in_lock_functions()
works as expected.
Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
---
arch/powerpc/lib/qspinlock.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index bcc7e4dff8c3..95ab4cdf582e 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -9,6 +9,7 @@
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>
+#include <trace/events/lock.h>
#define MAX_NODES 4
@@ -708,26 +709,26 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
qnodesp->count--;
}
-void queued_spin_lock_slowpath(struct qspinlock *lock)
+void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
{
+ trace_contention_begin(lock, LCB_F_SPIN);
/*
* This looks funny, but it induces the compiler to inline both
* sides of the branch rather than share code as when the condition
* is passed as the paravirt argument to the functions.
*/
if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
- if (try_to_steal_lock(lock, true)) {
+ if (try_to_steal_lock(lock, true))
spec_barrier();
- return;
- }
- queued_spin_lock_mcs_queue(lock, true);
+ else
+ queued_spin_lock_mcs_queue(lock, true);
} else {
- if (try_to_steal_lock(lock, false)) {
+ if (try_to_steal_lock(lock, false))
spec_barrier();
- return;
- }
- queued_spin_lock_mcs_queue(lock, false);
+ else
+ queued_spin_lock_mcs_queue(lock, false);
}
+ trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);
--
2.47.0
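A note on the LCB_F_SPIN argument: trace_contention_begin() takes a set of hint flags that tools such as perf lock contention use to classify the lock type (the "spinlock" and "rwlock:W" entries in the test report earlier in this thread). The definitions below are paraphrased from include/trace/events/lock.h; check the header in your tree for the authoritative values.

/* Paraphrased from include/trace/events/lock.h (verify against your tree). */
#define LCB_F_SPIN	(1U << 0)	/* spinning lock: spinlock, rwlock */
#define LCB_F_READ	(1U << 1)	/* read side of an rwlock/rwsem */
#define LCB_F_WRITE	(1U << 2)	/* write side of an rwlock/rwsem */
#define LCB_F_RT	(1U << 3)	/* RT (rtmutex-based) variant */
#define LCB_F_PERCPU	(1U << 4)	/* per-cpu rwsem */
#define LCB_F_MUTEX	(1U << 5)	/* mutex */

As I read the tracepoint definition, the second argument of trace_contention_end() is a status value, and the 0 passed here indicates the lock was acquired.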
On 7/31/25 11:48, Nysal Jan K.A. wrote:
> Add a lock contention tracepoint in the queued spinlock slowpath.
> Also add the __lockfunc annotation so that in_lock_functions()
> works as expected.
>

There is a bit of pure code movement. Given that it is small, a single
patch is fine.

> Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
> [...]

Tried the patch and was able to see the tracepoints.

Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
On Thu, 31 Jul 2025 11:48:53 +0530, Nysal Jan K.A. wrote:
> Add a lock contention tracepoint in the queued spinlock slowpath.
> Also add the __lockfunc annotation so that in_lock_functions()
> works as expected.
>
>

Applied to powerpc/next.

[1/1] powerpc/qspinlock: Add spinlock contention tracepoint
      https://git.kernel.org/powerpc/c/4f61d54d2245c15b23ad78a89f854fb2496b6216

Thanks