Add a lock contention tracepoint in the queued spinlock slowpath.
Also add the __lockfunc annotation so that in_lock_functions()
works as expected.
Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
---
arch/powerpc/lib/qspinlock.c | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index bcc7e4dff8c3..622e7f45c2ce 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -9,6 +9,7 @@
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>
+#include <trace/events/lock.h>
#define MAX_NODES 4
@@ -708,8 +709,9 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
qnodesp->count--;
}
-void queued_spin_lock_slowpath(struct qspinlock *lock)
+void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
{
+ trace_contention_begin(lock, LCB_F_SPIN);
/*
* This looks funny, but it induces the compiler to inline both
* sides of the branch rather than share code as when the condition
@@ -718,16 +720,17 @@ void queued_spin_lock_slowpath(struct qspinlock *lock)
if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
if (try_to_steal_lock(lock, true)) {
spec_barrier();
- return;
+ } else {
+ queued_spin_lock_mcs_queue(lock, true);
}
- queued_spin_lock_mcs_queue(lock, true);
} else {
if (try_to_steal_lock(lock, false)) {
spec_barrier();
- return;
+ } else {
+ queued_spin_lock_mcs_queue(lock, false);
}
- queued_spin_lock_mcs_queue(lock, false);
}
+ trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);
--
2.47.0
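As background on the second part of the change: in_lock_functions() simply checks whether an address falls inside the .spinlock.text section, and __lockfunc is the annotation that places a function there. A rough sketch of the generic definitions involved (paraphrased from include/linux/linkage.h and kernel/locking/spinlock.c, not part of this patch):

/*
 * Paraphrased sketch; the authoritative definitions live in
 * include/linux/linkage.h and kernel/locking/spinlock.c.
 */
#define __lockfunc __section(".spinlock.text")

/* Section bounds provided by the linker script. */
extern char __lock_text_start[], __lock_text_end[];

int in_lock_functions(unsigned long addr)
{
	/* True when addr lies inside a __lockfunc-annotated function. */
	return addr >= (unsigned long)__lock_text_start &&
	       addr < (unsigned long)__lock_text_end;
}

Without the annotation, queued_spin_lock_slowpath() lands in ordinary .text and profilers that rely on in_lock_functions() would not attribute samples in it to lock handling.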
On 2025-07-25 13:44, Nysal Jan K.A. wrote:
> Add a lock contention tracepoint in the queued spinlock slowpath.
> Also add the __lockfunc annotation so that in_lock_functions()
> works as expected.
>
> Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
> [...]

Hello,

I have verified the patch with the latest upstream Linux kernel, and here
are my findings:

———Kernel Version———
6.16.0-rc7-160000.11-default+

———perf --version———
perf version 6.16.rc7.g5f33ebd2018c

To test this patch, I used the Lockstorm benchmark, which rigorously
exercises spinlocks from kernel space.
Benchmark repository: https://github.com/lop-devops/lockstorm

To capture all events related to the Lockstorm benchmark, I used the
following command:

cmd: perf lock record -a insmod lockstorm.ko

After generating the perf.data, I analyzed the results using:

cmd: perf lock contention -a -i perf.data

————Logs————
 contended   total wait    max wait    avg wait        type  caller
   6187241      12.50 m     2.30 ms   121.22 us    spinlock  kthread+0x160
        78      8.23 ms   209.87 us   105.47 us    rwlock:W  do_exit+0x378
        71      7.97 ms   208.07 us   112.24 us    spinlock  do_exit+0x378
        68      4.18 ms   210.04 us    61.43 us    rwlock:W  release_task+0xe0
        63      3.96 ms   204.02 us    62.90 us    spinlock  release_task+0xe0
       115    477.15 us    19.69 us     4.15 us    spinlock  rcu_report_qs_rdp+0x40
       250    437.34 us     5.34 us     1.75 us    spinlock  raw_spin_rq_lock_nested+0x24
        32    156.32 us    13.56 us     4.88 us    spinlock  cgroup_exit+0x34
        19     88.12 us    12.20 us     4.64 us    spinlock  exit_fs+0x44
        12     23.25 us     3.09 us     1.94 us    spinlock  lock_hrtimer_base+0x4c
         1     18.83 us    18.83 us    18.83 us     rwsem:R  btrfs_tree_read_lock_nested+0x38
         1     17.84 us    17.84 us    17.84 us     rwsem:W  btrfs_tree_lock_nested+0x38
        10     15.75 us     5.72 us     1.58 us    spinlock  raw_spin_rq_lock_nested+0x24
         5     15.08 us     5.59 us     3.02 us    spinlock  mix_interrupt_randomness+0xb4
         2     12.78 us     9.50 us     4.26 us    spinlock  raw_spin_rq_lock_nested+0x24
         1     11.13 us    11.13 us    11.13 us    spinlock  __queue_work+0x338
         3     10.79 us     7.04 us     3.60 us    spinlock  raw_spin_rq_lock_nested+0x24
         3      8.17 us     4.58 us     2.72 us    spinlock  raw_spin_rq_lock_nested+0x24
         3      7.99 us     3.13 us     2.66 us    spinlock  lock_hrtimer_base+0x4c
         2      6.66 us     4.57 us     3.33 us    spinlock  free_pcppages_bulk+0x50
         3      5.34 us     2.19 us     1.78 us    spinlock  ibmvscsi_handle_crq+0x1e4
         2      3.71 us     2.32 us     1.85 us    spinlock  __hrtimer_run_queues+0x1b8
         2      2.98 us     2.19 us     1.49 us    spinlock  raw_spin_rq_lock_nested+0x24
         1      2.85 us     2.85 us     2.85 us    spinlock  raw_spin_rq_lock_nested+0x24
         2      2.15 us     1.09 us     1.07 us    spinlock  raw_spin_rq_lock_nested+0x24
         2      2.06 us     1.06 us     1.03 us    spinlock  raw_spin_rq_lock_nested+0x24
         1      1.69 us     1.69 us     1.69 us    spinlock  raw_spin_rq_lock_nested+0x24
         1      1.53 us     1.53 us     1.53 us    spinlock  __queue_work+0xd8
         1      1.27 us     1.27 us     1.27 us    spinlock  pull_rt_task+0xa0
         1      1.16 us     1.16 us     1.16 us    spinlock  raw_spin_rq_lock_nested+0x24
         1       740 ns      740 ns      740 ns    spinlock  add_device_randomness+0x5c
         1       566 ns      566 ns      566 ns    spinlock  raw_spin_rq_lock_nested+0x24

From the results, we were able to observe lock contention specifically on
spinlocks. The patch works as expected.

Thank you for the patch!

Tested-by: Samir Mulani <samir@linux.ibm.com>
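For context on the workload: Lockstorm is an out-of-tree module that spawns kernel threads which repeatedly acquire and release a spinlock. The sketch below is a hypothetical, minimal exerciser in that spirit; the module name, thread count, and hold delay are made up for illustration, and it is not the actual Lockstorm code.

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/err.h>

static DEFINE_SPINLOCK(storm_lock);		/* the lock being hammered */
static struct task_struct *workers[8];		/* arbitrary worker count */

static int storm_thread(void *data)
{
	while (!kthread_should_stop()) {
		spin_lock(&storm_lock);
		udelay(10);		/* hold briefly so other CPUs pile up in the slowpath */
		spin_unlock(&storm_lock);
		cond_resched();
	}
	return 0;
}

static int __init storm_init(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(workers); i++)
		workers[i] = kthread_run(storm_thread, NULL, "lockstorm/%d", i);
	return 0;
}

static void __exit storm_exit(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(workers); i++)
		if (!IS_ERR_OR_NULL(workers[i]))
			kthread_stop(workers[i]);
}

module_init(storm_init);
module_exit(storm_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Hypothetical spinlock contention exerciser (illustration only)");

Any module that forces several CPUs through the qspinlock slowpath like this will show up in perf lock contention once the tracepoint from this patch is in place.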
On 25/07/2025 at 10:14, Nysal Jan K.A. wrote:
> Add a lock contention tracepoint in the queued spinlock slowpath.
> Also add the __lockfunc annotation so that in_lock_functions()
> works as expected.
>
> Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
> ---
> arch/powerpc/lib/qspinlock.c | 13 ++++++++-----
> 1 file changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
> index bcc7e4dff8c3..622e7f45c2ce 100644
> --- a/arch/powerpc/lib/qspinlock.c
> +++ b/arch/powerpc/lib/qspinlock.c
> @@ -9,6 +9,7 @@
> #include <linux/sched/clock.h>
> #include <asm/qspinlock.h>
> #include <asm/paravirt.h>
> +#include <trace/events/lock.h>
>
> #define MAX_NODES 4
>
> @@ -708,8 +709,9 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
> qnodesp->count--;
> }
>
> -void queued_spin_lock_slowpath(struct qspinlock *lock)
> +void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
> {
> + trace_contention_begin(lock, LCB_F_SPIN);
> /*
> * This looks funny, but it induces the compiler to inline both
> * sides of the branch rather than share code as when the condition
> @@ -718,16 +720,17 @@ void queued_spin_lock_slowpath(struct qspinlock *lock)
> if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
> if (try_to_steal_lock(lock, true)) {
> spec_barrier();
> - return;
> + } else {
> + queued_spin_lock_mcs_queue(lock, true);

If I read correctly, now all this is a single line so you have to drop the
braces, see
https://docs.kernel.org/process/coding-style.html#placing-braces-and-spaces

> }
> - queued_spin_lock_mcs_queue(lock, true);
> } else {
> if (try_to_steal_lock(lock, false)) {
> spec_barrier();
> - return;
> + } else {
> + queued_spin_lock_mcs_queue(lock, false);
> }

Same here.

> - queued_spin_lock_mcs_queue(lock, false);
> }
> + trace_contention_end(lock, 0);
> }
> EXPORT_SYMBOL(queued_spin_lock_slowpath);
>
On Wed, Jul 30, 2025 at 08:46:28AM +0200, Christophe Leroy wrote:
>
>
> On 25/07/2025 at 10:14, Nysal Jan K.A. wrote:
> > @@ -718,16 +720,17 @@ void queued_spin_lock_slowpath(struct qspinlock *lock)
> > if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
> > if (try_to_steal_lock(lock, true)) {
> > spec_barrier();
> > - return;
> > + } else {
> > + queued_spin_lock_mcs_queue(lock, true);
>
> If I read correctly, now all this is a single line so you have to drop the
> braces, see
> https://docs.kernel.org/process/coding-style.html#placing-braces-and-spaces

Will fix the coding style in v2. Thanks for the review.

--Nysal
Add a lock contention tracepoint in the queued spinlock slowpath.
Also add the __lockfunc annotation so that in_lock_functions()
works as expected.
Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
---
arch/powerpc/lib/qspinlock.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index bcc7e4dff8c3..95ab4cdf582e 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -9,6 +9,7 @@
#include <linux/sched/clock.h>
#include <asm/qspinlock.h>
#include <asm/paravirt.h>
+#include <trace/events/lock.h>
#define MAX_NODES 4
@@ -708,26 +709,26 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
qnodesp->count--;
}
-void queued_spin_lock_slowpath(struct qspinlock *lock)
+void __lockfunc queued_spin_lock_slowpath(struct qspinlock *lock)
{
+ trace_contention_begin(lock, LCB_F_SPIN);
/*
* This looks funny, but it induces the compiler to inline both
* sides of the branch rather than share code as when the condition
* is passed as the paravirt argument to the functions.
*/
if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
- if (try_to_steal_lock(lock, true)) {
+ if (try_to_steal_lock(lock, true))
spec_barrier();
- return;
- }
- queued_spin_lock_mcs_queue(lock, true);
+ else
+ queued_spin_lock_mcs_queue(lock, true);
} else {
- if (try_to_steal_lock(lock, false)) {
+ if (try_to_steal_lock(lock, false))
spec_barrier();
- return;
- }
- queued_spin_lock_mcs_queue(lock, false);
+ else
+ queued_spin_lock_mcs_queue(lock, false);
}
+ trace_contention_end(lock, 0);
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);
--
2.47.0
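A note on the LCB_F_SPIN argument: trace_contention_begin() takes a set of hint flags that tools such as perf lock contention use to classify the lock type (the "spinlock" and "rwlock:W" entries in the test report earlier in this thread). The definitions below are paraphrased from include/trace/events/lock.h; check the header in your tree for the authoritative values.

/* Paraphrased from include/trace/events/lock.h (verify against your tree). */
#define LCB_F_SPIN	(1U << 0)	/* spinning lock: spinlock, rwlock */
#define LCB_F_READ	(1U << 1)	/* read side of an rwlock/rwsem */
#define LCB_F_WRITE	(1U << 2)	/* write side of an rwlock/rwsem */
#define LCB_F_RT	(1U << 3)	/* RT (rtmutex-based) variant */
#define LCB_F_PERCPU	(1U << 4)	/* per-cpu rwsem */
#define LCB_F_MUTEX	(1U << 5)	/* mutex */

As I read the tracepoint definition, the second argument of trace_contention_end() is a status value, and the 0 passed here indicates the lock was acquired.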
On 7/31/25 11:48, Nysal Jan K.A. wrote:
> Add a lock contention tracepoint in the queued spinlock slowpath.
> Also add the __lockfunc annotation so that in_lock_functions()
> works as expected.
>

There is a bit of pure code movement. Given that it is small, a single
patch is fine.

> Signed-off-by: Nysal Jan K.A. <nysal@linux.ibm.com>
> [...]

Tried the patch and was able to see the tracepoints.

Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
On Thu, 31 Jul 2025 11:48:53 +0530, Nysal Jan K.A. wrote:
> Add a lock contention tracepoint in the queued spinlock slowpath.
> Also add the __lockfunc annotation so that in_lock_functions()
> works as expected.
>
>

Applied to powerpc/next.

[1/1] powerpc/qspinlock: Add spinlock contention tracepoint
      https://git.kernel.org/powerpc/c/4f61d54d2245c15b23ad78a89f854fb2496b6216

Thanks