xchg() uses the amoswap.X instructions from Zabha but still relies on
the LR/SC acquire/release mappings, which require explicit barriers.

Let's improve that by using the amoswap .aq/.rl acquire/release
semantics in order to avoid those barriers.

Suggested-by: Andrea Parri <andrea@rivosinc.com>
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/include/asm/cmpxchg.h | 35 +++++++++++++-------------------
1 file changed, 14 insertions(+), 21 deletions(-)
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index eb35e2d30a97..0e57d5fbf227 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -11,8 +11,8 @@
#include <asm/fence.h>
#include <asm/alternative.h>
-#define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append, \
- swap_append, r, p, n) \
+#define __arch_xchg_masked(sc_sfx, swap_sfx, sc_prepend, sc_append, \
+ r, p, n) \
({ \
__label__ zabha, end; \
\
@@ -31,7 +31,7 @@
ulong __rc; \
\
__asm__ __volatile__ ( \
- prepend \
+ sc_prepend \
"0: lr.w %0, %2\n" \
" and %1, %0, %z4\n" \
" or %1, %1, %z3\n" \
@@ -48,9 +48,7 @@
zabha: \
if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
__asm__ __volatile__ ( \
- prepend \
" amoswap" swap_sfx " %0, %z2, %1\n" \
- swap_append \
: "=&r" (r), "+A" (*(p)) \
: "rJ" (n) \
: "memory"); \
@@ -58,19 +56,17 @@ zabha: \
end:; \
})
-#define __arch_xchg(sfx, prepend, append, r, p, n) \
+#define __arch_xchg(sfx, r, p, n) \
({ \
__asm__ __volatile__ ( \
- prepend \
" amoswap" sfx " %0, %2, %1\n" \
- append \
: "=r" (r), "+A" (*(p)) \
: "r" (n) \
: "memory"); \
})
-#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, prepend, \
- sc_append, swap_append) \
+#define _arch_xchg(ptr, new, sc_sfx, swap_sfx, \
+ sc_prepend, sc_append) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __new = (new); \
@@ -79,21 +75,19 @@ end:; \
switch (sizeof(*__ptr)) { \
case 1: \
__arch_xchg_masked(sc_sfx, ".b" swap_sfx, \
- prepend, sc_append, swap_append, \
+ sc_prepend, sc_append, \
__ret, __ptr, __new); \
break; \
case 2: \
__arch_xchg_masked(sc_sfx, ".h" swap_sfx, \
- prepend, sc_append, swap_append, \
+ sc_prepend, sc_append, \
__ret, __ptr, __new); \
break; \
case 4: \
- __arch_xchg(".w" swap_sfx, prepend, swap_append, \
- __ret, __ptr, __new); \
+ __arch_xchg(".w" swap_sfx, __ret, __ptr, __new); \
break; \
case 8: \
- __arch_xchg(".d" swap_sfx, prepend, swap_append, \
- __ret, __ptr, __new); \
+ __arch_xchg(".d" swap_sfx, __ret, __ptr, __new); \
break; \
default: \
BUILD_BUG(); \
@@ -102,17 +96,16 @@ end:; \
})
#define arch_xchg_relaxed(ptr, x) \
- _arch_xchg(ptr, x, "", "", "", "", "")
+ _arch_xchg(ptr, x, "", "", "", "")
#define arch_xchg_acquire(ptr, x) \
- _arch_xchg(ptr, x, "", "", "", \
- RISCV_ACQUIRE_BARRIER, RISCV_ACQUIRE_BARRIER)
+ _arch_xchg(ptr, x, "", ".aq", "", RISCV_ACQUIRE_BARRIER)
#define arch_xchg_release(ptr, x) \
- _arch_xchg(ptr, x, "", "", RISCV_RELEASE_BARRIER, "", "")
+ _arch_xchg(ptr, x, "", ".rl", RISCV_RELEASE_BARRIER, "")
#define arch_xchg(ptr, x) \
- _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER, "")
+ _arch_xchg(ptr, x, ".rl", ".aqrl", "", RISCV_FULL_BARRIER)
#define xchg32(ptr, x) \
({ \
--
2.39.2
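
For readers who want to see the effect of the change, here is a minimal,
illustrative sketch (not part of the patch; the function names are made up)
of what the acquire variant of a 32-bit xchg() roughly expands to before and
after this series, assuming "fence r, rw" is what RISCV_ACQUIRE_BARRIER emits:

static inline unsigned int xchg32_acquire_old(unsigned int *p, unsigned int n)
{
	unsigned int r;

	/* Old mapping: relaxed AMO followed by an explicit acquire fence. */
	__asm__ __volatile__ (
		"	amoswap.w %0, %2, %1\n"
		"	fence r, rw\n"
		: "=r" (r), "+A" (*p)
		: "r" (n)
		: "memory");
	return r;
}

static inline unsigned int xchg32_acquire_new(unsigned int *p, unsigned int n)
{
	unsigned int r;

	/* New mapping: the .aq annotation provides the acquire ordering. */
	__asm__ __volatile__ (
		"	amoswap.w.aq %0, %2, %1\n"
		: "=r" (r), "+A" (*p)
		: "r" (n)
		: "memory");
	return r;
}

The release and fully ordered variants follow the same pattern with .rl and
.aqrl; only the LR/SC fallback used for the byte/halfword cases keeps its
sc_prepend/sc_append barriers.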
On Wed, Jun 26, 2024 at 03:03:44PM +0200, Alexandre Ghiti wrote:
> xchg() uses the amoswap.X instructions from Zabha but still relies on
> the LR/SC acquire/release mappings, which require explicit barriers.
>
> Let's improve that by using the amoswap .aq/.rl acquire/release
> semantics in order to avoid those barriers.
>
> Suggested-by: Andrea Parri <andrea@rivosinc.com>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>
> [...]
I actually see no reason for this patch; please see also my
remarks/question on patch #4.
Andrea
On 27/06/2024 15:58, Andrea Parri wrote:
> On Wed, Jun 26, 2024 at 03:03:44PM +0200, Alexandre Ghiti wrote:
>> [...]
> I actually see no reason for this patch; please see also my
> remarks/question on patch #4.
You mean that we can't improve the fully-ordered version here?
> > I actually see no reason for this patch; please see also my
> > remarks/question on patch #4.
>
> You mean that we can't improve the fully-ordered version here?
Well, I mean, this patch doesn't improve anything AFAICT. You recently
fixed these LR/SC mappings in 1d84afaf0252 ("riscv: Fix fully ordered
LR/SC xchg[8|16]() implementations"), and the remaining ones look just
fine to me.
Andrea
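
For context, here is a hedged sketch of the "fully ordered LR/SC
xchg[8|16]()" mapping discussed above, i.e. sc.w.rl followed by a full fence
(RISCV_FULL_BARRIER), as established by 1d84afaf0252 and kept by this series
on the non-Zabha path. The helper name and the open-coded masking are
illustrative only, not the kernel macro:

static inline unsigned char xchg8_full_lrsc(unsigned char *p, unsigned char n)
{
	/* Operate on the aligned 32-bit word containing the byte. */
	unsigned int *p32 = (unsigned int *)((unsigned long)p & ~0x3UL);
	unsigned long shift = ((unsigned long)p & 0x3UL) * 8;	/* little-endian */
	unsigned long mask = 0xffUL << shift;
	unsigned long newv = (unsigned long)n << shift;
	unsigned long ret, rc;

	__asm__ __volatile__ (
		"0:	lr.w	%0, %2\n"
		"	and	%1, %0, %z4\n"		/* clear the byte lane  */
		"	or	%1, %1, %z3\n"		/* insert the new byte  */
		"	sc.w.rl	%1, %1, %2\n"		/* release on the store */
		"	bnez	%1, 0b\n"
		"	fence	rw, rw\n"		/* RISCV_FULL_BARRIER   */
		: "=&r" (ret), "=&r" (rc), "+A" (*p32)
		: "rJ" (newv), "rJ" (~mask)
		: "memory");

	return (unsigned char)((ret & mask) >> shift);
}

The acquire and release variants of this loop instead use the
sc_prepend/sc_append barriers passed down by _arch_xchg() in the hunks above.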