This adds runtime support for Zabha in cmpxchg8/16() operations.
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/Kconfig | 17 ++++++++++++++++
arch/riscv/Makefile | 3 +++
arch/riscv/include/asm/cmpxchg.h | 34 ++++++++++++++++++++++++++++++--
3 files changed, 52 insertions(+), 2 deletions(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 1caaedec88c7..d3b0f92f92da 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE
preemption. Enabling this config will result in higher memory
consumption due to the allocation of per-task's kernel Vector context.
+config TOOLCHAIN_HAS_ZABHA
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha)
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZABHA
+ bool "Zabha extension support for atomic byte/halfword operations"
+ depends on TOOLCHAIN_HAS_ZABHA
+ default y
+ help
+ Enable the use of the Zabha ISA-extension to implement kernel
+ byte/halfword atomic memory operations when it is detected at boot.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_ZACAS
bool
default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 9fd13d7a9cc6..78dcaaeebf4e 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -88,6 +88,9 @@ riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
# Check if the toolchain supports Zacas
riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
+# Check if the toolchain supports Zabha
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha
+
# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index a58a2141c6d3..b9a3fdcec919 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -105,8 +105,20 @@
* indicated by comparing RETURN with OLD.
*/
-#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
+#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
({ \
+ __label__ no_zacas, zabha, end; \
+ \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
+ asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
+ RISCV_ISA_EXT_ZACAS, 1) \
+ : : : : no_zacas); \
+ asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
+ RISCV_ISA_EXT_ZABHA, 1) \
+ : : : : zabha); \
+ } \
+ \
+no_zacas:; \
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
@@ -133,6 +145,19 @@
: "memory"); \
\
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
+ goto end; \
+ \
+zabha: \
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amocas" cas_sfx " %0, %z2, %1\n" \
+ append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ } \
+end:; \
})
#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
@@ -181,8 +206,13 @@ end:; \
\
switch (sizeof(*__ptr)) { \
case 1: \
+ __arch_cmpxchg_masked(sc_sfx, ".b" sc_sfx, \
+ prepend, append, \
+ __ret, __ptr, __old, __new); \
+ break; \
case 2: \
- __arch_cmpxchg_masked(sc_sfx, prepend, append, \
+ __arch_cmpxchg_masked(sc_sfx, ".h" sc_sfx, \
+ prepend, append, \
__ret, __ptr, __old, __new); \
break; \
case 4: \
--
2.39.2
> -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
> +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
> ({ \
> + __label__ no_zacas, zabha, end; \
> + \
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
> + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> + RISCV_ISA_EXT_ZACAS, 1) \
> + : : : : no_zacas); \
> + asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
> + RISCV_ISA_EXT_ZABHA, 1) \
> + : : : : zabha); \
> + } \
> + \
> +no_zacas:; \
> u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
> ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
> ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
> @@ -133,6 +145,19 @@
> : "memory"); \
> \
> r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
> + goto end; \
> + \
> +zabha: \
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
> + __asm__ __volatile__ ( \
> + prepend \
> + " amocas" cas_sfx " %0, %z2, %1\n" \
> + append \
> + : "+&r" (r), "+A" (*(p)) \
> + : "rJ" (n) \
> + : "memory"); \
> + } \
> +end:; \
> })
I admit that I found this all quite difficult to read; IIUC, this is
missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check. How about adding
such a check under the zabha: label (replacing/in place of the second
IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) check) and moving the corresponding
asm goto statement there, perhaps as follows? (on top of this patch)
Also, the patch presents the first occurrence of RISCV_ISA_EXT_ZABHA;
perhaps worth moving the hwcap/cpufeature changes from patch #6 here?
Andrea
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index b9a3fdcec919..3c913afec150 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -110,15 +110,12 @@
__label__ no_zacas, zabha, end; \
\
if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
- asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
- RISCV_ISA_EXT_ZACAS, 1) \
- : : : : no_zacas); \
asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
RISCV_ISA_EXT_ZABHA, 1) \
: : : : zabha); \
} \
\
-no_zacas:; \
+no_zacas: \
u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
@@ -148,16 +145,20 @@ no_zacas:; \
goto end; \
\
zabha: \
- if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
- __asm__ __volatile__ ( \
- prepend \
- " amocas" cas_sfx " %0, %z2, %1\n" \
- append \
- : "+&r" (r), "+A" (*(p)) \
- : "rJ" (n) \
- : "memory"); \
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \
+ asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
+ RISCV_ISA_EXT_ZACAS, 1) \
+ : : : : no_zacas); \
} \
-end:; \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amocas" cas_sfx " %0, %z2, %1\n" \
+ append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+end: \
})
#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
On 27/06/2024 13:53, Andrea Parri wrote:
>> -#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n) \
>> +#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n) \
>> ({ \
>> + __label__ no_zacas, zabha, end; \
>> + \
>> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
>> + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
>> + RISCV_ISA_EXT_ZACAS, 1) \
>> + : : : : no_zacas); \
>> + asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
>> + RISCV_ISA_EXT_ZABHA, 1) \
>> + : : : : zabha); \
>> + } \
>> + \
>> +no_zacas:; \
>> u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
>> ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
>> ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
>> @@ -133,6 +145,19 @@
>> : "memory"); \
>> \
>> r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
>> + goto end; \
>> + \
>> +zabha: \
>> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
>> + __asm__ __volatile__ ( \
>> + prepend \
>> + " amocas" cas_sfx " %0, %z2, %1\n" \
>> + append \
>> + : "+&r" (r), "+A" (*(p)) \
>> + : "rJ" (n) \
>> + : "memory"); \
>> + } \
>> +end:; \
>> })
> I admit that I found this all quite difficult to read; IIUC, this is
> missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check.
I'm not sure we need the zacas check here, since we could use a
toolchain that supports zabha but not zacas, run this on a zabha/zacas
platform and it would work.
> How about adding
> such a check under the zabha: label (replacing/in place of the second
> IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) check) and moving the corresponding
> asm goto statement there, perhaps as follows? (on top of this patch)
If that makes things clearer for you, sure, I can do that.
>
> Also, the patch presents the first occurrence of RISCV_ISA_EXT_ZABHA;
> perhaps worth moving the hwcap/cpufeature changes from patch #6 here?
>
> Andrea
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index b9a3fdcec919..3c913afec150 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -110,15 +110,12 @@
> __label__ no_zacas, zabha, end; \
> \
> if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
> - asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> - RISCV_ISA_EXT_ZACAS, 1) \
> - : : : : no_zacas); \
> asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
> RISCV_ISA_EXT_ZABHA, 1) \
> : : : : zabha); \
> } \
> \
> -no_zacas:; \
> +no_zacas: \
> u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
> ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
> ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
> @@ -148,16 +145,20 @@ no_zacas:; \
> goto end; \
> \
> zabha: \
> - if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
But we need to keep this check, otherwise it would fail to compile on
toolchains without Zabha support.
> - __asm__ __volatile__ ( \
> - prepend \
> - " amocas" cas_sfx " %0, %z2, %1\n" \
> - append \
> - : "+&r" (r), "+A" (*(p)) \
> - : "rJ" (n) \
> - : "memory"); \
> > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \
> + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> + RISCV_ISA_EXT_ZACAS, 1) \
> + : : : : no_zacas); \
> } \
> -end:; \
> + \
> + __asm__ __volatile__ ( \
> + prepend \
> + " amocas" cas_sfx " %0, %z2, %1\n" \
> + append \
> + : "+&r" (r), "+A" (*(p)) \
> + : "rJ" (n) \
> + : "memory"); \
> +end: \
> })
>
> #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
> > I admit that I found this all quite difficult to read; IIUC, this is
> > missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check.
>
> I'm not sure we need the zacas check here, since we could use a toolchain
> that supports zabha but not zacas, run this on a zabha/zacas platform and it
> would work.

One specific set-up I was concerned about is as follows:

1) hardware implements both zabha and zacas
2) toolchain supports both zabha and zacas
3) CONFIG_RISCV_ISA_ZABHA=y and CONFIG_RISCV_ISA_ZACAS=n

Since CONFIG_RISCV_ISA_ZABHA=y, the first asm goto will get executed
and, since the hardware implements zacas, that will result in a nop.
Then the second asm goto will get executed and, since the hardware
implements zabha, it will result in the j zabha. In conclusion, the
amocas instruction following the zabha: label will get executed, thus
violating (the semantics of) CONFIG_RISCV_ISA_ZACAS=n. IIUC, the diff
I've posted previously in this thread shared a similar limitation/bug.

Andrea
Hi Andrea,

On Wed, Jul 10, 2024 at 1:51 AM Andrea Parri <parri.andrea@gmail.com> wrote:
>
> > > I admit that I found this all quite difficult to read; IIUC, this is
> > > missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check.
> >
> > I'm not sure we need the zacas check here, since we could use a toolchain
> > that supports zabha but not zacas, run this on a zabha/zacas platform and it
> > would work.
>
> One specific set-up I was concerned about is as follows:
>
> 1) hardware implements both zabha and zacas
> 2) toolchain supports both zabha and zacas
> 3) CONFIG_RISCV_ISA_ZABHA=y and CONFIG_RISCV_ISA_ZACAS=n
>
> Since CONFIG_RISCV_ISA_ZABHA=y, the first asm goto will get executed
> and, since the hardware implements zacas, that will result in a nop.
> Then the second asm goto will get executed and, since the hardware
> implements zabha, it will result in the j zabha. In conclusion, the
> amocas instruction following the zabha: label will get executed, thus
> violating (the semantics of) CONFIG_RISCV_ISA_ZACAS=n. IIUC, the diff
> I've posted previously in this thread shared a similar limitation/bug.

So you mean that when disabling Zacas, we should actually disable *all*
the CAS instructions, even the Zabha ones. It makes sense and allows for
a single way to disable the CAS instructions but keeping the other
atomic operations.

I'll fix that and add a comment.

Thanks,

Alex

>
> Andrea
> I admit that I found this all quite difficult to read; IIUC, this is
> missing an IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) check. How about adding
> such a check under the zabha: label (replacing/in place of the second
> IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) check) and moving the corresponding
> asm goto statement there, perhaps as follows? (on top of this patch)
[...]
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index b9a3fdcec919..3c913afec150 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -110,15 +110,12 @@
> __label__ no_zacas, zabha, end; \
> \
> if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
> - asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> - RISCV_ISA_EXT_ZACAS, 1) \
> - : : : : no_zacas); \
> asm goto(ALTERNATIVE("nop", "j %[zabha]", 0, \
> RISCV_ISA_EXT_ZABHA, 1) \
> : : : : zabha); \
> } \
> \
> -no_zacas:; \
> +no_zacas: \
> u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3); \
> ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE; \
> ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0) \
> @@ -148,16 +145,20 @@ no_zacas:; \
> goto end; \
> \
> zabha: \
> - if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA)) { \
> - __asm__ __volatile__ ( \
> - prepend \
> - " amocas" cas_sfx " %0, %z2, %1\n" \
> - append \
> - : "+&r" (r), "+A" (*(p)) \
> - : "rJ" (n) \
> - : "memory"); \
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \
> + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> + RISCV_ISA_EXT_ZACAS, 1) \
> + : : : : no_zacas); \
> } \
> -end:; \
> + \
> + __asm__ __volatile__ ( \
> + prepend \
> + " amocas" cas_sfx " %0, %z2, %1\n" \
> + append \
> + : "+&r" (r), "+A" (*(p)) \
> + : "rJ" (n) \
> + : "memory"); \
> +end: \
> })
>
> #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
Unfortunately, this diff won't work e.g. when ZABHA is supported and
detected at boot while ZACAS is not supported; more thinking required...
Andrea
© 2016 - 2025 Red Hat, Inc.