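This adds runtime support for Zacas in cmpxchg operations: when the
extension is detected at boot, cmpxchg is implemented with amocas instead
of the LR/SC loop.
Note that __ret is now initialized to the old value, because amocas expects
to find the value to compare against memory in rd (__ret).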
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
---
arch/riscv/Kconfig | 17 +++++++++++++++++
arch/riscv/Makefile | 3 +++
arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++---
3 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 05ccba8ca33a..1caaedec88c7 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE
preemption. Enabling this config will result in higher memory
consumption due to the allocation of per-task's kernel Vector context.
+config TOOLCHAIN_HAS_ZACAS
+ bool
+ default y
+ depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
+ depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
+ depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZACAS
+ bool "Zacas extension support for atomic CAS"
+ depends on TOOLCHAIN_HAS_ZACAS
+ default y
+ help
+ Enable the use of the Zacas ISA-extension to implement kernel atomic
+ cmpxchg operations when it is detected at boot.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_ZBB
bool
default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 06de9d365088..9fd13d7a9cc6 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -85,6 +85,9 @@ endif
# Check if the toolchain supports Zihintpause extension
riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
+# Check if the toolchain supports Zacas
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
+
# Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
# matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 808b4c78462e..5d38153e2f13 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -9,6 +9,7 @@
#include <linux/bug.h>
#include <asm/fence.h>
+#include <asm/alternative.h>
#define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
({ \
@@ -134,21 +135,40 @@
r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
})
-#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
+#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
({ \
+ __label__ no_zacas, end; \
register unsigned int __rc; \
\
+ if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \
+ asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
+ RISCV_ISA_EXT_ZACAS, 1) \
+ : : : : no_zacas); \
+ \
+ __asm__ __volatile__ ( \
+ prepend \
+ " amocas" sc_cas_sfx " %0, %z2, %1\n" \
+ append \
+ : "+&r" (r), "+A" (*(p)) \
+ : "rJ" (n) \
+ : "memory"); \
+ goto end; \
+ } \
+ \
+no_zacas: \
__asm__ __volatile__ ( \
prepend \
"0: lr" lr_sfx " %0, %2\n" \
" bne %0, %z3, 1f\n" \
- " sc" sc_sfx " %1, %z4, %2\n" \
+ " sc" sc_cas_sfx " %1, %z4, %2\n" \
" bnez %1, 0b\n" \
append \
"1:\n" \
: "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
: "rJ" (co o), "rJ" (n) \
: "memory"); \
+ \
+end:; \
})
#define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
@@ -156,7 +176,7 @@
__typeof__(ptr) __ptr = (ptr); \
__typeof__(*(__ptr)) __old = (old); \
__typeof__(*(__ptr)) __new = (new); \
- __typeof__(*(__ptr)) __ret; \
+ __typeof__(*(__ptr)) __ret = (old); \
\
switch (sizeof(*__ptr)) { \
case 1: \
--
2.39.2
Hi Alex,
On 2024-07-17 1:19 AM, Alexandre Ghiti wrote:
> This adds runtime support for Zacas in cmpxchg operations.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/Kconfig | 17 +++++++++++++++++
> arch/riscv/Makefile | 3 +++
> arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++---
> 3 files changed, 43 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 05ccba8ca33a..1caaedec88c7 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE
> preemption. Enabling this config will result in higher memory
> consumption due to the allocation of per-task's kernel Vector context.
>
> +config TOOLCHAIN_HAS_ZACAS
> + bool
> + default y
> + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
> + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
> + depends on AS_HAS_OPTION_ARCH
> +
> +config RISCV_ISA_ZACAS
> + bool "Zacas extension support for atomic CAS"
> + depends on TOOLCHAIN_HAS_ZACAS
> + default y
> + help
> + Enable the use of the Zacas ISA-extension to implement kernel atomic
> + cmpxchg operations when it is detected at boot.
> +
> + If you don't know what to do here, say Y.
> +
> config TOOLCHAIN_HAS_ZBB
> bool
> default y
> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> index 06de9d365088..9fd13d7a9cc6 100644
> --- a/arch/riscv/Makefile
> +++ b/arch/riscv/Makefile
> @@ -85,6 +85,9 @@ endif
> # Check if the toolchain supports Zihintpause extension
> riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
>
> +# Check if the toolchain supports Zacas
> +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> +
> # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> # matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 808b4c78462e..5d38153e2f13 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -9,6 +9,7 @@
> #include <linux/bug.h>
>
> #include <asm/fence.h>
> +#include <asm/alternative.h>
>
> #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
> ({ \
> @@ -134,21 +135,40 @@
> r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
> })
>
> -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
> +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> ({ \
> + __label__ no_zacas, end; \
> register unsigned int __rc; \
> \
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \
> + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> + RISCV_ISA_EXT_ZACAS, 1) \
> + : : : : no_zacas); \
> + \
> + __asm__ __volatile__ ( \
> + prepend \
> + " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> + append \
> + : "+&r" (r), "+A" (*(p)) \
> + : "rJ" (n) \
> + : "memory"); \
> + goto end; \
> + } \
> + \
> +no_zacas: \
> __asm__ __volatile__ ( \
> prepend \
> "0: lr" lr_sfx " %0, %2\n" \
> " bne %0, %z3, 1f\n" \
> - " sc" sc_sfx " %1, %z4, %2\n" \
> + " sc" sc_cas_sfx " %1, %z4, %2\n" \
> " bnez %1, 0b\n" \
> append \
This would probably be a good place to use inline ALTERNATIVE instead of an asm
goto. It saves overall code size, and a jump in the non-Zacas case, at the cost
of 3 nops in the Zacas case. (And all the nops can go after the amocas, where
they will likely be hidden by the amocas latency.)
Regards,
Samuel
> "1:\n" \
> : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> : "rJ" (co o), "rJ" (n) \
> : "memory"); \
> + \
> +end:; \
> })
>
> #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
> @@ -156,7 +176,7 @@
> __typeof__(ptr) __ptr = (ptr); \
> __typeof__(*(__ptr)) __old = (old); \
> __typeof__(*(__ptr)) __new = (new); \
> - __typeof__(*(__ptr)) __ret; \
> + __typeof__(*(__ptr)) __ret = (old); \
> \
> switch (sizeof(*__ptr)) { \
> case 1: \
On Fri, Jul 19, 2024 at 2:45 AM Samuel Holland
<samuel.holland@sifive.com> wrote:
>
> Hi Alex,
>
> On 2024-07-17 1:19 AM, Alexandre Ghiti wrote:
> > This adds runtime support for Zacas in cmpxchg operations.
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> > arch/riscv/Kconfig | 17 +++++++++++++++++
> > arch/riscv/Makefile | 3 +++
> > arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++---
> > 3 files changed, 43 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 05ccba8ca33a..1caaedec88c7 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE
> > preemption. Enabling this config will result in higher memory
> > consumption due to the allocation of per-task's kernel Vector context.
> >
> > +config TOOLCHAIN_HAS_ZACAS
> > + bool
> > + default y
> > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
> > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
> > + depends on AS_HAS_OPTION_ARCH
> > +
> > +config RISCV_ISA_ZACAS
> > + bool "Zacas extension support for atomic CAS"
> > + depends on TOOLCHAIN_HAS_ZACAS
> > + default y
> > + help
> > + Enable the use of the Zacas ISA-extension to implement kernel atomic
> > + cmpxchg operations when it is detected at boot.
> > +
> > + If you don't know what to do here, say Y.
> > +
> > config TOOLCHAIN_HAS_ZBB
> > bool
> > default y
> > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > index 06de9d365088..9fd13d7a9cc6 100644
> > --- a/arch/riscv/Makefile
> > +++ b/arch/riscv/Makefile
> > @@ -85,6 +85,9 @@ endif
> > # Check if the toolchain supports Zihintpause extension
> > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
> >
> > +# Check if the toolchain supports Zacas
> > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> > +
> > # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index 808b4c78462e..5d38153e2f13 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -9,6 +9,7 @@
> > #include <linux/bug.h>
> >
> > #include <asm/fence.h>
> > +#include <asm/alternative.h>
> >
> > #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
> > ({ \
> > @@ -134,21 +135,40 @@
> > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
> > })
> >
> > -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
> > +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> > ({ \
> > + __label__ no_zacas, end; \
> > register unsigned int __rc; \
> > \
> > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \
> > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> > + RISCV_ISA_EXT_ZACAS, 1) \
> > + : : : : no_zacas); \
> > + \
> > + __asm__ __volatile__ ( \
> > + prepend \
> > + " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> > + append \
> > + : "+&r" (r), "+A" (*(p)) \
> > + : "rJ" (n) \
> > + : "memory"); \
> > + goto end; \
> > + } \
> > + \
> > +no_zacas: \
> > __asm__ __volatile__ ( \
> > prepend \
> > "0: lr" lr_sfx " %0, %2\n" \
> > " bne %0, %z3, 1f\n" \
> > - " sc" sc_sfx " %1, %z4, %2\n" \
> > + " sc" sc_cas_sfx " %1, %z4, %2\n" \
> > " bnez %1, 0b\n" \
> > append \
>
> This would probably be a good place to use inline ALTERNATIVE instead of an asm
> goto. It saves overall code size, and a jump in the non-Zacas case, at the cost
> of 3 nops in the Zacas case. (And all the nops can go after the amocas, where
> they will likely be hidden by the amocas latency.)
That's what Conor proposed indeed.
I have just given it a try, but it does not work: the fully-ordered
version of the Zacas inline asm has a different number of instructions
than the other versions, so I cannot use a single, fixed number of nops.
I could pass this information from arch_cmpxchg() down to
__arch_cmpxchg(), but those macros are already complex enough, so I'd
rather not add another parameter.
This suggestion unfortunately cannot be applied to
__arch_cmpxchg_masked(), nor __arch_xchg_masked().
So unless you and Conor really insist, I'll drop the idea!
Thanks,
Alex
>
> Regards,
> Samuel
>
> > "1:\n" \
> > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> > : "rJ" (co o), "rJ" (n) \
> > : "memory"); \
> > + \
> > +end:; \
> > })
> >
> > #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
> > @@ -156,7 +176,7 @@
> > __typeof__(ptr) __ptr = (ptr); \
> > __typeof__(*(__ptr)) __old = (old); \
> > __typeof__(*(__ptr)) __new = (new); \
> > - __typeof__(*(__ptr)) __ret; \
> > + __typeof__(*(__ptr)) __ret = (old); \
> > \
> > switch (sizeof(*__ptr)) { \
> > case 1: \
>
On Fri, Jul 19, 2024 at 1:48 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> On Fri, Jul 19, 2024 at 2:45 AM Samuel Holland
> <samuel.holland@sifive.com> wrote:
> >
> > Hi Alex,
> >
> > On 2024-07-17 1:19 AM, Alexandre Ghiti wrote:
> > > This adds runtime support for Zacas in cmpxchg operations.
> > >
> > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > ---
> > > arch/riscv/Kconfig | 17 +++++++++++++++++
> > > arch/riscv/Makefile | 3 +++
> > > arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++---
> > > 3 files changed, 43 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > > index 05ccba8ca33a..1caaedec88c7 100644
> > > --- a/arch/riscv/Kconfig
> > > +++ b/arch/riscv/Kconfig
> > > @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE
> > > preemption. Enabling this config will result in higher memory
> > > consumption due to the allocation of per-task's kernel Vector context.
> > >
> > > +config TOOLCHAIN_HAS_ZACAS
> > > + bool
> > > + default y
> > > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
> > > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
> > > + depends on AS_HAS_OPTION_ARCH
> > > +
> > > +config RISCV_ISA_ZACAS
> > > + bool "Zacas extension support for atomic CAS"
> > > + depends on TOOLCHAIN_HAS_ZACAS
> > > + default y
> > > + help
> > > + Enable the use of the Zacas ISA-extension to implement kernel atomic
> > > + cmpxchg operations when it is detected at boot.
> > > +
> > > + If you don't know what to do here, say Y.
> > > +
> > > config TOOLCHAIN_HAS_ZBB
> > > bool
> > > default y
> > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > > index 06de9d365088..9fd13d7a9cc6 100644
> > > --- a/arch/riscv/Makefile
> > > +++ b/arch/riscv/Makefile
> > > @@ -85,6 +85,9 @@ endif
> > > # Check if the toolchain supports Zihintpause extension
> > > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
> > >
> > > +# Check if the toolchain supports Zacas
> > > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> > > +
> > > # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > > KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > > index 808b4c78462e..5d38153e2f13 100644
> > > --- a/arch/riscv/include/asm/cmpxchg.h
> > > +++ b/arch/riscv/include/asm/cmpxchg.h
> > > @@ -9,6 +9,7 @@
> > > #include <linux/bug.h>
> > >
> > > #include <asm/fence.h>
> > > +#include <asm/alternative.h>
> > >
> > > #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
> > > ({ \
> > > @@ -134,21 +135,40 @@
> > > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
> > > })
> > >
> > > -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
> > > +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
> > > ({ \
> > > + __label__ no_zacas, end; \
> > > register unsigned int __rc; \
> > > \
> > > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \
> > > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> > > + RISCV_ISA_EXT_ZACAS, 1) \
> > > + : : : : no_zacas); \
> > > + \
> > > + __asm__ __volatile__ ( \
> > > + prepend \
> > > + " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> > > + append \
> > > + : "+&r" (r), "+A" (*(p)) \
> > > + : "rJ" (n) \
> > > + : "memory"); \
> > > + goto end; \
> > > + } \
> > > + \
> > > +no_zacas: \
> > > __asm__ __volatile__ ( \
> > > prepend \
> > > "0: lr" lr_sfx " %0, %2\n" \
> > > " bne %0, %z3, 1f\n" \
> > > - " sc" sc_sfx " %1, %z4, %2\n" \
> > > + " sc" sc_cas_sfx " %1, %z4, %2\n" \
> > > " bnez %1, 0b\n" \
> > > append \
> >
> > This would probably be a good place to use inline ALTERNATIVE instead of an asm
> > goto. It saves overall code size, and a jump in the non-Zacas case, at the cost
> > of 3 nops in the Zacas case. (And all the nops can go after the amocas, where
> > they will likely be hidden by the amocas latency.)
>
> That's what Conor proposed indeed.
>
> I have just given it a try, but it does not work. The number of
> instructions in the zacas asm inline is different in the case of the
> fully-ordered version so I cannot set a unique number of nops. I could
> pass this information from arch_cmpxchg() down to __arch_cmpxchg() but
> those macros are already complex enough so I'd rather not add another
> parameter.
>
> This suggestion unfortunately cannot be applied to
> __arch_cmpxchg_masked(), nor __arch_xchg_masked().
>
> So unless you and Conor really insist, I'll drop the idea!
Or I can pass a nop when the full barrier is not needed, so that the
number of instructions is always the same, and it works!
I'll probably keep this version then, since it avoids the introduction
of new macros or the use of a static branch to circumvent the LLVM
bug.
>
> Thanks,
>
> Alex
>
>
> >
> > Regards,
> > Samuel
> >
> > > "1:\n" \
> > > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> > > : "rJ" (co o), "rJ" (n) \
> > > : "memory"); \
> > > + \
> > > +end:; \
> > > })
> > >
> > > #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
> > > @@ -156,7 +176,7 @@
> > > __typeof__(ptr) __ptr = (ptr); \
> > > __typeof__(*(__ptr)) __old = (old); \
> > > __typeof__(*(__ptr)) __new = (new); \
> > > - __typeof__(*(__ptr)) __ret; \
> > > + __typeof__(*(__ptr)) __ret = (old); \
> > > \
> > > switch (sizeof(*__ptr)) { \
> > > case 1: \
> >
On Wed, Jul 17, 2024 at 08:19:47AM GMT, Alexandre Ghiti wrote:
> This adds runtime support for Zacas in cmpxchg operations.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> ---
> arch/riscv/Kconfig | 17 +++++++++++++++++
> arch/riscv/Makefile | 3 +++
> arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++---
> 3 files changed, 43 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> index 05ccba8ca33a..1caaedec88c7 100644
> --- a/arch/riscv/Kconfig
> +++ b/arch/riscv/Kconfig
> @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE
> preemption. Enabling this config will result in higher memory
> consumption due to the allocation of per-task's kernel Vector context.
>
> +config TOOLCHAIN_HAS_ZACAS
> + bool
> + default y
> + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
> + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
> + depends on AS_HAS_OPTION_ARCH
> +
> +config RISCV_ISA_ZACAS
> + bool "Zacas extension support for atomic CAS"
> + depends on TOOLCHAIN_HAS_ZACAS
> + default y
> + help
> + Enable the use of the Zacas ISA-extension to implement kernel atomic
> + cmpxchg operations when it is detected at boot.
> +
> + If you don't know what to do here, say Y.
> +
> config TOOLCHAIN_HAS_ZBB
> bool
> default y
> diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> index 06de9d365088..9fd13d7a9cc6 100644
> --- a/arch/riscv/Makefile
> +++ b/arch/riscv/Makefile
> @@ -85,6 +85,9 @@ endif
> # Check if the toolchain supports Zihintpause extension
> riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
>
> +# Check if the toolchain supports Zacas
> +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> +
> # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> # matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 808b4c78462e..5d38153e2f13 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -9,6 +9,7 @@
> #include <linux/bug.h>
>
> #include <asm/fence.h>
> +#include <asm/alternative.h>
>
> #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
> ({ \
> @@ -134,21 +135,40 @@
> r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
> })
>
> -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
> +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
I'd either not bother renaming sc_sfx or also rename it in _arch_cmpxchg.
> ({ \
> + __label__ no_zacas, end; \
> register unsigned int __rc; \
> \
> + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \
> + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> + RISCV_ISA_EXT_ZACAS, 1) \
> + : : : : no_zacas); \
> + \
> + __asm__ __volatile__ ( \
> + prepend \
> + " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> + append \
> + : "+&r" (r), "+A" (*(p)) \
> + : "rJ" (n) \
> + : "memory"); \
> + goto end; \
> + } \
> + \
> +no_zacas: \
> __asm__ __volatile__ ( \
> prepend \
> "0: lr" lr_sfx " %0, %2\n" \
> " bne %0, %z3, 1f\n" \
> - " sc" sc_sfx " %1, %z4, %2\n" \
> + " sc" sc_cas_sfx " %1, %z4, %2\n" \
> " bnez %1, 0b\n" \
> append \
> "1:\n" \
> : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> : "rJ" (co o), "rJ" (n) \
> : "memory"); \
> + \
> +end:; \
> })
>
> #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
> @@ -156,7 +176,7 @@
> __typeof__(ptr) __ptr = (ptr); \
> __typeof__(*(__ptr)) __old = (old); \
> __typeof__(*(__ptr)) __new = (new); \
> - __typeof__(*(__ptr)) __ret; \
> + __typeof__(*(__ptr)) __ret = (old); \
Is this just to silence some compiler warnings? Can we point out
whatever the reason is in the commit message?
> \
> switch (sizeof(*__ptr)) { \
> case 1: \
> --
> 2.39.2
>
Thanks,
drew
Hi drew,
On Wed, Jul 17, 2024 at 5:08 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Wed, Jul 17, 2024 at 08:19:47AM GMT, Alexandre Ghiti wrote:
> > This adds runtime support for Zacas in cmpxchg operations.
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > ---
> > arch/riscv/Kconfig | 17 +++++++++++++++++
> > arch/riscv/Makefile | 3 +++
> > arch/riscv/include/asm/cmpxchg.h | 26 +++++++++++++++++++++++---
> > 3 files changed, 43 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
> > index 05ccba8ca33a..1caaedec88c7 100644
> > --- a/arch/riscv/Kconfig
> > +++ b/arch/riscv/Kconfig
> > @@ -596,6 +596,23 @@ config RISCV_ISA_V_PREEMPTIVE
> > preemption. Enabling this config will result in higher memory
> > consumption due to the allocation of per-task's kernel Vector context.
> >
> > +config TOOLCHAIN_HAS_ZACAS
> > + bool
> > + default y
> > + depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zacas)
> > + depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zacas)
> > + depends on AS_HAS_OPTION_ARCH
> > +
> > +config RISCV_ISA_ZACAS
> > + bool "Zacas extension support for atomic CAS"
> > + depends on TOOLCHAIN_HAS_ZACAS
> > + default y
> > + help
> > + Enable the use of the Zacas ISA-extension to implement kernel atomic
> > + cmpxchg operations when it is detected at boot.
> > +
> > + If you don't know what to do here, say Y.
> > +
> > config TOOLCHAIN_HAS_ZBB
> > bool
> > default y
> > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
> > index 06de9d365088..9fd13d7a9cc6 100644
> > --- a/arch/riscv/Makefile
> > +++ b/arch/riscv/Makefile
> > @@ -85,6 +85,9 @@ endif
> > # Check if the toolchain supports Zihintpause extension
> > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := $(riscv-march-y)_zihintpause
> >
> > +# Check if the toolchain supports Zacas
> > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
> > +
> > # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
> > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
> > KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index 808b4c78462e..5d38153e2f13 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -9,6 +9,7 @@
> > #include <linux/bug.h>
> >
> > #include <asm/fence.h>
> > +#include <asm/alternative.h>
> >
> > #define __arch_xchg_masked(sc_sfx, prepend, append, r, p, n) \
> > ({ \
> > @@ -134,21 +135,40 @@
> > r = (__typeof__(*(p)))((__retx & __mask) >> __s); \
> > })
> >
> > -#define __arch_cmpxchg(lr_sfx, sc_sfx, prepend, append, r, p, co, o, n) \
> > +#define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n) \
>
> I'd either not bother renaming sc_sfx or also rename it in _arch_cmpxchg.
I'll rename both then.
>
> > ({ \
> > + __label__ no_zacas, end; \
> > register unsigned int __rc; \
> > \
> > + if (IS_ENABLED(CONFIG_RISCV_ISA_ZACAS)) { \
> > + asm goto(ALTERNATIVE("j %[no_zacas]", "nop", 0, \
> > + RISCV_ISA_EXT_ZACAS, 1) \
> > + : : : : no_zacas); \
> > + \
> > + __asm__ __volatile__ ( \
> > + prepend \
> > + " amocas" sc_cas_sfx " %0, %z2, %1\n" \
> > + append \
> > + : "+&r" (r), "+A" (*(p)) \
> > + : "rJ" (n) \
> > + : "memory"); \
> > + goto end; \
> > + } \
> > + \
> > +no_zacas: \
> > __asm__ __volatile__ ( \
> > prepend \
> > "0: lr" lr_sfx " %0, %2\n" \
> > " bne %0, %z3, 1f\n" \
> > - " sc" sc_sfx " %1, %z4, %2\n" \
> > + " sc" sc_cas_sfx " %1, %z4, %2\n" \
> > " bnez %1, 0b\n" \
> > append \
> > "1:\n" \
> > : "=&r" (r), "=&r" (__rc), "+A" (*(p)) \
> > : "rJ" (co o), "rJ" (n) \
> > : "memory"); \
> > + \
> > +end:; \
> > })
> >
> > #define _arch_cmpxchg(ptr, old, new, sc_sfx, prepend, append) \
> > @@ -156,7 +176,7 @@
> > __typeof__(ptr) __ptr = (ptr); \
> > __typeof__(*(__ptr)) __old = (old); \
> > __typeof__(*(__ptr)) __new = (new); \
> > - __typeof__(*(__ptr)) __ret; \
> > + __typeof__(*(__ptr)) __ret = (old); \
>
> Is this just to silence some compiler warnings? Can we point out
> whatever the reason is in the commit message?
CAS expects to find the old value in rd (__ret), so that it can compare
it against the current value in memory before actually swapping in the
new value.
Since both you and Andrea were confused by this, I'll make it more explicit.
>
> > \
> > switch (sizeof(*__ptr)) { \
> > case 1: \
> > --
> > 2.39.2
> >
>
> Thanks,
> drew
Thanks,
Alex
© 2016 - 2025 Red Hat, Inc.