From: Guo Ren <guoren@linux.alibaba.com>
The RISC-V code uses the queued spinlock implementation, which calls
the smp_cond_load_acquire() macro on a one-byte field. So, add byte
and halfword support to __cmpwait().
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
arch/riscv/include/asm/cmpxchg.h | 38 +++++++++++++++++++++++++++++---
1 file changed, 35 insertions(+), 3 deletions(-)
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 4cadc56220fe..2bd42a11ff8f 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -365,16 +365,48 @@ static __always_inline void __cmpwait(volatile void *ptr,
{
unsigned long tmp;
+ u32 *__ptr32b;
+ ulong __s, __val, __mask;
+
asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
0, RISCV_ISA_EXT_ZAWRS, 1)
: : : : no_zawrs);
switch (size) {
case 1:
- fallthrough;
+ __ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
+ __s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
+ __val = val << __s;
+ __mask = 0xf << __s;
+
+ asm volatile(
+ " lr.w %0, %1\n"
+ " and %0, %0, %3\n"
+ " xor %0, %0, %2\n"
+ " bnez %0, 1f\n"
+ ZAWRS_WRS_NTO "\n"
+ "1:"
+ : "=&r" (tmp), "+A" (*(__ptr32b))
+ : "r" (__val), "r" (__mask)
+ : "memory");
+ break;
case 2:
- /* RISC-V doesn't have lr instructions on byte and half-word. */
- goto no_zawrs;
+ __ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
+ __s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
+ __val = val << __s;
+ __mask = 0xff << __s;
+
+ asm volatile(
+ " lr.w %0, %1\n"
+ " and %0, %0, %3\n"
+ " xor %0, %0, %2\n"
+ " bnez %0, 1f\n"
+ ZAWRS_WRS_NTO "\n"
+ "1:"
+ : "=&r" (tmp), "+A" (*(__ptr32b))
+ : "r" (__val), "r" (__mask)
+ : "memory");
+ break;
case 4:
asm volatile(
" lr.w %0, %1\n"
--
2.40.1
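Since RISC-V has no lr instructions for byte and halfword accesses, the two
new cases load the aligned 32-bit word containing the byte or halfword and
compare only that lane before deciding whether to execute wrs.nto. Below is a
minimal plain-C sketch of that lane addressing (hypothetical helper name,
userspace only, not kernel code; it assumes a little-endian host, as on
RISC-V Linux, and uses full-width lane masks):

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE 8

/*
 * Locate the aligned 32-bit word, compute the shift of the sub-word inside
 * it, and build a mask covering the full access width (size is 1 or 2).
 * Returns 1 when the addressed sub-word still equals val, i.e. the point at
 * which the real __cmpwait() would go on to execute wrs.nto and wait.
 */
static int lane_still_equal(const volatile void *ptr, unsigned long val, int size)
{
	uint32_t *ptr32 = (uint32_t *)((uintptr_t)ptr & ~(uintptr_t)0x3);
	unsigned long s = ((uintptr_t)ptr & 0x3) * BITS_PER_BYTE;
	unsigned long mask = ((1UL << (size * BITS_PER_BYTE)) - 1) << s;

	return ((*ptr32 & mask) ^ (val << s)) == 0;
}

int main(void)
{
	uint32_t word = 0x44332211;		/* bytes 0x11 0x22 0x33 0x44 */
	uint8_t *b = (uint8_t *)&word + 2;	/* 0x33 on a little-endian host */

	printf("%d\n", lane_still_equal(b, 0x33, 1));	/* 1: still equal, would wait */
	printf("%d\n", lane_still_equal(b, 0x34, 1));	/* 0: already differs, no wait */
	return 0;
}

The patch's case 2 computes the shift as ((ulong)(ptr) & 0x2) * BITS_PER_BYTE,
which is equivalent for naturally aligned halfwords (offset 0 or 2).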
On Sun, Dec 15, 2024 at 10:22:53PM -0500, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
>
> The RISC-V code uses the queued spinlock implementation, which calls
> the smp_cond_load_acquire() macro on a one-byte field. So, add byte
> and halfword support to __cmpwait().
>
> Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> Signed-off-by: Guo Ren <guoren@kernel.org>
> ---
> arch/riscv/include/asm/cmpxchg.h | 38 +++++++++++++++++++++++++++++---
> 1 file changed, 35 insertions(+), 3 deletions(-)
>
> diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> index 4cadc56220fe..2bd42a11ff8f 100644
> --- a/arch/riscv/include/asm/cmpxchg.h
> +++ b/arch/riscv/include/asm/cmpxchg.h
> @@ -365,16 +365,48 @@ static __always_inline void __cmpwait(volatile void *ptr,
> {
> unsigned long tmp;
>
> + u32 *__ptr32b;
> + ulong __s, __val, __mask;
> +
> asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
> 0, RISCV_ISA_EXT_ZAWRS, 1)
> : : : : no_zawrs);
>
> switch (size) {
> case 1:
> - fallthrough;
> + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
> + __s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
> + __val = val << __s;
> + __mask = 0xf << __s;
This mask should be 0xff and the mask below should be 0xffff.
> +
> + asm volatile(
> + " lr.w %0, %1\n"
> + " and %0, %0, %3\n"
> + " xor %0, %0, %2\n"
> + " bnez %0, 1f\n"
> + ZAWRS_WRS_NTO "\n"
> + "1:"
> + : "=&r" (tmp), "+A" (*(__ptr32b))
> + : "r" (__val), "r" (__mask)
> + : "memory");
> + break;
> case 2:
> - /* RISC-V doesn't have lr instructions on byte and half-word. */
> - goto no_zawrs;
> + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
> + __s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
> + __val = val << __s;
> + __mask = 0xff << __s;
> +
> + asm volatile(
> + " lr.w %0, %1\n"
> + " and %0, %0, %3\n"
> + " xor %0, %0, %2\n"
> + " bnez %0, 1f\n"
> + ZAWRS_WRS_NTO "\n"
> + "1:"
> + : "=&r" (tmp), "+A" (*(__ptr32b))
> + : "r" (__val), "r" (__mask)
> + : "memory");
> + break;
> case 4:
> asm volatile(
> " lr.w %0, %1\n"
> --
> 2.40.1
>
Thanks,
drew
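To make the effect concrete: for a byte at offset 0, 0xf << 0 covers only bits
0..3. If that byte currently holds 0x10 and the caller passed val == 0x00, then
(0x10 & 0xf) ^ 0x00 == 0, so the sequence falls through to wrs.nto even though
the byte already differs from val; with 0xff the xor is non-zero and
__cmpwait() returns immediately, as intended. The halfword case has the same
problem with 0xff versus 0xffff.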
On Mon, Dec 16, 2024 at 11:42 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Sun, Dec 15, 2024 at 10:22:53PM -0500, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > The RISC-V code uses the queued spinlock implementation, which calls
> > the smp_cond_load_acquire() macro on a one-byte field. So, add byte
> > and halfword support to __cmpwait().
> >
> > Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
> > Signed-off-by: Guo Ren <guoren@kernel.org>
> > ---
> > arch/riscv/include/asm/cmpxchg.h | 38 +++++++++++++++++++++++++++++---
> > 1 file changed, 35 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
> > index 4cadc56220fe..2bd42a11ff8f 100644
> > --- a/arch/riscv/include/asm/cmpxchg.h
> > +++ b/arch/riscv/include/asm/cmpxchg.h
> > @@ -365,16 +365,48 @@ static __always_inline void __cmpwait(volatile void *ptr,
> > {
> > unsigned long tmp;
> >
> > + u32 *__ptr32b;
> > + ulong __s, __val, __mask;
> > +
> > asm goto(ALTERNATIVE("j %l[no_zawrs]", "nop",
> > 0, RISCV_ISA_EXT_ZAWRS, 1)
> > : : : : no_zawrs);
> >
> > switch (size) {
> > case 1:
> > - fallthrough;
> > + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
> > + __s = ((ulong)(ptr) & 0x3) * BITS_PER_BYTE;
> > + __val = val << __s;
> > + __mask = 0xf << __s;
>
> This mask should be 0xff and the mask below should be 0xffff.
Thx for catching it; it's hard to test it out. I will correct it in
the next version.
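Presumably the next version will just widen the two masks to 0xff << __s and
0xffff << __s. For reference, a tiny userspace check (illustration only, not
kernel code) of the lane masks that result for each possible offset within the
32-bit word:

#include <stdio.h>

#define BITS_PER_BYTE 8

int main(void)
{
	unsigned long off, s;

	/* Byte lanes: offsets 0..3, corrected mask width 0xff. */
	for (off = 0; off < 4; off++) {
		s = (off & 0x3) * BITS_PER_BYTE;
		printf("byte     at offset %lu: mask 0x%08lx\n", off, 0xffUL << s);
	}

	/* Halfword lanes: offsets 0 and 2, corrected mask width 0xffff. */
	for (off = 0; off < 4; off += 2) {
		s = (off & 0x2) * BITS_PER_BYTE;
		printf("halfword at offset %lu: mask 0x%08lx\n", off, 0xffffUL << s);
	}
	return 0;
}

Each resulting mask covers exactly the bytes the original pointer addresses,
which is what the lr.w/and/xor sequence relies on.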
>
> > +
> > + asm volatile(
> > + " lr.w %0, %1\n"
> > + " and %0, %0, %3\n"
> > + " xor %0, %0, %2\n"
> > + " bnez %0, 1f\n"
> > + ZAWRS_WRS_NTO "\n"
> > + "1:"
> > + : "=&r" (tmp), "+A" (*(__ptr32b))
> > + : "r" (__val), "r" (__mask)
> > + : "memory");
> > + break;
> > case 2:
> > - /* RISC-V doesn't have lr instructions on byte and half-word. */
> > - goto no_zawrs;
> > + __ptr32b = (u32 *)((ulong)(ptr) & ~0x3);
> > + __s = ((ulong)(ptr) & 0x2) * BITS_PER_BYTE;
> > + __val = val << __s;
> > + __mask = 0xff << __s;
> > +
> > + asm volatile(
> > + " lr.w %0, %1\n"
> > + " and %0, %0, %3\n"
> > + " xor %0, %0, %2\n"
> > + " bnez %0, 1f\n"
> > + ZAWRS_WRS_NTO "\n"
> > + "1:"
> > + : "=&r" (tmp), "+A" (*(__ptr32b))
> > + : "r" (__val), "r" (__mask)
> > + : "memory");
> > + break;
> > case 4:
> > asm volatile(
> > " lr.w %0, %1\n"
> > --
> > 2.40.1
> >
>
> Thanks,
> drew
--
Best Regards
Guo Ren