[PATCH v10 loongarch-next 2/3] LoongArch: Add 128-bit atomic cmpxchg support

George Guo posted 3 patches 3 weeks, 6 days ago
[PATCH v10 loongarch-next 2/3] LoongArch: Add 128-bit atomic cmpxchg support
Posted by George Guo 3 weeks, 6 days ago
From: George Guo <guodongtai@kylinos.cn>

Implement 128-bit atomic compare-and-exchange using LoongArch's
LL.D/SC.Q instructions.

At the same time, fix BPF scheduler test failures (scx_central scx_qmap)
caused by kmalloc_nolock_noprof returning NULL due to missing
128-bit atomics. The NULL returns led to -ENOMEM errors during
scheduler initialization, causing test cases to fail.

Verified by building the scx_qmap scheduler (located in
tools/sched_ext/) with `make` and running
./tools/sched_ext/build/bin/scx_qmap.

Link: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/commit/?id=5fb750e8a9ae
Acked-by: Hengqi Chen <hengqi.chen@gmail.com>
Tested-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: George Guo <guodongtai@kylinos.cn>
---
 arch/loongarch/Kconfig               |  2 ++
 arch/loongarch/include/asm/cmpxchg.h | 48 ++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 730f34214519..f9845ebec1a4 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -114,6 +114,7 @@ config LOONGARCH
 	select GENERIC_TIME_VSYSCALL
 	select GPIOLIB
 	select HAS_IOPORT
+	select HAVE_ALIGNED_STRUCT_PAGE
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_BITREVERSE
 	select HAVE_ARCH_JUMP_LABEL
@@ -130,6 +131,7 @@ config LOONGARCH
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
 	select HAVE_ASM_MODVERSIONS
+	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CONTEXT_TRACKING_USER
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h
index 0494c2ab553e..d25e25d8fc9e 100644
--- a/arch/loongarch/include/asm/cmpxchg.h
+++ b/arch/loongarch/include/asm/cmpxchg.h
@@ -8,6 +8,7 @@
 #include <linux/bits.h>
 #include <linux/build_bug.h>
 #include <asm/barrier.h>
+#include <asm/cpu-features.h>
 
 #define __xchg_amo_asm(amswap_db, m, val)	\
 ({						\
@@ -137,6 +138,44 @@ __arch_xchg(volatile void *ptr, unsigned long x, int size)
 	__ret;								\
 })
 
+union __u128_halves {	/* overlays one u128 with two u64 halves so each half can be named separately */
+	u128 full;	/* the whole 128-bit value */
+	struct {
+		u64 low;	/* first u64 of the pair; __arch_cmpxchg128 pairs it with __ptr[0] */
+		u64 high;	/* second u64 of the pair; __arch_cmpxchg128 pairs it with __ptr[1] */
+	};
+};
+
+#define __arch_cmpxchg128(ptr, old, new)	/* 128-bit compare-and-exchange on *ptr; evaluates to the two u64 halves loaded from it */	\
+({									\
+	union __u128_halves __old, __new, __ret;			\
+	volatile u64 *__ptr = (volatile u64 *)(ptr);	/* address the target as two u64 words */	\
+									\
+	__old.full = (old);	/* expected value, split into low/high */	\
+	__new.full = (new);	/* replacement value, split into low/high */	\
+									\
+	__asm__ __volatile__(						\
+	"1:   ll.d    %0, %3		# 128-bit cmpxchg low	\n"	\
+	__WEAK_LLSC_MB	/* defined outside this hunk; presumably an LL/SC barrier string - confirm */	\
+	"     ld.d    %1, %4		# 128-bit cmpxchg high	\n"	\
+	"     bne     %0, %z5, 2f				\n"	/* low half differs from __old.low: skip the store */	\
+	"     bne     %1, %z6, 2f				\n"	/* high half differs from __old.high: skip the store */	\
+	"     move    $t0, %z7					\n"	/* stage __new.low in $t0 */	\
+	"     move    $t1, %z8					\n"	/* stage __new.high in $t1 */	\
+	"     sc.q    $t0, $t1, %2				\n"	\
+	"     beqz    $t0, 1b					\n"	/* $t0 == 0 after sc.q: restart from the ll.d */	\
+	"2:							\n"	\
+	__WEAK_LLSC_MB							\
+	: "=&r" (__ret.low), "=&r" (__ret.high)	/* %0, %1: receive the loaded halves */	\
+	: "r" (__ptr),	/* %2: base address used by sc.q */	\
+	  "ZC" (__ptr[0]), "m" (__ptr[1]),	/* %3, %4: memory operands for ll.d and ld.d */	\
+	  "Jr" (__old.low), "Jr" (__old.high),	/* %5, %6: expected halves */	\
+	  "Jr" (__new.low), "Jr" (__new.high)	/* %7, %8: replacement halves */	\
+	: "t0", "t1", "memory");	/* $t0/$t1 are used as scratch by the sequence above */	\
+									\
+	__ret.full;	/* statement-expression result: the loaded value */	\
+})
+
 static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old,
 					   unsigned int new, unsigned int size)
 {
@@ -224,6 +263,15 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int
 	__res;								\
 })
 
+/* cmpxchg128: capability check (cpu_has_scq) and size-checked wrapper around __arch_cmpxchg128() */
+#define system_has_cmpxchg128()		(cpu_has_scq)
+
+#define arch_cmpxchg128(ptr, o, n)					\
+({									\
+	BUILD_BUG_ON(sizeof(*(ptr)) != 16);	/* build-time check: *ptr must be exactly 16 bytes */	\
+	__arch_cmpxchg128(ptr, o, n);					\
+})
+
 #ifdef CONFIG_64BIT
 #define arch_cmpxchg64_local(ptr, o, n)					\
   ({									\
-- 
2.49.0
Re: [PATCH v10 loongarch-next 2/3] LoongArch: Add 128-bit atomic cmpxchg support
Posted by Huacai Chen 1 week, 3 days ago
Applied, with arch_cmpxchg128_local() added; please check whether everything is OK.
https://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git/commit/?h=loongarch-next&id=09a3171203974a30c6d0ab66f8ef0a6d9e7e780e

Huacai

On Sat, Jan 10, 2026 at 9:11 PM George Guo <dongtai.guo@linux.dev> wrote:
>
> From: George Guo <guodongtai@kylinos.cn>
>
> Implement 128-bit atomic compare-and-exchange using LoongArch's
> LL.D/SC.Q instructions.
>
> At the same time, fix BPF scheduler test failures (scx_central scx_qmap)
> caused by kmalloc_nolock_noprof returning NULL due to missing
> 128-bit atomics. The NULL returns led to -ENOMEM errors during
> scheduler initialization, causing test cases to fail.
>
> Verified by testing with the scx_qmap scheduler (located in
> tools/sched_ext/). Building with `make` and running
> ./tools/sched_ext/build/bin/scx_qmap.
>
> Link: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git/commit/?id=5fb750e8a9ae
> Acked-by: Hengqi Chen <hengqi.chen@gmail.com>
> Tested-by: Hengqi Chen <hengqi.chen@gmail.com>
> Signed-off-by: George Guo <guodongtai@kylinos.cn>
> ---
>  arch/loongarch/Kconfig               |  2 ++
>  arch/loongarch/include/asm/cmpxchg.h | 48 ++++++++++++++++++++++++++++
>  2 files changed, 50 insertions(+)
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 730f34214519..f9845ebec1a4 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -114,6 +114,7 @@ config LOONGARCH
>         select GENERIC_TIME_VSYSCALL
>         select GPIOLIB
>         select HAS_IOPORT
> +       select HAVE_ALIGNED_STRUCT_PAGE
>         select HAVE_ARCH_AUDITSYSCALL
>         select HAVE_ARCH_BITREVERSE
>         select HAVE_ARCH_JUMP_LABEL
> @@ -130,6 +131,7 @@ config LOONGARCH
>         select HAVE_ARCH_TRANSPARENT_HUGEPAGE
>         select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
>         select HAVE_ASM_MODVERSIONS
> +       select HAVE_CMPXCHG_DOUBLE
>         select HAVE_CONTEXT_TRACKING_USER
>         select HAVE_C_RECORDMCOUNT
>         select HAVE_DEBUG_KMEMLEAK
> diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h
> index 0494c2ab553e..d25e25d8fc9e 100644
> --- a/arch/loongarch/include/asm/cmpxchg.h
> +++ b/arch/loongarch/include/asm/cmpxchg.h
> @@ -8,6 +8,7 @@
>  #include <linux/bits.h>
>  #include <linux/build_bug.h>
>  #include <asm/barrier.h>
> +#include <asm/cpu-features.h>
>
>  #define __xchg_amo_asm(amswap_db, m, val)      \
>  ({                                             \
> @@ -137,6 +138,44 @@ __arch_xchg(volatile void *ptr, unsigned long x, int size)
>         __ret;                                                          \
>  })
>
> +union __u128_halves {
> +       u128 full;
> +       struct {
> +               u64 low;
> +               u64 high;
> +       };
> +};
> +
> +#define __arch_cmpxchg128(ptr, old, new)                                       \
> +({                                                                     \
> +       union __u128_halves __old, __new, __ret;                        \
> +       volatile u64 *__ptr = (volatile u64 *)(ptr);                    \
> +                                                                       \
> +       __old.full = (old);                                             \
> +       __new.full = (new);                                             \
> +                                                                       \
> +       __asm__ __volatile__(                                           \
> +       "1:   ll.d    %0, %3            # 128-bit cmpxchg low   \n"     \
> +       __WEAK_LLSC_MB                                                  \
> +       "     ld.d    %1, %4            # 128-bit cmpxchg high  \n"     \
> +       "     bne     %0, %z5, 2f                               \n"     \
> +       "     bne     %1, %z6, 2f                               \n"     \
> +       "     move    $t0, %z7                                  \n"     \
> +       "     move    $t1, %z8                                  \n"     \
> +       "     sc.q    $t0, $t1, %2                              \n"     \
> +       "     beqz    $t0, 1b                                   \n"     \
> +       "2:                                                     \n"     \
> +       __WEAK_LLSC_MB                                                  \
> +       : "=&r" (__ret.low), "=&r" (__ret.high)                         \
> +       : "r" (__ptr),                                                  \
> +         "ZC" (__ptr[0]), "m" (__ptr[1]),                              \
> +         "Jr" (__old.low), "Jr" (__old.high),                          \
> +         "Jr" (__new.low), "Jr" (__new.high)                           \
> +       : "t0", "t1", "memory");                                        \
> +                                                                       \
> +       __ret.full;                                                     \
> +})
> +
>  static inline unsigned int __cmpxchg_small(volatile void *ptr, unsigned int old,
>                                            unsigned int new, unsigned int size)
>  {
> @@ -224,6 +263,15 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, unsigned int
>         __res;                                                          \
>  })
>
> +/* cmpxchg128 */
> +#define system_has_cmpxchg128()                (cpu_has_scq)
> +
> +#define arch_cmpxchg128(ptr, o, n)                                     \
> +({                                                                     \
> +       BUILD_BUG_ON(sizeof(*(ptr)) != 16);                             \
> +       __arch_cmpxchg128(ptr, o, n);                                   \
> +})
> +
>  #ifdef CONFIG_64BIT
>  #define arch_cmpxchg64_local(ptr, o, n)                                        \
>    ({                                                                   \
> --
> 2.49.0
>
>