From: Huacai Chen
To: Huacai Chen
Cc: loongarch@lists.linux.dev, Xuefeng Li , Guo Ren , Xuerui Wang , Jiaxun Yang , linux-kernel@vger.kernel.org, Huacai Chen , Arnd Bergmann
Subject: [PATCH V4 01/14] LoongArch: Add atomic operations for 32BIT/64BIT
Date: Thu, 27 Nov 2025 23:48:19 +0800
Message-ID: <20251127154832.137925-2-chenhuacai@loongson.cn>
X-Mailer: git-send-email 2.47.3
In-Reply-To: <20251127154832.137925-1-chenhuacai@loongson.cn>
References: <20251127154832.137925-1-chenhuacai@loongson.cn>

LoongArch64 has both AMO and LL/SC instructions, while LoongArch32 only
has LL/SC instructions. So add a Kconfig option CPU_HAS_AMO and implement
atomic operations (including local operations and percpu operations) for
both 32BIT and 64BIT platforms.

Reviewed-by: Arnd Bergmann
Signed-off-by: Jiaxun Yang
Signed-off-by: Huacai Chen
---
 arch/loongarch/Kconfig                   |   4 +
 arch/loongarch/include/asm/atomic-amo.h  | 206 +++++++++++++++++++++++
 arch/loongarch/include/asm/atomic-llsc.h | 100 +++++++++++
 arch/loongarch/include/asm/atomic.h      | 197 ++--------------------
 arch/loongarch/include/asm/cmpxchg.h     |  48 ++++--
 arch/loongarch/include/asm/local.h       |  37 ++++
 arch/loongarch/include/asm/percpu.h      |  40 +++--
 7 files changed, 413 insertions(+), 219 deletions(-)
 create mode 100644 arch/loongarch/include/asm/atomic-amo.h
 create mode 100644 arch/loongarch/include/asm/atomic-llsc.h

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index a672f689cb03..730f34214519 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -568,6 +568,10 @@ config ARCH_STRICT_ALIGN
 	  to run kernel only on systems with h/w unaligned access support in
 	  order to optimise for performance.
 
+config CPU_HAS_AMO
+	bool
+	default 64BIT
+
 config CPU_HAS_FPU
 	bool
 	default y
diff --git a/arch/loongarch/include/asm/atomic-amo.h b/arch/loongarch/include/asm/atomic-amo.h
new file mode 100644
index 000000000000..d5efa5252d56
--- /dev/null
+++ b/arch/loongarch/include/asm/atomic-amo.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Atomic operations (AMO).
+ *
+ * Copyright (C) 2020-2025 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_ATOMIC_AMO_H
+#define _ASM_ATOMIC_AMO_H
+
+#include
+#include
+#include
+
+#define ATOMIC_OP(op, I, asm_op)	\
+static inline void arch_atomic_##op(int i, atomic_t *v)	\
+{	\
+	__asm__ __volatile__(	\
+	"am"#asm_op".w" " $zero, %1, %0 \n"	\
+	: "+ZB" (v->counter)	\
+	: "r" (I)	\
+	: "memory");	\
+}
+
+#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix)	\
+static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v)	\
+{	\
+	int result;	\
+	\
+	__asm__ __volatile__(	\
+	"am"#asm_op#mb".w" " %1, %2, %0 \n"	\
+	: "+ZB" (v->counter), "=&r" (result)	\
+	: "r" (I)	\
+	: "memory");	\
+	\
+	return result c_op I;	\
+}
+
+#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix)	\
+static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v)	\
+{	\
+	int result;	\
+	\
+	__asm__ __volatile__(	\
+	"am"#asm_op#mb".w" " %1, %2, %0 \n"	\
+	: "+ZB" (v->counter), "=&r" (result)	\
+	: "r" (I)	\
+	: "memory");	\
+	\
+	return result;	\
+}
+
+#define ATOMIC_OPS(op, I, asm_op, c_op)	\
+	ATOMIC_OP(op, I, asm_op)	\
+	ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db, )	\
+	ATOMIC_OP_RETURN(op, I, asm_op, c_op, , _relaxed)	\
+	ATOMIC_FETCH_OP(op, I, asm_op, _db, )	\
+	ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
+
+ATOMIC_OPS(add, i, add, +)
+ATOMIC_OPS(sub, -i, add, +)
+
+#define arch_atomic_add_return arch_atomic_add_return
+#define arch_atomic_add_return_acquire arch_atomic_add_return
+#define arch_atomic_add_return_release arch_atomic_add_return
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return
+#define arch_atomic_sub_return_release arch_atomic_sub_return
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_add arch_atomic_fetch_add
+#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add
+#define arch_atomic_fetch_add_release arch_atomic_fetch_add
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+
+#define ATOMIC_OPS(op, I, asm_op)	\
+	ATOMIC_OP(op, I, asm_op)	\
+	ATOMIC_FETCH_OP(op, I, asm_op, _db, )	\
+	ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
+
+ATOMIC_OPS(and, i, and)
+ATOMIC_OPS(or, i, or)
+ATOMIC_OPS(xor, i, xor)
+
+#define arch_atomic_fetch_and arch_atomic_fetch_and
+#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and
+#define arch_atomic_fetch_and_release arch_atomic_fetch_and
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or arch_atomic_fetch_or
+#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or
+#define arch_atomic_fetch_or_release arch_atomic_fetch_or
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#ifdef CONFIG_64BIT
+
+#define ATOMIC64_OP(op, I, asm_op)	\
+static inline void arch_atomic64_##op(long i, atomic64_t *v)	\
+{	\
+	__asm__ __volatile__(	\
+	"am"#asm_op".d " " $zero, %1, %0 \n"	\
+	: "+ZB" (v->counter)	\
+	: "r" (I)	\
+	: "memory");	\
+}
+
+#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix)	\
+static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v)	\
+{	\
+	long result;	\
+	__asm__ __volatile__(	\
+	"am"#asm_op#mb".d " " %1, %2, %0 \n"	\
+	: "+ZB" (v->counter), "=&r" (result)	\
+	: "r" (I)	\
+	: "memory");	\
+	\
+	return result c_op I;	\
+}
+
+#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix)	\
+static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v)	\
+{	\
+	long result;	\
+	\
+	__asm__ __volatile__(	\
+	"am"#asm_op#mb".d " " %1, %2, %0 \n"	\
+	: "+ZB" (v->counter), "=&r" (result)	\
+	: "r" (I)	\
+	: "memory");	\
+	\
+	return result;	\
+}
+
+#define ATOMIC64_OPS(op, I, asm_op, c_op)	\
+	ATOMIC64_OP(op, I, asm_op)	\
+	ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, )	\
+	ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed)	\
+	ATOMIC64_FETCH_OP(op, I, asm_op, _db, )	\
+	ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
+
+ATOMIC64_OPS(add, i, add, +)
+ATOMIC64_OPS(sub, -i, add, +)
+
+#define arch_atomic64_add_return arch_atomic64_add_return
+#define arch_atomic64_add_return_acquire arch_atomic64_add_return
+#define arch_atomic64_add_return_release arch_atomic64_add_return
+#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return arch_atomic64_sub_return
+#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return
+#define arch_atomic64_sub_return_release arch_atomic64_sub_return
+#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
+
+#undef ATOMIC64_OPS
+
+#define ATOMIC64_OPS(op, I, asm_op)	\
+	ATOMIC64_OP(op, I, asm_op)	\
+	ATOMIC64_FETCH_OP(op, I, asm_op, _db, )	\
+	ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
+
+ATOMIC64_OPS(and, i, and)
+ATOMIC64_OPS(or, i, or)
+ATOMIC64_OPS(xor, i, xor)
+
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_OP_RETURN
+#undef ATOMIC64_OP
+
+#endif
+
+#endif /* _ASM_ATOMIC_AMO_H */
diff --git a/arch/loongarch/include/asm/atomic-llsc.h b/arch/loongarch/include/asm/atomic-llsc.h
new file mode 100644
index 000000000000..b4f5670b85cf
--- /dev/null
+++ b/arch/loongarch/include/asm/atomic-llsc.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Atomic operations (LLSC).
+ *
+ * Copyright (C) 2024-2025 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_ATOMIC_LLSC_H
+#define _ASM_ATOMIC_LLSC_H
+
+#include
+#include
+#include
+
+#define ATOMIC_OP(op, I, asm_op)	\
+static inline void arch_atomic_##op(int i, atomic_t *v)	\
+{	\
+	int temp;	\
+	\
+	__asm__ __volatile__(	\
+	"1: ll.w %0, %1 #atomic_" #op " \n"	\
+	"   " #asm_op " %0, %0, %2 \n"	\
+	"   sc.w %0, %1 \n"	\
+	"   beq %0, $r0, 1b \n"	\
+	:"=&r" (temp) , "+ZB"(v->counter)	\
+	:"r" (I)	\
+	);	\
+}
+
+#define ATOMIC_OP_RETURN(op, I, asm_op)	\
+static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)	\
+{	\
+	int result, temp;	\
+	\
+	__asm__ __volatile__(	\
+	"1: ll.w %1, %2 # atomic_" #op "_return \n"	\
+	"   " #asm_op " %0, %1, %3 \n"	\
+	"   sc.w %0, %2 \n"	\
+	"   beq %0, $r0 ,1b \n"	\
+	"   " #asm_op " %0, %1, %3 \n"	\
+	: "=&r" (result), "=&r" (temp), "+ZB"(v->counter)	\
+	: "r" (I));	\
+	\
+	return result;	\
+}
+
+#define ATOMIC_FETCH_OP(op, I, asm_op)	\
+static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
+{	\
+	int result, temp;	\
+	\
+	__asm__ __volatile__(	\
+	"1: ll.w %1, %2 # atomic_fetch_" #op " \n"	\
+	"   " #asm_op " %0, %1, %3 \n"	\
+	"   sc.w %0, %2 \n"	\
+	"   beq %0, $r0 ,1b \n"	\
+	"   add.w %0, %1 ,$r0 \n"	\
+	: "=&r" (result), "=&r" (temp), "+ZB" (v->counter)	\
+	: "r" (I));	\
+	\
+	return result;	\
+}
+
+#define ATOMIC_OPS(op,I ,asm_op, c_op)	\
+	ATOMIC_OP(op, I, asm_op)	\
+	ATOMIC_OP_RETURN(op, I , asm_op)	\
+	ATOMIC_FETCH_OP(op, I, asm_op)
+
+ATOMIC_OPS(add, i , add.w ,+=)
+ATOMIC_OPS(sub, -i , add.w ,+=)
+
+#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
+
+#undef ATOMIC_OPS
+
+#define ATOMIC_OPS(op, I, asm_op)	\
+	ATOMIC_OP(op, I, asm_op)	\
+	ATOMIC_FETCH_OP(op, I, asm_op)
+
+ATOMIC_OPS(and, i, and)
+ATOMIC_OPS(or, i, or)
+ATOMIC_OPS(xor, i, xor)
+
+#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
+
+#undef ATOMIC_OPS
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_OP_RETURN
+#undef ATOMIC_OP
+
+#ifdef CONFIG_64BIT
+#error "64-bit LLSC atomic operations are not supported"
+#endif
+
+#endif /* _ASM_ATOMIC_LLSC_H */
diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h
index c86f0ab922ec..444b9ddcd004 100644
--- a/arch/loongarch/include/asm/atomic.h
+++ b/arch/loongarch/include/asm/atomic.h
@@ -11,6 +11,16 @@
 #include
 #include
 
+#ifdef CONFIG_CPU_HAS_AMO
+#include <asm/atomic-amo.h>
+#else
+#include <asm/atomic-llsc.h>
+#endif
+
+#ifdef CONFIG_GENERIC_ATOMIC64
+#include <asm-generic/atomic64.h>
+#endif
+
 #if __SIZEOF_LONG__ == 4
 #define __LL "ll.w "
 #define __SC "sc.w "
@@ -34,100 +44,6 @@
 #define arch_atomic_read(v)	READ_ONCE((v)->counter)
 #define arch_atomic_set(v, i)	WRITE_ONCE((v)->counter, (i))
 
-#define ATOMIC_OP(op, I, asm_op)	\
-static inline void arch_atomic_##op(int i, atomic_t *v)	\
-{	\
-	__asm__ __volatile__(	\
-	"am"#asm_op".w" " $zero, %1, %0 \n"	\
-	: "+ZB" (v->counter)	\
-	: "r" (I)	\
-	: "memory");	\
-}
-
-#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix)	\
-static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v)	\
-{	\
-	int result;	\
-	\
-	__asm__ __volatile__(	\
-	"am"#asm_op#mb".w" " %1, %2, %0 \n"	\
-	: "+ZB" (v->counter), "=&r" (result)	\
-	: "r" (I)	\
-	: "memory");	\
-	\
-	return result c_op I;	\
-}
-
-#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix)	\
-static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v)	\
-{	\
-	int result;	\
-	\
-	__asm__ __volatile__(	\
-	"am"#asm_op#mb".w" " %1, %2, %0 \n"	\
-	: "+ZB" (v->counter), "=&r" (result)	\
-	: "r" (I)	\
-	: "memory");	\
-	\
-	return result;	\
-}
-
-#define ATOMIC_OPS(op, I, asm_op, c_op)	\
-	ATOMIC_OP(op, I, asm_op)	\
-	ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db, )	\
-	ATOMIC_OP_RETURN(op, I, asm_op, c_op, , _relaxed)	\
-	ATOMIC_FETCH_OP(op, I, asm_op, _db, )	\
-	ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
-
-ATOMIC_OPS(add, i, add, +)
-ATOMIC_OPS(sub, -i, add, +)
-
-#define arch_atomic_add_return arch_atomic_add_return
-#define arch_atomic_add_return_acquire arch_atomic_add_return
-#define arch_atomic_add_return_release arch_atomic_add_return
-#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
-#define arch_atomic_sub_return arch_atomic_sub_return
-#define arch_atomic_sub_return_acquire arch_atomic_sub_return
-#define arch_atomic_sub_return_release arch_atomic_sub_return
-#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
-#define arch_atomic_fetch_add arch_atomic_fetch_add
-#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add
-#define arch_atomic_fetch_add_release arch_atomic_fetch_add
-#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed
-#define arch_atomic_fetch_sub arch_atomic_fetch_sub
-#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub
-#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub
-#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed
-
-#undef ATOMIC_OPS
-
-#define ATOMIC_OPS(op, I, asm_op)	\
-	ATOMIC_OP(op, I, asm_op)	\
-	ATOMIC_FETCH_OP(op, I, asm_op, _db, )	\
-	ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed)
-
-ATOMIC_OPS(and, i, and)
-ATOMIC_OPS(or, i, or)
-ATOMIC_OPS(xor, i, xor)
-
-#define arch_atomic_fetch_and arch_atomic_fetch_and
-#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and
-#define arch_atomic_fetch_and_release arch_atomic_fetch_and
-#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed
-#define arch_atomic_fetch_or arch_atomic_fetch_or
-#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or
-#define arch_atomic_fetch_or_release arch_atomic_fetch_or
-#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed
-#define arch_atomic_fetch_xor arch_atomic_fetch_xor
-#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor
-#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor
-#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed
-
-#undef ATOMIC_OPS
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
 static inline int arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int prev, rc;
@@ -194,99 +110,6 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
 #define arch_atomic64_read(v)	READ_ONCE((v)->counter)
 #define arch_atomic64_set(v, i)	WRITE_ONCE((v)->counter, (i))
 
-#define ATOMIC64_OP(op, I, asm_op)	\
-static inline void arch_atomic64_##op(long i, atomic64_t *v)	\
-{	\
-	__asm__ __volatile__(	\
-	"am"#asm_op".d " " $zero, %1, %0 \n"	\
-	: "+ZB" (v->counter)	\
-	: "r" (I)	\
-	: "memory");	\
-}
-
-#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix)	\
-static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v)	\
-{	\
-	long result;	\
-	__asm__ __volatile__(	\
-	"am"#asm_op#mb".d " " %1, %2, %0 \n"	\
-	: "+ZB" (v->counter), "=&r" (result)	\
-	: "r" (I)	\
-	: "memory");	\
-	\
-	return result c_op I;	\
-}
-
-#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix)	\
-static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v)	\
-{	\
-	long result;	\
-	\
-	__asm__ __volatile__(	\
-	"am"#asm_op#mb".d " " %1, %2, %0 \n"	\
-	: "+ZB" (v->counter), "=&r" (result)	\
-	: "r" (I)	\
-	: "memory");	\
-	\
-	return result;	\
-}
-
-#define ATOMIC64_OPS(op, I, asm_op, c_op)	\
-	ATOMIC64_OP(op, I, asm_op)	\
-	ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, )	\
-	ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed)	\
-	ATOMIC64_FETCH_OP(op, I, asm_op, _db, )	\
-	ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
-
-ATOMIC64_OPS(add, i, add, +)
-ATOMIC64_OPS(sub, -i, add, +)
-
-#define arch_atomic64_add_return arch_atomic64_add_return
-#define arch_atomic64_add_return_acquire arch_atomic64_add_return
-#define arch_atomic64_add_return_release arch_atomic64_add_return
-#define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed
-#define arch_atomic64_sub_return arch_atomic64_sub_return
-#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return
-#define arch_atomic64_sub_return_release arch_atomic64_sub_return
-#define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed
-#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add
-#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add
-#define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed
-#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
-#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub
-#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub
-#define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed
-
-#undef ATOMIC64_OPS
-
-#define ATOMIC64_OPS(op, I, asm_op)	\
-	ATOMIC64_OP(op, I, asm_op)	\
-	ATOMIC64_FETCH_OP(op, I, asm_op, _db, )	\
-	ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed)
-
-ATOMIC64_OPS(and, i, and)
-ATOMIC64_OPS(or, i, or)
-ATOMIC64_OPS(xor, i, xor)
-
-#define arch_atomic64_fetch_and arch_atomic64_fetch_and
-#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and
-#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and
-#define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed
-#define arch_atomic64_fetch_or arch_atomic64_fetch_or
-#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or
-#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or
-#define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
-#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
-#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor
-#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor
-#define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed
-
-#undef ATOMIC64_OPS
-#undef ATOMIC64_FETCH_OP
-#undef ATOMIC64_OP_RETURN
-#undef ATOMIC64_OP
-
 static inline long arch_atomic64_fetch_add_unless(atomic64_t *v, long a, long u)
 {
 	long prev, rc;
diff --git a/arch/loongarch/include/asm/cmpxchg.h b/arch/loongarch/include/asm/cmpxchg.h
index 979fde61bba8..0494c2ab553e 100644
--- a/arch/loongarch/include/asm/cmpxchg.h
+++ b/arch/loongarch/include/asm/cmpxchg.h
@@ -9,17 +9,33 @@
 #include
 #include
 
-#define __xchg_asm(amswap_db, m, val)	\
+#define __xchg_amo_asm(amswap_db, m, val)	\
 ({	\
-	__typeof(val) __ret;	\
+	__typeof(val) __ret;	\
 	\
-	__asm__ __volatile__ (	\
-	" "amswap_db" %1, %z2, %0 \n"	\
-	: "+ZB" (*m), "=&r" (__ret)	\
-	: "Jr" (val)	\
-	: "memory");	\
+	__asm__ __volatile__ (	\
+	" "amswap_db" %1, %z2, %0 \n"	\
+	: "+ZB" (*m), "=&r" (__ret)	\
+	: "Jr" (val)	\
+	: "memory");	\
 	\
-	__ret;	\
+	__ret;	\
+})
+
+#define __xchg_llsc_asm(ld, st, m, val)	\
+({	\
+	__typeof(val) __ret, __tmp;	\
+	\
+	asm volatile (	\
+	"1: ll.w %0, %3 \n"	\
+	"   move %1, %z4 \n"	\
+	"   sc.w %1, %2 \n"	\
+	"   beqz %1, 1b \n"	\
+	: "=&r" (__ret), "=&r" (__tmp), "=ZC" (*m)	\
+	: "ZC" (*m), "Jr" (val)	\
+	: "memory");	\
+	\
+	__ret;	\
 })
 
 static inline unsigned int __xchg_small(volatile void *ptr, unsigned int val,
@@ -67,13 +83,23 @@ __arch_xchg(volatile void *ptr, unsigned long x, int size)
 	switch (size) {
 	case 1:
 	case 2:
-		return __xchg_small(ptr, x, size);
+		return __xchg_small((volatile void *)ptr, x, size);
 
 	case 4:
-		return __xchg_asm("amswap_db.w", (volatile u32 *)ptr, (u32)x);
+#ifdef CONFIG_CPU_HAS_AMO
+		return __xchg_amo_asm("amswap_db.w", (volatile u32 *)ptr, (u32)x);
+#else
+		return __xchg_llsc_asm("ll.w", "sc.w", (volatile u32 *)ptr, (u32)x);
+#endif /* CONFIG_CPU_HAS_AMO */
 
+#ifdef CONFIG_64BIT
 	case 8:
-		return __xchg_asm("amswap_db.d", (volatile u64 *)ptr, (u64)x);
+#ifdef CONFIG_CPU_HAS_AMO
+		return __xchg_amo_asm("amswap_db.d", (volatile u64 *)ptr, (u64)x);
+#else
+		return __xchg_llsc_asm("ll.d", "sc.d", (volatile u64 *)ptr, (u64)x);
+#endif /* CONFIG_CPU_HAS_AMO */
+#endif /* CONFIG_64BIT */
 
 	default:
 		BUILD_BUG();
diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h
index f53ea653af76..65ace8e4350a 100644
--- a/arch/loongarch/include/asm/local.h
+++ b/arch/loongarch/include/asm/local.h
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include
 #include
 
 typedef struct {
@@ -27,6 +28,7 @@ typedef struct {
 /*
  * Same as above, but return the result value
  */
+#ifdef CONFIG_CPU_HAS_AMO
 static inline long local_add_return(long i, local_t *l)
 {
 	unsigned long result;
@@ -55,6 +57,41 @@ static inline long local_sub_return(long i, local_t *l)
 
 	return result;
 }
+#else
+static inline long local_add_return(long i, local_t * l)
+{
+	unsigned long result, temp;
+
+	__asm__ __volatile__(
+	"1:" __LL "%1, %2 # local_add_return \n"
+	__stringify(LONG_ADD) " %0, %1, %3 \n"
+	__SC "%0, %2 \n"
+	" beq %0, $r0, 1b \n"
+	__stringify(LONG_ADD) " %0, %1, %3 \n"
+	: "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+	: "r" (i), "m" (l->a.counter)
+	: "memory");
+
+	return result;
+}
+
+static inline long local_sub_return(long i, local_t * l)
+{
+	unsigned long result, temp;
+
+	__asm__ __volatile__(
+	"1:" __LL "%1, %2 # local_sub_return \n"
+	__stringify(LONG_SUB) " %0, %1, %3 \n"
+	__SC "%0, %2 \n"
+	" beq %0, $r0, 1b \n"
+	__stringify(LONG_SUB) " %0, %1, %3 \n"
+	: "=&r" (result), "=&r" (temp), "=m" (l->a.counter)
+	: "r" (i), "m" (l->a.counter)
+	: "memory");
+
+	return result;
+}
+#endif
 
 static inline long local_cmpxchg(local_t *l, long old, long new)
 {
diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h
index 87be9b14e9da..1619c1d15e6b 100644
--- a/arch/loongarch/include/asm/percpu.h
+++ b/arch/loongarch/include/asm/percpu.h
@@ -36,6 +36,8 @@ static inline void set_my_cpu_offset(unsigned long off)
 	__my_cpu_offset;	\
 })
 
+#ifdef CONFIG_CPU_HAS_AMO
+
 #define PERCPU_OP(op, asm_op, c_op)	\
 static __always_inline unsigned long __percpu_##op(void *ptr,	\
 			unsigned long val, int size)	\
@@ -68,25 +70,9 @@ PERCPU_OP(and, and, &)
 PERCPU_OP(or, or, |)
 #undef PERCPU_OP
 
-static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size)
-{
-	switch (size) {
-	case 1:
-	case 2:
-		return __xchg_small((volatile void *)ptr, val, size);
-
-	case 4:
-		return __xchg_asm("amswap.w", (volatile u32 *)ptr, (u32)val);
-
-	case 8:
-		return __xchg_asm("amswap.d", (volatile u64 *)ptr, (u64)val);
+#endif
 
-	default:
-		BUILD_BUG();
-	}
-
-	return 0;
-}
+#ifdef CONFIG_64BIT
 
 #define __pcpu_op_1(op)	op ".b "
 #define __pcpu_op_2(op)	op ".h "
@@ -115,6 +101,10 @@ do {	\
 	: "memory");	\
 } while (0)
 
+#endif
+
+#define __percpu_xchg __arch_xchg
+
 /* this_cpu_cmpxchg */
 #define _protect_cmpxchg_local(pcp, o, n)	\
 ({	\
@@ -135,6 +125,8 @@ do {	\
 	__retval;	\
 })
 
+#ifdef CONFIG_CPU_HAS_AMO
+
 #define _percpu_add(pcp, val)	\
 	_pcp_protect(__percpu_add, pcp, val)
 
@@ -146,9 +138,6 @@ do {	\
 #define _percpu_or(pcp, val)	\
 	_pcp_protect(__percpu_or, pcp, val)
 
-#define _percpu_xchg(pcp, val)	((typeof(pcp))	\
-	_pcp_protect(__percpu_xchg, pcp, (unsigned long)(val)))
-
 #define this_cpu_add_4(pcp, val) _percpu_add(pcp, val)
 #define this_cpu_add_8(pcp, val) _percpu_add(pcp, val)
 
@@ -161,6 +150,10 @@ do {	\
 #define this_cpu_or_4(pcp, val) _percpu_or(pcp, val)
 #define this_cpu_or_8(pcp, val) _percpu_or(pcp, val)
 
+#endif
+
+#ifdef CONFIG_64BIT
+
 #define this_cpu_read_1(pcp) _percpu_read(1, pcp)
 #define this_cpu_read_2(pcp) _percpu_read(2, pcp)
 #define this_cpu_read_4(pcp) _percpu_read(4, pcp)
@@ -171,6 +164,11 @@ do {	\
 #define this_cpu_write_4(pcp, val) _percpu_write(4, pcp, val)
 #define this_cpu_write_8(pcp, val) _percpu_write(8, pcp, val)
 
+#endif
+
+#define _percpu_xchg(pcp, val)	((typeof(pcp))	\
+	_pcp_protect(__percpu_xchg, pcp, (unsigned long)(val)))
+
 #define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val)
 #define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val)
 #define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val)
-- 
2.47.3
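
[Editorial note, not part of the patch: the AMO/LLSC split above boils down to two ways of expressing the same read-modify-write. The sketch below is a minimal standalone illustration for a LoongArch GCC toolchain; the function names amo_add() and llsc_add() are invented for illustration and do not appear in the patch. It shows an atomic 32-bit add once as a single AMO instruction and once as an LL/SC retry loop, mirroring the operand constraints used in atomic-amo.h and atomic-llsc.h.]

/* AMO form: a single amadd.w performs the whole read-modify-write. */
static inline void amo_add(int i, int *p)
{
	__asm__ __volatile__(
	"amadd.w $zero, %1, %0"
	: "+ZB" (*p)
	: "r" (i)
	: "memory");
}

/* LL/SC form: reload and retry until the store-conditional succeeds. */
static inline void llsc_add(int i, int *p)
{
	int tmp;

	__asm__ __volatile__(
	"1: ll.w  %0, %1 \n"
	"   add.w %0, %0, %2 \n"
	"   sc.w  %0, %1 \n"
	"   beqz  %0, 1b \n"
	: "=&r" (tmp), "+ZB" (*p)
	: "r" (i)
	: "memory");
}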