From nobody Thu May 14 10:24:08 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7001CC433EF for ; Mon, 11 Apr 2022 14:52:15 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1347526AbiDKOy1 (ORCPT ); Mon, 11 Apr 2022 10:54:27 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56564 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S241348AbiDKOyW (ORCPT ); Mon, 11 Apr 2022 10:54:22 -0400 Received: from ams.source.kernel.org (ams.source.kernel.org [IPv6:2604:1380:4601:e00::1]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id ECF281B9; Mon, 11 Apr 2022 07:52:07 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ams.source.kernel.org (Postfix) with ESMTPS id A27BEB81649; Mon, 11 Apr 2022 14:52:06 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id 4570CC385AA; Mon, 11 Apr 2022 14:52:03 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1649688725; bh=7Herwyg859Hm73dHIXsR7AzUXkxBAZbVYjJ8KndKbEM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=bMKZFyWcYa5eVqFZ4MEbslDV47qw5MoxOzKnY0cLLPg4s/iqGYYJqLKIMwOggfFha N+jImHG9ackDKQuvf06YoeVarLm9yd3FQykaO1yZJtLKY5JT3bssvCUbtuedCy0mbq 0RG3q5Ptb8JC6OXVkBAWfwM+8YwIcsryEDwFq8XX3kYCFRjoNuQHxhOVvSnVTWDKjp 85jwidY1YWNoIoHSX5LgBiMQTLEfAIUJUmgD7q7/3IueIpro+6Ike2SJ8lhk0RUbsc g2vfEyUUJk1qnQK8mNRt/z972Jy7Ho9DySakr5VUcDLYfZMRsBPzVgYvKnQSzDeFsh Cth36Iz5bWbeQ== From: guoren@kernel.org To: guoren@kernel.org, arnd@arndb.de, mark.rutland@arm.com Cc: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org, linux-csky@vger.kernel.org, Guo Ren Subject: [PATCH V2 1/2] csky: 
cmpxchg: Optimize with acquire & release Date: Mon, 11 Apr 2022 22:51:45 +0800 Message-Id: <20220411145146.920314-2-guoren@kernel.org> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20220411145146.920314-1-guoren@kernel.org> References: <20220411145146.920314-1-guoren@kernel.org> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" From: Guo Ren Optimize arch_xchg|cmpxchg|cmpxchg_local with ASM acquire|release instructions instead of previous C based. Important reference comment by Rutland: 8e86f0b409a4 ("arm64: atomics: fix use of acquire + release for full barrier semantics") Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=3DLbAdu7jntZRUa=3D-= dJwL0VfmDfBV5MHB=3DrcZ-w@mail.gmail.com/T/#m27a0f1342995deae49ce1d0e1f2683f= 8a181d6c3 Signed-off-by: Guo Ren Signed-off-by: Guo Ren Cc: Mark Rutland --- Changes in V2: - Fixup use of acquire + release for barrier semantics by Rutland. 
--- arch/csky/include/asm/barrier.h | 8 +++-- arch/csky/include/asm/cmpxchg.h | 61 +++++++++++++++++++++++++++++---- 2 files changed, 60 insertions(+), 9 deletions(-) diff --git a/arch/csky/include/asm/barrier.h b/arch/csky/include/asm/barrie= r.h index f4045dd53e17..a075f17d02dd 100644 --- a/arch/csky/include/asm/barrier.h +++ b/arch/csky/include/asm/barrier.h @@ -37,6 +37,9 @@ * bar.brar * bar.bwaw */ +#define ACQUIRE_FENCE ".long 0x8427c000\n" +#define RELEASE_FENCE ".long 0x842ec000\n" + #define __bar_brw() asm volatile (".long 0x842cc000\n":::"memory") #define __bar_br() asm volatile (".long 0x8424c000\n":::"memory") #define __bar_bw() asm volatile (".long 0x8428c000\n":::"memory") @@ -44,10 +47,10 @@ #define __bar_ar() asm volatile (".long 0x8421c000\n":::"memory") #define __bar_aw() asm volatile (".long 0x8422c000\n":::"memory") #define __bar_brwarw() asm volatile (".long 0x842fc000\n":::"memory") -#define __bar_brarw() asm volatile (".long 0x8427c000\n":::"memory") +#define __bar_brarw() asm volatile (ACQUIRE_FENCE:::"memory") #define __bar_bwarw() asm volatile (".long 0x842bc000\n":::"memory") #define __bar_brwar() asm volatile (".long 0x842dc000\n":::"memory") -#define __bar_brwaw() asm volatile (".long 0x842ec000\n":::"memory") +#define __bar_brwaw() asm volatile (RELEASE_FENCE:::"memory") #define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory") #define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory") #define __bar_bwaw() asm volatile (".long 0x842ac000\n":::"memory") @@ -56,7 +59,6 @@ #define __smp_rmb() __bar_brar() #define __smp_wmb() __bar_bwaw() =20 -#define ACQUIRE_FENCE ".long 0x8427c000\n" #define __smp_acquire_fence() __bar_brarw() #define __smp_release_fence() __bar_brwaw() =20 diff --git a/arch/csky/include/asm/cmpxchg.h b/arch/csky/include/asm/cmpxch= g.h index d1bef11f8dc9..1a6f2f445c12 100644 --- a/arch/csky/include/asm/cmpxchg.h +++ b/arch/csky/include/asm/cmpxchg.h @@ -30,10 +30,36 @@ extern void __bad_xchg(void); } 
\ __ret; \ }) - #define arch_xchg_relaxed(ptr, x) \ (__xchg_relaxed((x), (ptr), sizeof(*(ptr)))) =20 +#define __xchg(new, ptr, size) \ +({ \ + __typeof__(ptr) __ptr =3D (ptr); \ + __typeof__(new) __new =3D (new); \ + __typeof__(*(ptr)) __ret; \ + unsigned long tmp; \ + switch (size) { \ + case 4: \ + asm volatile ( \ + "1: ldex.w %0, (%3) \n" \ + " mov %1, %2 \n" \ + RELEASE_FENCE \ + " stex.w %1, (%3) \n" \ + " bez %1, 1b \n" \ + : "=3D&r" (__ret), "=3D&r" (tmp) \ + : "r" (__new), "r"(__ptr) \ + :); \ + __smp_mb(); \ + break; \ + default: \ + __bad_xchg(); \ + } \ + __ret; \ +}) +#define arch_xchg(ptr, x) \ + (__xchg((x), (ptr), sizeof(*(ptr)))) + #define __cmpxchg_relaxed(ptr, old, new, size) \ ({ \ __typeof__(ptr) __ptr =3D (ptr); \ @@ -60,19 +86,42 @@ extern void __bad_xchg(void); } \ __ret; \ }) - #define arch_cmpxchg_relaxed(ptr, o, n) \ (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) =20 -#define arch_cmpxchg(ptr, o, n) \ +#define __cmpxchg(ptr, old, new, size) \ ({ \ + __typeof__(ptr) __ptr =3D (ptr); \ + __typeof__(new) __new =3D (new); \ + __typeof__(new) __tmp; \ + __typeof__(old) __old =3D (old); \ __typeof__(*(ptr)) __ret; \ - __smp_release_fence(); \ - __ret =3D arch_cmpxchg_relaxed(ptr, o, n); \ - __smp_acquire_fence(); \ + switch (size) { \ + case 4: \ + asm volatile ( \ + "1: ldex.w %0, (%3) \n" \ + " cmpne %0, %4 \n" \ + " bt 2f \n" \ + " mov %1, %2 \n" \ + RELEASE_FENCE \ + " stex.w %1, (%3) \n" \ + " bez %1, 1b \n" \ + "2: \n" \ + : "=3D&r" (__ret), "=3D&r" (__tmp) \ + : "r" (__new), "r"(__ptr), "r"(__old) \ + :); \ + __smp_mb(); \ + break; \ + default: \ + __bad_xchg(); \ + } \ __ret; \ }) +#define arch_cmpxchg(ptr, o, n) \ + (__cmpxchg((ptr), (o), (n), sizeof(*(ptr)))) =20 +#define arch_cmpxchg_local(ptr, o, n) \ + (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr)))) #else #include #endif --=20 2.25.1 From nobody Thu May 14 10:24:08 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on 
aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id CEC51C433F5 for ; Mon, 11 Apr 2022 14:52:18 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1347535AbiDKOya (ORCPT ); Mon, 11 Apr 2022 10:54:30 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56588 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1347512AbiDKOyX (ORCPT ); Mon, 11 Apr 2022 10:54:23 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E79D425C59; Mon, 11 Apr 2022 07:52:08 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 70E33614E6; Mon, 11 Apr 2022 14:52:08 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id D401EC385A3; Mon, 11 Apr 2022 14:52:05 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1649688727; bh=Xp4ImSPeb4qq8tniBlGX8vZ2MFgbMYplDHWI2bqxnkg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=YmkUiwXvapVmxGSNyBvfQjv4TwbHGUcnjk4WETPxl9xcN0UBbZobAqe7Gpqm0Hnab EcMYO57RZicU4J8cmn1BhCrUrvSSckwVfmUhShBQWH60BM2sA2I3zAZChZfjPfwFQP qd753P1n3EMlzukt/SAajseHpXxZVfIx4u5hatQapwRZBtrckqVEU2CDsLCLEWzc/y gJHKxq+dP3VmraCG2LNwLtfsO0A8aYSy5EmZVq3EGjjlfphXC9NnSXzTlQUIO4VBcf dPP0d5HMwE7ZjXdcaJqxM2D0/PDjEHyMyNO5zOWpyMcoQk5sychnTD+06L+nhL5Ysd 7EFpn+bIsJqzA== From: guoren@kernel.org To: guoren@kernel.org, arnd@arndb.de, mark.rutland@arm.com Cc: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org, linux-csky@vger.kernel.org, Guo Ren Subject: [PATCH V2 2/2] csky: atomic: Add custom atomic.h implementation Date: Mon, 11 Apr 2022 22:51:46 +0800 Message-Id: 
<20220411145146.920314-3-guoren@kernel.org> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20220411145146.920314-1-guoren@kernel.org> References: <20220411145146.920314-1-guoren@kernel.org> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" From: Guo Ren The generic atomic.h used cmpxchg to implement the atomic operations, it will cause dual loop to reduce the forward guarantee. The patch implements csky custom atomic operations with ldex/stex instructions for the best performance. Important reference comment by Rutland: 8e86f0b409a4 ("arm64: atomics: fix use of acquire + release for full barrier semantics") Link: https://lore.kernel.org/linux-riscv/CAJF2gTSAxpAi=3DLbAdu7jntZRUa=3D-= dJwL0VfmDfBV5MHB=3DrcZ-w@mail.gmail.com/T/#m27a0f1342995deae49ce1d0e1f2683f= 8a181d6c3 Signed-off-by: Guo Ren Signed-off-by: Guo Ren Cc: Mark Rutland --- Changes in V2: - Fixup use of acquire + release for barrier semantics by Rutland. 
--- arch/csky/include/asm/atomic.h | 130 +++++++++++++++++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 arch/csky/include/asm/atomic.h diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h new file mode 100644 index 000000000000..2e1a22f55ea1 --- /dev/null +++ b/arch/csky/include/asm/atomic.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_CSKY_ATOMIC_H +#define __ASM_CSKY_ATOMIC_H + +#ifdef CONFIG_SMP +# include + +#include +#include + +#define __atomic_acquire_fence() __smp_acquire_fence() + +#define __atomic_release_fence() __smp_release_fence() + +static __always_inline int arch_atomic_read(const atomic_t *v) +{ + return READ_ONCE(v->counter); +} +static __always_inline void arch_atomic_set(atomic_t *v, int i) +{ + WRITE_ONCE(v->counter, i); +} + +#define ATOMIC_OP(op, asm_op, I) \ +static __always_inline \ +void arch_atomic_##op(int i, atomic_t *v) \ +{ \ + unsigned long tmp; \ + __asm__ __volatile__ ( \ + "1: ldex.w %0, (%2) \n" \ + " " #op " %0, %1 \n" \ + " stex.w %0, (%2) \n" \ + " bez %0, 1b \n" \ + : "=3D&r" (tmp) \ + : "r" (I), "r" (&v->counter) \ + : "memory"); \ +} + +ATOMIC_OP(add, add, i) +ATOMIC_OP(sub, add, -i) +ATOMIC_OP(and, and, i) +ATOMIC_OP( or, or, i) +ATOMIC_OP(xor, xor, i) + +#undef ATOMIC_OP + +#define ATOMIC_FETCH_OP(op, asm_op, I) \ +static __always_inline \ +int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +{ \ + register int ret, tmp; \ + __asm__ __volatile__ ( \ + "1: ldex.w %0, (%3) \n" \ + " mov %1, %0 \n" \ + " " #op " %0, %2 \n" \ + " stex.w %0, (%3) \n" \ + " bez %0, 1b \n" \ + : "=3D&r" (tmp), "=3D&r" (ret) \ + : "r" (I), "r"(&v->counter) \ + : "memory"); \ + return ret; \ +} + +#define ATOMIC_OP_RETURN(op, asm_op, c_op, I) \ +static __always_inline \ +int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ +{ \ + return arch_atomic_fetch_##op##_relaxed(i, v) c_op I; \ +} + +#define ATOMIC_OPS(op, asm_op, c_op, I) \ + ATOMIC_FETCH_OP( op, 
asm_op, I) \ + ATOMIC_OP_RETURN(op, asm_op, c_op, I) + +ATOMIC_OPS(add, add, +, i) +ATOMIC_OPS(sub, add, +, -i) + +#define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed + +#define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed + +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN + +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_FETCH_OP(op, asm_op, I) + +ATOMIC_OPS(and, and, i) +ATOMIC_OPS( or, or, i) +ATOMIC_OPS(xor, xor, i) + +#define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed + +#undef ATOMIC_OPS + +#undef ATOMIC_FETCH_OP + +#define ATOMIC_OP() \ +static __always_inline \ +int arch_atomic_xchg_relaxed(atomic_t *v, int n) \ +{ \ + return __xchg_relaxed(n, &(v->counter), 4); \ +} \ +static __always_inline \ +int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n) \ +{ \ + return __cmpxchg_relaxed(&(v->counter), o, n, 4); \ +} + +#define ATOMIC_OPS() \ + ATOMIC_OP() + +ATOMIC_OPS() + +#define arch_atomic_xchg_relaxed arch_atomic_xchg_relaxed +#define arch_atomic_cmpxchg_relaxed arch_atomic_cmpxchg_relaxed + +#undef ATOMIC_OPS +#undef ATOMIC_OP + +#else +# include +#endif + +#endif /* __ASM_CSKY_ATOMIC_H */ --=20 2.25.1