From: ysionneau@kalrayinc.com
To: linux-kernel@vger.kernel.org, Will Deacon, Peter Zijlstra, Boqun Feng,
	Mark Rutland, Yury Norov, Rasmus Villemoes
Cc: Jonathan Borne, Julian Vetter, Yann Sionneau, Clement Leger,
	Jules Maselbas, Julien Villette
Subject: [RFC PATCH v3 15/37] kvx: Add atomic/locking headers
Date: Mon, 22 Jul 2024 11:41:26 +0200
Message-ID: <20240722094226.21602-16-ysionneau@kalrayinc.com>
In-Reply-To: <20240722094226.21602-1-ysionneau@kalrayinc.com>
References: <20240722094226.21602-1-ysionneau@kalrayinc.com>

From: Yann Sionneau

Add common headers (atomic, bitops, barrier and locking) for basic kvx
support.

Co-developed-by: Clement Leger
Signed-off-by: Clement Leger
Co-developed-by: Jules Maselbas
Signed-off-by: Jules Maselbas
Co-developed-by: Julian Vetter
Signed-off-by: Julian Vetter
Co-developed-by: Julien Villette
Signed-off-by: Julien Villette
Signed-off-by: Yann Sionneau
---

Notes:
    V1 -> V2:
     - use {READ,WRITE}_ONCE for arch_atomic64_{read,set}
     - use asm-generic/bitops/atomic.h instead of __test_and_*_bit
     - removed duplicated includes
     - rewrite xchg and cmpxchg in C using builtins for acswap insn

    V2 -> V3:
     - use arch_atomic64_read instead of plain pointer dereference
     - undef ATOMIC64_RETURN_OP
     - override generic atomics for:
       - arch_atomic64_{add,sub}_return
       - arch_atomic64_fetch_{add,sub,and,or,xor}
     - add missing if (!word) return 0 in __ffs
     - typos
---
 arch/kvx/include/asm/atomic.h  | 119 +++++++++++++++++++++++
 arch/kvx/include/asm/barrier.h |  15 +++
 arch/kvx/include/asm/bitops.h  | 118 +++++++++++++++++++++++
 arch/kvx/include/asm/bitrev.h  |  32 +++++++
 arch/kvx/include/asm/cmpxchg.h | 170 +++++++++++++++++++++++++++++++++
 5 files changed, 454 insertions(+)
 create mode 100644 arch/kvx/include/asm/atomic.h
 create mode 100644 arch/kvx/include/asm/barrier.h
 create mode 100644 arch/kvx/include/asm/bitops.h
 create mode 100644 arch/kvx/include/asm/bitrev.h
 create mode 100644 arch/kvx/include/asm/cmpxchg.h

diff --git a/arch/kvx/include/asm/atomic.h b/arch/kvx/include/asm/atomic.h
new file mode 100644
index 0000000000000..1ccd5ca51a763
--- /dev/null
+++ b/arch/kvx/include/asm/atomic.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _ASM_KVX_ATOMIC_H
+#define _ASM_KVX_ATOMIC_H
+
+#include <linux/types.h>
+
+#include <asm/cmpxchg.h>
+
+#define ATOMIC64_INIT(i)	{ (i) }
+
+#define arch_atomic64_cmpxchg(v, old, new) (arch_cmpxchg(&((v)->counter), old, new))
+#define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new))
+
+static inline long arch_atomic64_read(const atomic64_t *v)
+{
+	return READ_ONCE(v->counter);
+}
+
+static inline void arch_atomic64_set(atomic64_t *v, long i)
+{
+	WRITE_ONCE(v->counter, i);
+}
+
+#define ATOMIC64_RETURN_OP(op, c_op)					\
+static inline long arch_atomic64_##op##_return(long i, atomic64_t *v)	\
+{									\
+	long new, old, ret;						\
+									\
+	do {								\
+		old = arch_atomic64_read(v);				\
+		new = old c_op i;					\
+		ret = arch_cmpxchg(&v->counter, old, new);		\
+	} while (ret != old);						\
+									\
+	return new;							\
+}
+
+#define ATOMIC64_OP(op, c_op)						\
+static inline void arch_atomic64_##op(long i, atomic64_t *v)		\
+{									\
+	long new, old, ret;						\
+									\
+	do {								\
+		old = arch_atomic64_read(v);				\
+		new = old c_op i;					\
+		ret = arch_cmpxchg(&v->counter, old, new);		\
+	} while (ret != old);						\
+}
+
+#define ATOMIC64_FETCH_OP(op, c_op)					\
+static inline long arch_atomic64_fetch_##op(long i, atomic64_t *v)	\
+{									\
+	long new, old, ret;						\
+									\
+	do {								\
+		old = arch_atomic64_read(v);				\
+		new = old c_op i;					\
+		ret = arch_cmpxchg(&v->counter, old, new);		\
+	} while (ret != old);						\
+									\
+	return old;							\
+}
+
+#define ATOMIC64_OPS(op, c_op)						\
+	ATOMIC64_OP(op, c_op)						\
+	ATOMIC64_RETURN_OP(op, c_op)					\
+	ATOMIC64_FETCH_OP(op, c_op)
+
+ATOMIC64_OPS(and, &)
+ATOMIC64_OPS(or, |)
+ATOMIC64_OPS(xor, ^)
+ATOMIC64_OPS(add, +)
+ATOMIC64_OPS(sub, -)
+
+#undef ATOMIC64_OPS
+#undef ATOMIC64_FETCH_OP
+#undef ATOMIC64_RETURN_OP
+#undef ATOMIC64_OP
+
+static inline int arch_atomic_add_return(int i, atomic_t *v)
+{
+	int new, old, ret;
+
+	do {
+		old = v->counter;
+		new = old + i;
+		ret = arch_cmpxchg(&v->counter, old, new);
+	} while (ret != old);
+
+	return new;
+}
+
+static inline int arch_atomic_sub_return(int i, atomic_t *v)
+{
+	return arch_atomic_add_return(-i, v);
+}
+
+#define arch_atomic64_add_return arch_atomic64_add_return
+
+#define arch_atomic64_fetch_add arch_atomic64_fetch_add
+
+#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+
+#define arch_atomic64_fetch_and arch_atomic64_fetch_and
+
+#define arch_atomic64_fetch_or arch_atomic64_fetch_or
+
+#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
+
+#define arch_atomic64_sub_return arch_atomic64_sub_return
+
+#include <asm-generic/atomic.h>
+
+#endif	/* _ASM_KVX_ATOMIC_H */
diff --git a/arch/kvx/include/asm/barrier.h b/arch/kvx/include/asm/barrier.h
new file mode 100644
index 0000000000000..371f1c70746dc
--- /dev/null
+++ b/arch/kvx/include/asm/barrier.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _ASM_KVX_BARRIER_H
+#define _ASM_KVX_BARRIER_H
+
+/* fence is sufficient to guarantee write ordering */
+#define mb()	__builtin_kvx_fence()
+
+#include <asm-generic/barrier.h>
+
+#endif /* _ASM_KVX_BARRIER_H */
diff --git a/arch/kvx/include/asm/bitops.h b/arch/kvx/include/asm/bitops.h
new file mode 100644
index 0000000000000..7782ce93cfee4
--- /dev/null
+++ b/arch/kvx/include/asm/bitops.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ *            Yann Sionneau
+ */
+
+#ifndef _ASM_KVX_BITOPS_H
+#define _ASM_KVX_BITOPS_H
+
+#ifdef __KERNEL__
+
+#ifndef _LINUX_BITOPS_H
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include
+
+static inline int fls(int x)
+{
+	return 32 - __builtin_kvx_clzw(x);
+}
+
+static inline int fls64(__u64 x)
+{
+	return 64 - __builtin_kvx_clzd(x);
+}
+
+/**
+ * __ffs - find first set bit in word
+ * @word: The word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+	if (!word)
+		return 0;
+
+	return __builtin_kvx_ctzd(word);
+}
+
+/**
+ * __fls - find last set bit in word
+ * @word: The word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __fls(unsigned long word)
+{
+	return 63 - __builtin_kvx_clzd(word);
+}
+
+/**
+ * ffs - find first set bit in word
+ * @x: the word to search
+ *
+ * This is defined the same way as the libc and compiler builtin ffs
+ * routines, therefore differs in spirit from the other bitops.
+ *
+ * ffs(value) returns 0 if value is 0 or the position of the first
+ * set bit if value is nonzero. The first (least significant) bit
+ * is at position 1.
+ */
+static inline int ffs(int x)
+{
+	if (!x)
+		return 0;
+	return __builtin_kvx_ctzw(x) + 1;
+}
+
+static inline unsigned int __arch_hweight32(unsigned int w)
+{
+	unsigned int count;
+
+	asm volatile ("cbsw %0 = %1\n\t;;"
+		      : "=r" (count)
+		      : "r" (w));
+
+	return count;
+}
+
+static inline unsigned int __arch_hweight64(__u64 w)
+{
+	unsigned int count;
+
+	asm volatile ("cbsd %0 = %1\n\t;;"
+		      : "=r" (count)
+		      : "r" (w));
+
+	return count;
+}
+
+static inline unsigned int __arch_hweight16(unsigned int w)
+{
+	return __arch_hweight32(w & 0xffff);
+}
+
+static inline unsigned int __arch_hweight8(unsigned int w)
+{
+	return __arch_hweight32(w & 0xff);
+}
+
+#include <asm-generic/bitops/ffz.h>
+
+#include <asm-generic/bitops/sched.h>
+#include <asm-generic/bitops/const_hweight.h>
+
+#include <asm-generic/bitops/atomic.h>
+#include <asm-generic/bitops/non-atomic.h>
+#include <asm-generic/bitops/lock.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/ext2-atomic.h>
+
+#endif
+
+#endif
diff --git a/arch/kvx/include/asm/bitrev.h b/arch/kvx/include/asm/bitrev.h
new file mode 100644
index 0000000000000..79865081905a6
--- /dev/null
+++ b/arch/kvx/include/asm/bitrev.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ */
+
+#ifndef _ASM_KVX_BITREV_H
+#define _ASM_KVX_BITREV_H
+
+#include <linux/swab.h>
+
+/* Bit reversal constant for matrix multiply */
+#define BIT_REVERSE 0x0102040810204080ULL
+
+static __always_inline __attribute_const__ u32 __arch_bitrev32(u32 x)
+{
+	/* Reverse all bits of each byte, then byte-reverse the 32 LSB */
+	return swab32(__builtin_kvx_sbmm8(BIT_REVERSE, x));
+}
+
+static __always_inline __attribute_const__ u16 __arch_bitrev16(u16 x)
+{
+	/* Reverse all bits of each byte, then byte-reverse the 16 LSB */
+	return swab16(__builtin_kvx_sbmm8(BIT_REVERSE, x));
+}
+
+static __always_inline __attribute_const__ u8 __arch_bitrev8(u8 x)
+{
+	return __builtin_kvx_sbmm8(BIT_REVERSE, x);
+}
+
+#endif
diff --git a/arch/kvx/include/asm/cmpxchg.h b/arch/kvx/include/asm/cmpxchg.h
new file mode 100644
index 0000000000000..041a3e7797103
--- /dev/null
+++ b/arch/kvx/include/asm/cmpxchg.h
@@ -0,0 +1,170 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2017-2023 Kalray Inc.
+ * Author(s): Clement Leger
+ *            Yann Sionneau
+ *            Jules Maselbas
+ */
+
+#ifndef _ASM_KVX_CMPXCHG_H
+#define _ASM_KVX_CMPXCHG_H
+
+#include
+#include
+#include
+#include
+
+/*
+ * On kvx, we have a boolean compare and swap, meaning the operation only
+ * returns whether it succeeded.
+ * If it succeeded, this is simple: we just return the "old" value the
+ * caller provided. If it failed, we need to load the current value so we
+ * can return it to the caller. If the loaded value differs from the "old"
+ * provided by the caller, we can return it, since that tells the caller
+ * the operation failed.
+ * However, if for some reason the value we read is equal to the "old"
+ * value provided by the caller, we can't simply return it, or the caller
+ * will think the operation succeeded. So in that case we try again until
+ * we either succeed or fail with a value different from the provided one.
+ */
+
+static inline unsigned int __cmpxchg_u32(unsigned int old, unsigned int new,
+					 volatile unsigned int *ptr)
+{
+	unsigned int exp = old;
+
+	__builtin_kvx_fence();
+	while (exp == old) {
+		if (__builtin_kvx_acswapw((void *)ptr, new, exp))
+			break; /* acswap succeeded */
+		exp = *ptr;
+	}
+
+	return exp;
+}
+
+static inline unsigned long __cmpxchg_u64(unsigned long old, unsigned long new,
+					  volatile unsigned long *ptr)
+{
+	unsigned long exp = old;
+
+	__builtin_kvx_fence();
+	while (exp == old) {
+		if (__builtin_kvx_acswapd((void *)ptr, new, exp))
+			break; /* acswap succeeded */
+		exp = *ptr;
+	}
+
+	return exp;
+}
+
+extern unsigned long __cmpxchg_called_with_bad_pointer(void)
+	__compiletime_error("Bad argument size for cmpxchg");
+
+static __always_inline unsigned long __cmpxchg(unsigned long old,
+					       unsigned long new,
+					       volatile void *ptr, int size)
+{
+	switch (size) {
+	case 4:
+		return __cmpxchg_u32(old, new, ptr);
+	case 8:
+		return __cmpxchg_u64(old, new, ptr);
+	default:
+		return __cmpxchg_called_with_bad_pointer();
+	}
+}
+
+#define arch_cmpxchg(ptr, old, new)				\
+	((__typeof__(*(ptr))) __cmpxchg(			\
+		(unsigned long)(old), (unsigned long)(new),	\
+		(ptr), sizeof(*(ptr))))
+
+/*
+ * In order to optimize xchg for 16 bits, we can use insf/extfz if we know
+ * the bounds. This way, we only take one more bundle than standard xchg.
+ * We simply do a read-modify-acswap on a 32-bit word.
+ */
+
+#define __kvx_insf(org, val, start, stop) __asm__ __volatile__(	\
+	"insf %[_org] = %[_val], %[_stop], %[_start]\n\t;;"		\
+	: [_org]"+r"(org)						\
+	: [_val]"r"(val), [_stop]"i"(stop), [_start]"i"(start))
+
+#define __kvx_extfz(out, val, start, stop) __asm__ __volatile__(	\
+	"extfz %[_out] = %[_val], %[_stop], %[_start]\n\t;;"		\
+	: [_out]"=r"(out)						\
+	: [_val]"r"(val), [_stop]"i"(stop), [_start]"i"(start))
+
+/* Needed for generic qspinlock implementation */
+static inline unsigned int __xchg_u16(unsigned int old, unsigned int new,
+				      volatile unsigned int *ptr)
+{
+	unsigned int off = ((unsigned long)ptr) % sizeof(unsigned int);
+	unsigned int val;
+
+	ptr = PTR_ALIGN_DOWN(ptr, sizeof(unsigned int));
+	__builtin_kvx_fence();
+	do {
+		old = *ptr;
+		val = old;
+		if (off == 0)
+			__kvx_insf(val, new, 0, 15);
+		else
+			__kvx_insf(val, new, 16, 31);
+	} while (!__builtin_kvx_acswapw((void *)ptr, val, old));
+
+	if (off == 0)
+		__kvx_extfz(old, old, 0, 15);
+	else
+		__kvx_extfz(old, old, 16, 31);
+
+	return old;
+}
+
+static inline unsigned int __xchg_u32(unsigned int old, unsigned int new,
+				      volatile unsigned int *ptr)
+{
+	__builtin_kvx_fence();
+	do
+		old = *ptr;
+	while (!__builtin_kvx_acswapw((void *)ptr, new, old));
+
+	return old;
+}
+
+static inline unsigned long __xchg_u64(unsigned long old, unsigned long new,
+				       volatile unsigned long *ptr)
+{
+	__builtin_kvx_fence();
+	do
+		old = *ptr;
+	while (!__builtin_kvx_acswapd((void *)ptr, new, old));
+
+	return old;
+}
+
+extern unsigned long __xchg_called_with_bad_pointer(void)
+	__compiletime_error("Bad argument size for xchg");
+
+static __always_inline unsigned long __xchg(unsigned long val,
+					    volatile void *ptr, int size)
+{
+	switch (size) {
+	case 2:
+		return __xchg_u16(0, val, ptr);
+	case 4:
+		return __xchg_u32(0, val, ptr);
+	case 8:
+		return __xchg_u64(0, val, ptr);
+	default:
+		return __xchg_called_with_bad_pointer();
+	}
+}
+
+#define arch_xchg(ptr, val)				\
+	((__typeof__(*(ptr))) __xchg(			\
+		(unsigned long)(val),			\
+		(ptr), sizeof(*(ptr))))
+
+#endif
-- 
2.45.2
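
For reference only, not an additional hunk of this patch: expanding
ATOMIC64_FETCH_OP(add, +) from asm/atomic.h above gives the cmpxchg
retry loop below, which is the pattern every arch_atomic64_* helper in
that header relies on. The same compare-and-swap retry also backs the
32-bit arch_atomic_add_return() further up.

static inline long arch_atomic64_fetch_add(long i, atomic64_t *v)
{
	long new, old, ret;

	do {
		old = arch_atomic64_read(v);	/* snapshot the counter */
		new = old + i;			/* compute the updated value */
		ret = arch_cmpxchg(&v->counter, old, new);
	} while (ret != old);			/* lost a race: retry with fresh value */

	return old;				/* fetch_* variants return the previous value */
}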