From: guoren@kernel.org
To: palmer@rivosinc.com, arnd@arndb.de, peterz@infradead.org, longman@redhat.com, boqun.feng@gmail.com, Conor.Dooley@microchip.com, chenhuacai@loongson.cn, kernel@xen0n.name, r@hev.cc, shorne@gmail.com
Cc: linux-riscv@lists.infradead.org, linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org, Guo Ren, Guo Ren
Subject: [PATCH V6 1/2] asm-generic: spinlock: Move qspinlock & ticket-lock into generic spinlock.h
Date: Tue, 21 Jun 2022 10:49:19 -0400
Message-Id: <20220621144920.2945595-2-guoren@kernel.org>
In-Reply-To: <20220621144920.2945595-1-guoren@kernel.org>
References: <20220621144920.2945595-1-guoren@kernel.org>

From: Guo Ren

Move the ticket-lock implementation out of the generic spinlock.h into its
own tspinlock.h header, and let the generic spinlock.h select between
qspinlock and ticket-lock according to the CONFIG_ARCH_USE_QUEUED_SPINLOCKS
config option.
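As background for the diff below: a ticket lock hands each contender a
sequence number ("ticket") and spins until an "owner" counter reaches that
number, which makes hand-off FIFO-fair. The following is a minimal
user-space model of the algorithm being moved, written with C11 atomics
rather than the kernel's atomic_t API. It is illustrative only, not part of
the patch: the names are invented here, and the two-field layout is split
out for clarity, whereas the kernel packs both half-words into a single
atomic_t so that arch_spin_trylock() can claim a ticket and check ownership
with one cmpxchg.

#include <stdatomic.h>
#include <stdint.h>

struct ticket_lock {
	_Atomic uint16_t next;	/* next ticket to hand out */
	_Atomic uint16_t owner;	/* ticket currently allowed to hold the lock */
};

static void ticket_lock(struct ticket_lock *l)
{
	/* Grab a ticket; the atomic RMW serializes contenders FIFO. */
	uint16_t t = atomic_fetch_add_explicit(&l->next, 1, memory_order_relaxed);

	/* Spin until the owner counter reaches our ticket. */
	while (atomic_load_explicit(&l->owner, memory_order_acquire) != t)
		;
}

static void ticket_unlock(struct ticket_lock *l)
{
	/* Only the lock holder writes owner, so load + store is race-free;
	 * the release store publishes the critical section to the next waiter. */
	uint16_t o = atomic_load_explicit(&l->owner, memory_order_relaxed);
	atomic_store_explicit(&l->owner, (uint16_t)(o + 1), memory_order_release);
}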
Signed-off-by: Guo Ren
Signed-off-by: Guo Ren
Cc: Peter Zijlstra (Intel)
Cc: Arnd Bergmann
---
 include/asm-generic/spinlock.h        | 90 ++-------------------------
 include/asm-generic/spinlock_types.h  | 14 ++--
 include/asm-generic/tspinlock.h       | 92 +++++++++++++++++++++++++++
 include/asm-generic/tspinlock_types.h | 17 +++++
 4 files changed, 119 insertions(+), 94 deletions(-)
 create mode 100644 include/asm-generic/tspinlock.h
 create mode 100644 include/asm-generic/tspinlock_types.h

diff --git a/include/asm-generic/spinlock.h b/include/asm-generic/spinlock.h
index fdfebcb050f4..4eca2488af38 100644
--- a/include/asm-generic/spinlock.h
+++ b/include/asm-generic/spinlock.h
@@ -1,92 +1,12 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * 'Generic' ticket-lock implementation.
- *
- * It relies on atomic_fetch_add() having well defined forward progress
- * guarantees under contention. If your architecture cannot provide this, stick
- * to a test-and-set lock.
- *
- * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
- * sub-word of the value. This is generally true for anything LL/SC although
- * you'd be hard pressed to find anything useful in architecture specifications
- * about this. If your architecture cannot do this you might be better off with
- * a test-and-set.
- *
- * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
- * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
- * a full fence after the spin to upgrade the otherwise-RCpc
- * atomic_cond_read_acquire().
- *
- * The implementation uses smp_cond_load_acquire() to spin, so if the
- * architecture has WFE like instructions to sleep instead of poll for word
- * modifications be sure to implement that (see ARM64 for example).
- *
- */
-
 #ifndef __ASM_GENERIC_SPINLOCK_H
 #define __ASM_GENERIC_SPINLOCK_H
 
-#include <linux/atomic.h>
-#include <asm-generic/spinlock_types.h>
-
-static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	u32 val = atomic_fetch_add(1<<16, lock);
-	u16 ticket = val >> 16;
-
-	if (ticket == (u16)val)
-		return;
-
-	/*
-	 * atomic_cond_read_acquire() is RCpc, but rather than defining a
-	 * custom cond_read_rcsc() here we just emit a full fence.  We only
-	 * need the prior reads before subsequent writes ordering from
-	 * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we
-	 * have no outstanding writes due to the atomic_fetch_add() the extra
-	 * orderings are free.
-	 */
-	atomic_cond_read_acquire(lock, ticket == (u16)VAL);
-	smp_mb();
-}
-
-static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock)
-{
-	u32 old = atomic_read(lock);
-
-	if ((old >> 16) != (old & 0xffff))
-		return false;
-
-	return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */
-}
-
-static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	u16 *ptr = (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
-	u32 val = atomic_read(lock);
-
-	smp_store_release(ptr, (u16)val + 1);
-}
-
-static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
-	u32 val = atomic_read(lock);
-
-	return ((val >> 16) != (val & 0xffff));
-}
-
-static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
-{
-	u32 val = atomic_read(lock);
-
-	return (s16)((val >> 16) - (val & 0xffff)) > 1;
-}
-
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
-	return !arch_spin_is_locked(&lock);
-}
-
+#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
 #include <asm/qrwlock.h>
+#else
+#include <asm-generic/tspinlock.h>
+#endif
 
 #endif /* __ASM_GENERIC_SPINLOCK_H */
diff --git a/include/asm-generic/spinlock_types.h b/include/asm-generic/spinlock_types.h
index 8962bb730945..9875c1d058b3 100644
--- a/include/asm-generic/spinlock_types.h
+++ b/include/asm-generic/spinlock_types.h
@@ -3,15 +3,11 @@
 #ifndef __ASM_GENERIC_SPINLOCK_TYPES_H
 #define __ASM_GENERIC_SPINLOCK_TYPES_H
 
-#include <linux/types.h>
-typedef atomic_t arch_spinlock_t;
-
-/*
- * qrwlock_types depends on arch_spinlock_t, so we must typedef that before the
- * include.
- */
+#ifdef CONFIG_ARCH_USE_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
 #include <asm/qrwlock_types.h>
-
-#define __ARCH_SPIN_LOCK_UNLOCKED	ATOMIC_INIT(0)
+#else
+#include <asm-generic/tspinlock_types.h>
+#endif
 
 #endif /* __ASM_GENERIC_SPINLOCK_TYPES_H */
diff --git a/include/asm-generic/tspinlock.h b/include/asm-generic/tspinlock.h
new file mode 100644
index 000000000000..def7b8f0f4f4
--- /dev/null
+++ b/include/asm-generic/tspinlock.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * 'Generic' ticket-lock implementation.
+ *
+ * It relies on atomic_fetch_add() having well defined forward progress
+ * guarantees under contention. If your architecture cannot provide this, stick
+ * to a test-and-set lock.
+ *
+ * It also relies on atomic_fetch_add() being safe vs smp_store_release() on a
+ * sub-word of the value. This is generally true for anything LL/SC although
+ * you'd be hard pressed to find anything useful in architecture specifications
+ * about this. If your architecture cannot do this you might be better off with
+ * a test-and-set.
+ *
+ * It further assumes atomic_*_release() + atomic_*_acquire() is RCpc and hence
+ * uses atomic_fetch_add() which is RCsc to create an RCsc hot path, along with
+ * a full fence after the spin to upgrade the otherwise-RCpc
+ * atomic_cond_read_acquire().
+ *
+ * The implementation uses smp_cond_load_acquire() to spin, so if the
+ * architecture has WFE-like instructions to sleep instead of poll for word
+ * modifications be sure to implement that (see ARM64 for example).
+ *
+ */
+
+#ifndef __ASM_GENERIC_TSPINLOCK_H
+#define __ASM_GENERIC_TSPINLOCK_H
+
+#include <linux/atomic.h>
+#include <asm-generic/spinlock_types.h>
+
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	u32 val = atomic_fetch_add(1<<16, lock);
+	u16 ticket = val >> 16;
+
+	if (ticket == (u16)val)
+		return;
+
+	/*
+	 * atomic_cond_read_acquire() is RCpc, but rather than defining a
+	 * custom cond_read_rcsc() here we just emit a full fence.  We only
We only + * need the prior reads before subsequent writes ordering from + * smb_mb(), but as atomic_cond_read_acquire() just emits reads and we + * have no outstanding writes due to the atomic_fetch_add() the extra + * orderings are free. + */ + atomic_cond_read_acquire(lock, ticket =3D=3D (u16)VAL); + smp_mb(); +} + +static __always_inline bool arch_spin_trylock(arch_spinlock_t *lock) +{ + u32 old =3D atomic_read(lock); + + if ((old >> 16) !=3D (old & 0xffff)) + return false; + + return atomic_try_cmpxchg(lock, &old, old + (1<<16)); /* SC, for RCsc */ +} + +static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + u16 *ptr =3D (u16 *)lock + IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + u32 val =3D atomic_read(lock); + + smp_store_release(ptr, (u16)val + 1); +} + +static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + u32 val =3D atomic_read(lock); + + return ((val >> 16) !=3D (val & 0xffff)); +} + +static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + u32 val =3D atomic_read(lock); + + return (s16)((val >> 16) - (val & 0xffff)) > 1; +} + +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return !arch_spin_is_locked(&lock); +} + +#include + +#endif /* __ASM_GENERIC_TSPINLOCK_H */ diff --git a/include/asm-generic/tspinlock_types.h b/include/asm-generic/ts= pinlock_types.h new file mode 100644 index 000000000000..ca3ea5acd172 --- /dev/null +++ b/include/asm-generic/tspinlock_types.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_GENERIC_TSPINLOCK_TYPES_H +#define __ASM_GENERIC_TSPINLOCK_TYPES_H + +#include +typedef atomic_t arch_spinlock_t; + +/* + * qrwlock_types depends on arch_spinlock_t, so we must typedef that befor= e the + * include. + */ +#include + +#define __ARCH_SPIN_LOCK_UNLOCKED ATOMIC_INIT(0) + +#endif /* __ASM_GENERIC_TSPINLOCK_TYPES_H */ --=20 2.36.1 From nobody Sun Apr 26 23:09:11 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 64718C433EF for ; Tue, 21 Jun 2022 14:50:22 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1352075AbiFUOuU (ORCPT ); Tue, 21 Jun 2022 10:50:20 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:56018 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1352110AbiFUOtx (ORCPT ); Tue, 21 Jun 2022 10:49:53 -0400 Received: from dfw.source.kernel.org (dfw.source.kernel.org [139.178.84.217]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id 084C427CD4; Tue, 21 Jun 2022 07:49:51 -0700 (PDT) Received: from smtp.kernel.org (relay.kernel.org [52.25.139.140]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by dfw.source.kernel.org (Postfix) with ESMTPS id 82B7A616CD; Tue, 21 Jun 2022 14:49:51 +0000 (UTC) Received: by smtp.kernel.org (Postfix) with ESMTPSA id A41F7C341C6; Tue, 21 Jun 2022 14:49:47 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1655822991; bh=EyNeRNlAcDveQFHhZSW52KHV2AvJzo7KCLMh5kztleU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=azjBuoUWoFfmCqiN+mAOEbU1HS1LJNHyNJLll2EsY1Gi8T+OF666Yab5NAkHdhfSn yHOBYttnyBD9Cd6Cjr0+C79E99tHuDsNiPu6cOLCAKPejBv9D3zc9YCvYVLr//XRix Hng1vfmOdOefuNiD6RzyZfHN2R400C6POuO/+1RUMstAsxmpaqewBK1e/9XAQqrYNi 
From: guoren@kernel.org
To: palmer@rivosinc.com, arnd@arndb.de, peterz@infradead.org, longman@redhat.com, boqun.feng@gmail.com, Conor.Dooley@microchip.com, chenhuacai@loongson.cn, kernel@xen0n.name, r@hev.cc, shorne@gmail.com
Cc: linux-riscv@lists.infradead.org, linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org, Guo Ren, Guo Ren
Subject: [PATCH V6 2/2] riscv: Add qspinlock support
Date: Tue, 21 Jun 2022 10:49:20 -0400
Message-Id: <20220621144920.2945595-3-guoren@kernel.org>
In-Reply-To: <20220621144920.2945595-1-guoren@kernel.org>
References: <20220621144920.2945595-1-guoren@kernel.org>

From: Guo Ren

Enable qspinlock based on the requirements spelled out in a8ad07e5240c9
("asm-generic: qspinlock: Indicate the use of mixed-size atomics"):

- RISC-V atomic_*_release()/atomic_*_acquire() are implemented as the
  relaxed versions plus an acquire/release fence, giving RCsc
  synchronization.

- RISC-V LR/SC pairs provide a forward-progress guarantee that is strong
  or weak depending on the micro-architecture, and the RISC-V ISA spec
  lays out several constraints under which hardware must guarantee
  eventual success (RISC-V User ISA, section 8.3, "Eventual Success of
  Store-Conditional Instructions"). Some RISC-V cores, such as BOOMv3 and
  XiangShan, provide a strict, strong forward guarantee: the cache line is
  held in an exclusive state for a number of backoff cycles, and only an
  interrupt on the same core can break the LR/SC pair.

- RISC-V provides a cheap RCsc atomic_fetch_or_acquire().

- RISC-V provides only a relaxed xchg16, added here to support qspinlock.
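To make the first bullet concrete: RISC-V upgrades a relaxed AMO to an
acquire operation by appending a fence, per the convention in
arch/riscv/include/asm/fence.h. The standalone function below is a rough
sketch of that pattern, not the kernel's actual ATOMIC_OP macro expansion;
the function name is invented for illustration:

/* Sketch: relaxed AMO followed by an acquire fence (RISC-V only). */
#define RISCV_ACQUIRE_BARRIER	"\tfence r, rw\n"

static inline int fetch_add_acquire_sketch(int i, int *v)
{
	int ret;

	__asm__ __volatile__ (
		"	amoadd.w %0, %2, %1\n"	/* relaxed read-modify-write */
		RISCV_ACQUIRE_BARRIER		/* upgrade the load half to acquire */
		: "=r" (ret), "+A" (*v)
		: "r" (i)
		: "memory");
	return ret;
}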
Signed-off-by: Guo Ren
Signed-off-by: Guo Ren
Cc: Peter Zijlstra (Intel)
Cc: Waiman Long
Cc: Arnd Bergmann
Cc: Palmer Dabbelt
---
 arch/riscv/Kconfig               |  8 ++++++++
 arch/riscv/include/asm/Kbuild    |  2 ++
 arch/riscv/include/asm/cmpxchg.h | 17 +++++++++++++++++
 3 files changed, 27 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 32ffef9f6e5b..e1b57cb89189 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -333,6 +333,14 @@ config NODES_SHIFT
 	  Specify the maximum number of NUMA Nodes available on the target
 	  system.  Increases memory reserved to accommodate various tables.
 
+config RISCV_USE_QUEUED_SPINLOCKS
+	bool "Use queued spinlocks instead of ticket-locks"
+	depends on SMP && MMU
+	select ARCH_USE_QUEUED_SPINLOCKS
+	help
+	  Make sure your micro-architecture's LL/SC has a strong forward
+	  progress guarantee. Otherwise, stay with the ticket-lock.
+
 config RISCV_ALTERNATIVE
 	bool
 	depends on !XIP_KERNEL
diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild
index 504f8b7e72d4..2cce98c7b653 100644
--- a/arch/riscv/include/asm/Kbuild
+++ b/arch/riscv/include/asm/Kbuild
@@ -2,7 +2,9 @@
 generic-y += early_ioremap.h
 generic-y += flat.h
 generic-y += kvm_para.h
+generic-y += mcs_spinlock.h
 generic-y += parport.h
+generic-y += qspinlock.h
 generic-y += spinlock.h
 generic-y += spinlock_types.h
 generic-y += qrwlock.h
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 12debce235e5..492104d45a23 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -17,6 +17,23 @@
 	__typeof__(new) __new = (new);					\
 	__typeof__(*(ptr)) __ret;					\
 	switch (size) {							\
+	case 2: {							\
+		u32 temp;						\
+		u32 shif = ((ulong)__ptr & 2) ? 16 : 0;			\
+		u32 mask = 0xffff << shif;				\
+		__ptr = (__typeof__(ptr))((ulong)__ptr & ~(ulong)2);	\
+		__asm__ __volatile__ (					\
+			"0:	lr.w %0, %2\n"				\
+			"	and  %1, %0, %z3\n"			\
+			"	or   %1, %1, %z4\n"			\
+			"	sc.w %1, %1, %2\n"			\
+			"	bnez %1, 0b\n"				\
+			: "=&r" (__ret), "=&r" (temp), "+A" (*__ptr)	\
+			: "rJ" (~mask), "rJ" (__new << shif)		\
+			: "memory");					\
+		__ret = (__ret & mask) >> shif;				\
+		break;							\
+	}								\
 	case 4:								\
 		__asm__ __volatile__ (					\
 			"	amoswap.w %0, %2, %1\n"			\
-- 
2.36.1
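For readers who want to check the mask/shift arithmetic in the new case-2
branch above: the LR/SC loop swaps one half-word by operating on the
aligned 32-bit word that contains it. Below is a user-space model of the
same logic built on a 32-bit compare-and-swap loop instead of LR/SC. It is
illustrative only (the function name is invented, and the cast from a
uint16_t region to an atomic 32-bit word carries strict-aliasing caveats
that the kernel's inline asm does not have):

#include <stdatomic.h>
#include <stdint.h>

static uint16_t xchg16_model(uint16_t *ptr, uint16_t newval)
{
	uintptr_t addr = (uintptr_t)ptr;
	unsigned int shift = (addr & 2) ? 16 : 0;	/* which half-word? */
	uint32_t mask = 0xffffu << shift;
	/* Align down to the containing 32-bit word, as '& ~(ulong)2' does. */
	_Atomic uint32_t *word = (_Atomic uint32_t *)(addr & ~(uintptr_t)2);
	uint32_t old = atomic_load_explicit(word, memory_order_relaxed);
	uint32_t merged;

	do {
		/* Keep the other half, splice in the new value: the
		 * 'and'/'or' pair in the patch's LR/SC loop. */
		merged = (old & ~mask) | ((uint32_t)newval << shift);
	} while (!atomic_compare_exchange_weak_explicit(word, &old, merged,
							memory_order_relaxed,
							memory_order_relaxed));

	/* Return the previous half-word, like '(__ret & mask) >> shif'. */
	return (uint16_t)((old & mask) >> shift);
}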