From: Nam Cao
To: Paul Walmsley, Palmer Dabbelt, Albert Ou, Alexandre Ghiti, Andrew Jones, Clément Léger, linux-riscv@lists.infradead.org, linux-kernel@vger.kernel.org
Cc: Nam Cao, Sebastian Andrzej Siewior
Subject: [PATCH 1/5] riscv: Clean up & optimize unaligned scalar access probe
Date: Wed, 11 Feb 2026 18:30:31 +0100
Message-ID: <9b9a20affe2e4f5c380926ceb885a47e20a59395.1770830596.git.namcao@linutronix.de>

check_unaligned_access_speed_all_cpus() is more complicated than it should
be:

- It uses on_each_cpu() to probe unaligned memory access on all CPUs, but
  excludes CPU0 with a check in the callback function, so an IPI to CPU0
  is wasted.
- Probing on CPU0 is done with smp_call_on_cpu(), which is not as fast as
  on_each_cpu().

The reason for this design is that the probe is timed with jiffies:
on_each_cpu() excludes CPU0 because that CPU needs to stay behind to tend
to jiffies.

Instead, replace the jiffies usage with ktime_get_mono_fast_ns(). With
jiffies out of the way, on_each_cpu() can be used for all CPUs and
smp_call_on_cpu() can be dropped.

To make ktime_get_mono_fast_ns() usable, move this probe to late_initcall.
Anything after clocksource's fs_initcall works, but avoid depending on
clocksource staying at fs_initcall.

The probe time is now 8000000 ns, which is the same as before (2 jiffies)
for the riscv defconfig. This is excessive for the CPUs I have and should
probably be reduced, but that is a different discussion.

Suggested-by: Sebastian Andrzej Siewior
Signed-off-by: Nam Cao
---
 arch/riscv/kernel/unaligned_access_speed.c | 28 ++++++++--------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 70b5e6927620..8b744c4a41ea 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -17,6 +17,7 @@
 #include "copy-unaligned.h"
 
 #define MISALIGNED_ACCESS_JIFFIES_LG2 1
+#define MISALIGNED_ACCESS_NS 8000000
 #define MISALIGNED_BUFFER_SIZE 0x4000
 #define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
 #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
@@ -36,8 +37,8 @@ static int check_unaligned_access(void *param)
 	u64 start_cycles, end_cycles;
 	u64 word_cycles;
 	u64 byte_cycles;
+	u64 start_ns;
 	int ratio;
-	unsigned long start_jiffies, now;
 	struct page *page = param;
 	void *dst;
 	void *src;
@@ -55,15 +56,13 @@ static int check_unaligned_access(void *param)
 	/* Do a warmup. */
 	__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
 	preempt_disable();
-	start_jiffies = jiffies;
-	while ((now = jiffies) == start_jiffies)
-		cpu_relax();
 
 	/*
 	 * For a fixed amount of time, repeatedly try the function, and take
 	 * the best time in cycles as the measurement.
 	 */
-	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+	start_ns = ktime_get_mono_fast_ns();
+	while (ktime_get_mono_fast_ns() < start_ns + MISALIGNED_ACCESS_NS) {
 		start_cycles = get_cycles64();
 		/* Ensure the CSR read can't reorder WRT to the copy. */
 		mb();
@@ -77,11 +76,9 @@ static int check_unaligned_access(void *param)
 
 	byte_cycles = -1ULL;
 	__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
-	start_jiffies = jiffies;
-	while ((now = jiffies) == start_jiffies)
-		cpu_relax();
 
-	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+	start_ns = ktime_get_mono_fast_ns();
+	while (ktime_get_mono_fast_ns() < start_ns + MISALIGNED_ACCESS_NS) {
 		start_cycles = get_cycles64();
 		mb();
 		__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
@@ -125,13 +122,12 @@ static int check_unaligned_access(void *param)
 	return 0;
 }
 
-static void __init check_unaligned_access_nonboot_cpu(void *param)
+static void __init _check_unaligned_access(void *param)
 {
 	unsigned int cpu = smp_processor_id();
 	struct page **pages = param;
 
-	if (smp_processor_id() != 0)
-		check_unaligned_access(pages[cpu]);
+	check_unaligned_access(pages[cpu]);
 }
 
 /* Measure unaligned access speed on all CPUs present at boot in parallel. */
@@ -158,11 +154,7 @@ static void __init check_unaligned_access_speed_all_cpus(void)
 		}
 	}
 
-	/* Check everybody except 0, who stays behind to tend jiffies. */
-	on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);
-
-	/* Check core 0. */
-	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);
+	on_each_cpu(_check_unaligned_access, bufs, 1);
 
 out:
 	for_each_cpu(cpu, cpu_online_mask) {
@@ -494,4 +486,4 @@ static int __init check_unaligned_access_all_cpus(void)
 	return 0;
 }
 
-arch_initcall(check_unaligned_access_all_cpus);
+late_initcall(check_unaligned_access_all_cpus);
-- 
2.47.3