[PATCH v3 6/8] riscv: lib: add strnlen implementation

Feng Jiang posted 8 patches 2 weeks, 6 days ago
There is a newer version of this series
[PATCH v3 6/8] riscv: lib: add strnlen implementation
Posted by Feng Jiang 2 weeks, 6 days ago
Add an optimized strnlen() implementation for RISC-V. This version
includes a simple byte-at-a-time generic fallback and a Zbb-powered
word-at-a-time optimization using the 'orc.b' instruction, derived
from the strlen implementation.

Benchmark results (QEMU TCG, rv64):
  Length | Original (MB/s) | Optimized (MB/s) | Improvement
  -------|-----------------|------------------|------------
  16 B   | 189             | 310              | +64.0%
  512 B  | 344             | 1535             | +346.2%
  4096 B | 363             | 1854             | +410.7%

Suggested-by: Andy Shevchenko <andy@kernel.org>
Tested-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
---
 arch/riscv/include/asm/string.h |   3 +
 arch/riscv/lib/Makefile         |   1 +
 arch/riscv/lib/strnlen.S        | 164 ++++++++++++++++++++++++++++++++
 arch/riscv/purgatory/Makefile   |   5 +-
 4 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/lib/strnlen.S

diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
index 5ba77f60bf0b..16634d67c217 100644
--- a/arch/riscv/include/asm/string.h
+++ b/arch/riscv/include/asm/string.h
@@ -28,6 +28,9 @@ extern asmlinkage __kernel_size_t strlen(const char *);
 
 #define __HAVE_ARCH_STRNCMP
 extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count);
+
+#define __HAVE_ARCH_STRNLEN
+extern asmlinkage __kernel_size_t strnlen(const char *, size_t);
 #endif
 
 /* For those files which don't want to check by kasan. */
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index bbc031124974..0969d8136df0 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -7,6 +7,7 @@ ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
 lib-y			+= strcmp.o
 lib-y			+= strlen.o
 lib-y			+= strncmp.o
+lib-y			+= strnlen.o
 endif
 lib-y			+= csum.o
 ifeq ($(CONFIG_MMU), y)
diff --git a/arch/riscv/lib/strnlen.S b/arch/riscv/lib/strnlen.S
new file mode 100644
index 000000000000..4af0df9442f1
--- /dev/null
+++ b/arch/riscv/lib/strnlen.S
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+/*
+ * Based on arch/riscv/lib/strlen.S
+ *
+ * Copyright (C) Feng Jiang <jiangfeng@kylinos.cn>
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
+
+/* size_t strnlen(const char *s, size_t count) */
+SYM_FUNC_START(strnlen)
+
+	__ALTERNATIVE_CFG("nop", "j strnlen_zbb", 0, RISCV_ISA_EXT_ZBB,
+		IS_ENABLED(CONFIG_RISCV_ISA_ZBB) && IS_ENABLED(CONFIG_TOOLCHAIN_HAS_ZBB))
+
+	/*
+	 * Returns
+	 *   a0 - String length
+	 *
+	 * Parameters
+	 *   a0 - String to measure
+	 *   a1 - Max length of string
+	 *
+	 * Clobbers
+	 *   t0, t1, t2
+	 */
+	addi	t1, a0, -1		/* cursor, pre-decremented for the loop */
+	add	t2, a0, a1		/* one past the last byte to examine */
+1:
+	addi	t1, t1, 1
+	beq	t1, t2, 2f		/* maxlen reached before any load */
+	lbu	t0, 0(t1)
+	bnez	t0, 1b
+2:
+	sub	a0, t1, a0
+	ret
+
+/*
+ * Variant of strnlen using the ZBB extension if available
+ */
+#if defined(CONFIG_RISCV_ISA_ZBB) && defined(CONFIG_TOOLCHAIN_HAS_ZBB)
+strnlen_zbb:
+#ifdef CONFIG_CPU_BIG_ENDIAN
+# define CZ	clz
+# define SHIFT	sll
+#else
+# define CZ	ctz
+# define SHIFT	srl
+#endif
+
+.option push
+.option arch,+zbb
+
+	/*
+	 * Returns
+	 *   a0 - String length
+	 *
+	 * Parameters
+	 *   a0 - String to measure
+	 *   a1 - Max length of string
+	 *
+	 * Clobbers
+	 *   t0, t1, t2, t3, t4
+	 */
+
+	/* If maxlen is 0, return 0. */
+	beqz	a1, 3f
+
+	/* Number of irrelevant bytes in the first word. */
+	andi	t2, a0, SZREG-1
+
+	/* Align pointer. */
+	andi	t0, a0, -SZREG
+
+	li	t3, SZREG
+	sub	t3, t3, t2
+	slli	t2, t2, 3
+
+	/*
+	 * Aligned address of the last readable byte, s + maxlen - 1,
+	 * saturated on wrap-around so that a huge maxlen cannot end early.
+	 */
+	addi	t4, a1, -1
+	add	t4, a0, t4
+	sltu	t1, t4, a0		/* 1 if the addition wrapped */
+	neg	t1, t1			/* all-ones if wrapped, else 0 */
+	or	t4, t4, t1
+	andi	t4, t4, -SZREG
+
+	/* Get the first word.  */
+	REG_L	t1, 0(t0)
+
+	/*
+	 * Shift away the partial data we loaded to remove the irrelevant bytes
+	 * preceding the string with the effect of adding NUL bytes at the
+	 * end of the string's first word.
+	 */
+	SHIFT	t1, t1, t2
+
+	/* Convert non-NUL into 0xff and NUL into 0x00. */
+	orc.b	t1, t1
+
+	/* Convert non-NUL into 0x00 and NUL into 0xff. */
+	not	t1, t1
+
+	/*
+	 * Search for the first set bit (corresponding to a NUL byte in the
+	 * original chunk).
+	 */
+	CZ	t1, t1
+
+	/*
+	 * The first chunk is special: compare against the number
+	 * of valid bytes in this chunk.
+	 */
+	srli	a0, t1, 3
+
+	/* Limit the result by maxlen. */
+	bleu	a1, a0, 3f
+	/* NUL found among the valid bytes of the first word: done. */
+	bgtu	t3, a0, 2f
+
+	/* Prepare for the word comparison loop. */
+	addi	t2, t0, SZREG
+	li	t3, -1
+
+	/*
+	 * Our critical loop is 5 instructions and processes data in
+	 * 4 byte or 8 byte chunks.
+	 */
+	.p2align 3
+1:
+	REG_L	t1, SZREG(t0)
+	addi	t0, t0, SZREG
+	orc.b	t1, t1
+	bgeu	t0, t4, 4f		/* this was the last readable word */
+	beq	t1, t3, 1b
+4:
+	not	t1, t1
+	CZ	t1, t1
+	srli	t1, t1, 3
+
+	/* Result = first-word bytes + skipped full words + last-word bytes. */
+	sub	t2, t0, t2
+	add	a0, a0, t2
+	add	a0, a0, t1
+
+	/* Ensure the final result does not exceed maxlen. */
+	bgeu	a0, a1, 3f
+2:
+	ret
+3:
+	mv	a0, a1
+	ret
+
+.option pop
+#endif
+SYM_FUNC_END(strnlen)
+SYM_FUNC_ALIAS(__pi_strnlen, strnlen)
+EXPORT_SYMBOL(strnlen)
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index 530e497ca2f9..d7c0533108be 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -2,7 +2,7 @@
 
 purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
 ifeq ($(CONFIG_KASAN_GENERIC)$(CONFIG_KASAN_SW_TAGS),)
-purgatory-y += strcmp.o strlen.o strncmp.o
+purgatory-y += strcmp.o strlen.o strncmp.o strnlen.o
 endif
 
 targets += $(purgatory-y)
@@ -32,6 +32,9 @@ $(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE
 $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
+$(obj)/strnlen.o: $(srctree)/arch/riscv/lib/strnlen.S FORCE
+	$(call if_changed_rule,as_o_S)
+
 CFLAGS_sha256.o := -D__DISABLE_EXPORTS -D__NO_FORTIFY
 CFLAGS_string.o := -D__DISABLE_EXPORTS
 CFLAGS_ctype.o := -D__DISABLE_EXPORTS
-- 
2.25.1
Re: [PATCH v3 6/8] riscv: lib: add strnlen implementation
Posted by Qingfang Deng 2 weeks, 5 days ago
On Tue, 20 Jan 2026 14:58:50 +0800, Feng Jiang wrote:
> diff --git a/arch/riscv/lib/strnlen.S b/arch/riscv/lib/strnlen.S

Branches that test maxlen can be replaced with Zbb minu instruction.
(see below)

> +	/*
> +	 * Returns
> +	 *   a0 - String length
> +	 *
> +	 * Parameters
> +	 *   a0 - String to measure
> +	 *   a1 - Max length of string
> +	 *
> +	 * Clobbers
> +	 *   t0, t1, t2, t3, t4
> +	 */
> +
> +	/* If maxlen is 0, return 0. */
> +	beqz	a1, 3f
> +
> +	/* Number of irrelevant bytes in the first word. */
> +	andi	t2, a0, SZREG-1
> +
> +	/* Align pointer. */
> +	andi	t0, a0, -SZREG
> +
> +	li	t3, SZREG
> +	sub	t3, t3, t2
> +	slli	t2, t2, 3
> +
> +	/* Aligned boundary. */
> +	add	t4, a0, a1
> +	andi	t4, t4, -SZREG
> +
> +	/* Get the first word.  */
> +	REG_L	t1, 0(t0)
> +
> +	/*
> +	 * Shift away the partial data we loaded to remove the irrelevant bytes
> +	 * preceding the string with the effect of adding NUL bytes at the
> +	 * end of the string's first word.
> +	 */
> +	SHIFT	t1, t1, t2
> +
> +	/* Convert non-NUL into 0xff and NUL into 0x00. */
> +	orc.b	t1, t1
> +
> +	/* Convert non-NUL into 0x00 and NUL into 0xff. */
> +	not	t1, t1
> +
> +	/*
> +	 * Search for the first set bit (corresponding to a NUL byte in the
> +	 * original chunk).
> +	 */
> +	CZ	t1, t1
> +
> +	/*
> +	 * The first chunk is special: compare against the number
> +	 * of valid bytes in this chunk.
> +	 */
> +	srli	a0, t1, 3
> +
> +	/* Limit the result by maxlen. */
> +	bleu	a1, a0, 3f

minu	a0, a0, a1

> +
> +	bgtu	t3, a0, 2f
> +
> +	/* Prepare for the word comparison loop. */
> +	addi	t2, t0, SZREG
> +	li	t3, -1
> +
> +	/*
> +	 * Our critical loop is 4 instructions and processes data in
> +	 * 4 byte or 8 byte chunks.
> +	 */
> +	.p2align 3
> +1:
> +	REG_L	t1, SZREG(t0)
> +	addi	t0, t0, SZREG
> +	orc.b	t1, t1
> +	bgeu	t0, t4, 4f
> +	beq	t1, t3, 1b
> +4:
> +	not	t1, t1
> +	CZ	t1, t1
> +	srli	t1, t1, 3
> +
> +	/* Get number of processed bytes. */
> +	sub	t2, t0, t2
> +
> +	/* Add number of characters in the first word.  */
> +	add	a0, a0, t2
> +
> +	/* Add number of characters in the last word.  */
> +	add	a0, a0, t1
> +
> +	/* Ensure the final result does not exceed maxlen. */
> +	bgeu	a0, a1, 3f

minu	a0, a0, a1

> +2:
> +	ret
> +3:
> +	mv	a0, a1
> +	ret
> +
> +.option pop

--
Qingfang
Re: [PATCH v3 6/8] riscv: lib: add strnlen implementation
Posted by Feng Jiang 2 weeks, 3 days ago
On 2026/1/21 15:24, Qingfang Deng wrote:
> On Tue, 20 Jan 2026 14:58:50 +0800, Feng Jiang wrote:
>> diff --git a/arch/riscv/lib/strnlen.S b/arch/riscv/lib/strnlen.S
> 
> Branches that test maxlen can be replaced with Zbb minu instruction.
> (see below)
> 

...

>> +	/*
>> +	 * The first chunk is special: compare against the number
>> +	 * of valid bytes in this chunk.
>> +	 */
>> +	srli	a0, t1, 3
>> +
>> +	/* Limit the result by maxlen. */
>> +	bleu	a1, a0, 3f
> 
> minu	a0, a0, a1
> 
>> +
>> +	bgtu	t3, a0, 2f
>> +
>> +	/* Prepare for the word comparison loop. */
>> +	addi	t2, t0, SZREG
>> +	li	t3, -1
>> +
>> +	/*
>> +	 * Our critical loop is 4 instructions and processes data in
>> +	 * 4 byte or 8 byte chunks.
>> +	 */
>> +	.p2align 3
>> +1:
>> +	REG_L	t1, SZREG(t0)
>> +	addi	t0, t0, SZREG
>> +	orc.b	t1, t1
>> +	bgeu	t0, t4, 4f
>> +	beq	t1, t3, 1b
>> +4:
>> +	not	t1, t1
>> +	CZ	t1, t1
>> +	srli	t1, t1, 3
>> +
>> +	/* Get number of processed bytes. */
>> +	sub	t2, t0, t2
>> +
>> +	/* Add number of characters in the first word.  */
>> +	add	a0, a0, t2
>> +
>> +	/* Add number of characters in the last word.  */
>> +	add	a0, a0, t1
>> +
>> +	/* Ensure the final result does not exceed maxlen. */
>> +	bgeu	a0, a1, 3f
> 
> minu	a0, a0, a1
> 

Thanks for the great suggestion! I see your point now—using minu is indeed a much
more elegant and efficient way to handle the maxlen constraint. It nicely eliminates
unnecessary branches and simplifies the code while still allowing for early returns.

I'll incorporate this into a v4 patch and add a Suggested-by tag for you. Thanks
again for your insightful review!

-- 
With Best Regards,
Feng Jiang

Re: [PATCH v3 6/8] riscv: lib: add strnlen implementation
Posted by Andy Shevchenko 2 weeks, 6 days ago
On Tue, Jan 20, 2026 at 02:58:50PM +0800, Feng Jiang wrote:
> Add an optimized strnlen() implementation for RISC-V. This version
> includes a generic word-at-a-time optimization and a Zbb-powered
> optimization using the 'orc.b' instruction, derived from the strlen
> implementation.
> 
> Benchmark results (QEMU TCG, rv64):
>   Length | Original (MB/s) | Optimized (MB/s) | Improvement
>   -------|-----------------|------------------|------------
>   16 B   | 189             | 310              | +64.0%
>   512 B  | 344             | 1535             | +346.2%
>   4096 B | 363             | 1854             | +410.7%

> Suggested-by: Andy Shevchenko <andy@kernel.org>

Wrong tag, I have zero knowledge about RISC V.

-- 
With Best Regards,
Andy Shevchenko
Re: [PATCH v3 6/8] riscv: lib: add strnlen implementation
Posted by Feng Jiang 2 weeks, 5 days ago
On 2026/1/20 15:31, Andy Shevchenko wrote:
> On Tue, Jan 20, 2026 at 02:58:50PM +0800, Feng Jiang wrote:
>> Add an optimized strnlen() implementation for RISC-V. This version
>> includes a generic word-at-a-time optimization and a Zbb-powered
>> optimization using the 'orc.b' instruction, derived from the strlen
>> implementation.
>>
>> Benchmark results (QEMU TCG, rv64):
>>   Length | Original (MB/s) | Optimized (MB/s) | Improvement
>>   -------|-----------------|------------------|------------
>>   16 B   | 189             | 310              | +64.0%
>>   512 B  | 344             | 1535             | +346.2%
>>   4096 B | 363             | 1854             | +410.7%
> 
>> Suggested-by: Andy Shevchenko <andy@kernel.org>
> 
> Wrong tag, I have zero knowledge about RISC V.
> 

Sorry for the confusion. I misunderstood the scope of the 'Suggested-by' tag.
I will remove it from the RISC-V specific implementation patches and only keep
relevant credits in the benchmarking/testing patches where your feedback was
applied. 

Thanks for clarifying!

-- 
With Best Regards,
Feng Jiang