[v5] riscv: optimize string functions and add kunit tests

[PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()

Posted by Feng Jiang 1 week, 6 days ago

Introduce a benchmarking framework to the string_kunit test suite to
measure the execution efficiency of string functions.

The implementation is inspired by crc_benchmark(), measuring throughput
(MB/s) and latency (ns/call) across a range of string lengths. It
includes a warm-up phase, disables preemption during measurement, and
uses a fixed seed for reproducible results.

This framework allows for comparing different implementations (e.g.,
generic C vs. architecture-optimized assembly) within the KUnit
environment.

Initially, provide a benchmark for strlen().

Suggested-by: Andy Shevchenko <andy@kernel.org>
Suggested-by: Eric Biggers <ebiggers@kernel.org>
Signed-off-by: Feng Jiang <jiangfeng@kylinos.cn>
---
 lib/Kconfig.debug        |  11 +++
 lib/tests/string_kunit.c | 159 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 170 insertions(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba36939fda79..21b058ae815f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2475,6 +2475,17 @@ config STRING_HELPERS_KUNIT_TEST
 	depends on KUNIT
 	default KUNIT_ALL_TESTS
 
+config STRING_KUNIT_BENCH
+       bool "Benchmark string functions at runtime"
+       depends on STRING_KUNIT_TEST
+       help
+         Enable performance measurement for string functions.
+
+         This measures the execution efficiency of string functions
+         during the KUnit test run.
+
+         If unsure, say N.
+
 config FFS_KUNIT_TEST
 	tristate "KUnit test ffs-family functions at runtime" if !KUNIT_ALL_TESTS
 	depends on KUNIT
diff --git a/lib/tests/string_kunit.c b/lib/tests/string_kunit.c
index 7f1e2bf6a352..3fcd55953bd7 100644
--- a/lib/tests/string_kunit.c
+++ b/lib/tests/string_kunit.c
@@ -6,10 +6,15 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <kunit/test.h>
+#include <linux/ktime.h>
+#include <linux/math64.h>
 #include <linux/module.h>
+#include <linux/prandom.h>
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/time64.h>
+#include <linux/units.h>
 
 #define STRCMP_LARGE_BUF_LEN 2048
 #define STRCMP_CHANGE_POINT 1337
@@ -20,6 +25,9 @@
 #define STRING_TEST_MAX_LEN	128
 #define STRING_TEST_MAX_OFFSET	16
 
+#define STRING_BENCH_SEED	888
+#define STRING_BENCH_WORKLOAD	(1 * MEGA)
+
 static void string_test_memset16(struct kunit *test)
 {
 	unsigned i, j, k;
@@ -700,6 +708,156 @@ static void string_test_strends(struct kunit *test)
 	KUNIT_EXPECT_TRUE(test, strends("", ""));
 }
 
+#if IS_ENABLED(CONFIG_STRING_KUNIT_BENCH)
+/* Target string lengths for benchmarking */
+static const size_t bench_lens[] = {
+	0, 1, 7, 8, 16, 31, 64, 127, 512, 1024, 3173, 4096,
+};
+
+/**
+ * alloc_max_bench_buffer() - Allocate buffer for the max test case.
+ * @test: KUnit context for managed allocation.
+ * @lens: Array of lengths used in the benchmark cases.
+ * @count: Number of elements in the @lens array.
+ * @buf_len: [out] Pointer to store the actually allocated buffer
+ * size (including NUL character).
+ *
+ * Return: Pointer to the allocated memory, or NULL on failure.
+ */
+static void *alloc_max_bench_buffer(struct kunit *test,
+		const size_t *lens, size_t count, size_t *buf_len)
+{
+	size_t max_len = 0;
+	void *buf;
+
+	for (size_t i = 0; i < count; i++)
+		max_len = max(lens[i], max_len);
+
+	/* Add space for NUL character */
+	max_len += 1;
+
+	buf = kunit_kzalloc(test, max_len, GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	if (buf_len)
+		*buf_len = max_len;
+
+	return buf;
+}
+
+/**
+ * fill_random_string() - Populate a buffer with a random NUL-terminated string.
+ * @buf: Buffer to fill.
+ * @len: Length of the buffer in bytes.
+ *
+ * Fills the buffer with random non-NUL bytes and ensures the string is
+ * properly NUL-terminated.
+ */
+static void fill_random_string(char *buf, size_t len)
+{
+	struct rnd_state state;
+
+	if (!buf || !len)
+		return;
+
+	/* Use a fixed seed to ensure deterministic benchmark results */
+	prandom_seed_state(&state, STRING_BENCH_SEED);
+	prandom_bytes_state(&state, buf, len);
+
+	/* Replace NUL characters to avoid early string termination */
+	for (size_t i = 0; i < len; i++) {
+		if (buf[i] == '\0')
+			buf[i] = 0x01;
+	}
+
+	buf[len - 1] = '\0';
+}
+
+/**
+ * STRING_BENCH() - Benchmark string functions.
+ * @iters: Number of iterations to run.
+ * @func: Function to benchmark.
+ * @...: Variable arguments passed to @func.
+ *
+ * Disables preemption and measures the total time in nanoseconds to execute
+ * @func(@__VA_ARGS__) for @iters times, including a small warm-up phase.
+ *
+ * Context: Disables preemption during measurement.
+ * Return: Total execution time in nanoseconds (u64).
+ */
+#define STRING_BENCH(iters, func, ...)					\
+({									\
+	/* Volatile function pointer prevents dead code elimination */	\
+	typeof(func) (* volatile __func) = (func);			\
+	size_t __bn_iters = (iters);					\
+	size_t __bn_warm_iters;						\
+	u64 __bn_t;							\
+									\
+	__bn_warm_iters = max(__bn_iters / 10, 50U);			\
+									\
+	for (size_t __bn_i = 0; __bn_i < __bn_warm_iters; __bn_i++)	\
+		(void)__func(__VA_ARGS__);				\
+									\
+	preempt_disable();						\
+	__bn_t = ktime_get_ns();					\
+	for (size_t __bn_i = 0; __bn_i < __bn_iters; __bn_i++)		\
+		(void)__func(__VA_ARGS__);				\
+	__bn_t = ktime_get_ns() - __bn_t;				\
+	preempt_enable();						\
+	__bn_t;								\
+})
+
+/**
+ * STRING_BENCH_BUF() - Benchmark harness for single-buffer functions.
+ * @test: KUnit context.
+ * @buf_name: Local char * variable name to be defined.
+ * @buf_size: Local size_t variable name to be defined.
+ * @func: Function to benchmark.
+ * @...: Extra arguments for @func.
+ *
+ * Prepares a randomized, NUL-terminated buffer and iterates through lengths
+ * in bench_lens, defining @buf_name and @buf_size in each loop.
+ */
+#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)		\
+do {									\
+	size_t buf_size, _bn_i, _bn_iters, _bn_size = 0;		\
+	u64 _bn_t, _bn_mbps = 0, _bn_lat = 0;				\
+	char *buf_name, *_bn_buf;					\
+									\
+	_bn_buf = alloc_max_bench_buffer(test, bench_lens,		\
+			ARRAY_SIZE(bench_lens), &_bn_size);		\
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf);			\
+									\
+	fill_random_string(_bn_buf, _bn_size);				\
+									\
+	for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {	\
+		buf_size = bench_lens[_bn_i];				\
+		buf_name = _bn_buf + _bn_size - buf_size - 1;		\
+		_bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U);	\
+									\
+		_bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__);	\
+									\
+		if (_bn_t > 0) {					\
+			_bn_mbps = (u64)(buf_size) * _bn_iters		\
+					* (NSEC_PER_SEC / MEGA);	\
+			_bn_mbps = div64_u64(_bn_mbps, _bn_t);		\
+			_bn_lat = div64_u64(_bn_t, _bn_iters);		\
+		}							\
+		kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n",	\
+				buf_size, _bn_mbps, _bn_lat);		\
+	}								\
+} while (0)
+#else
+#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)		\
+	kunit_skip(test, "not enabled")
+#endif /* IS_ENABLED(CONFIG_STRING_KUNIT_BENCH) */
+
+static void string_bench_strlen(struct kunit *test)
+{
+	STRING_BENCH_BUF(test, buf, len, strlen, buf);
+}
+
 static struct kunit_case string_test_cases[] = {
 	KUNIT_CASE(string_test_memset16),
 	KUNIT_CASE(string_test_memset32),
@@ -725,6 +883,7 @@ static struct kunit_case string_test_cases[] = {
 	KUNIT_CASE(string_test_strtomem),
 	KUNIT_CASE(string_test_memtostr),
 	KUNIT_CASE(string_test_strends),
+	KUNIT_CASE(string_bench_strlen),
 	{}
 };
 
-- 
2.25.1

Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()

Posted by Andy Shevchenko 1 week, 6 days ago

On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
> Introduce a benchmarking framework to the string_kunit test suite to
> measure the execution efficiency of string functions.
> 
> The implementation is inspired by crc_benchmark(), measuring throughput
> (MB/s) and latency (ns/call) across a range of string lengths. It
> includes a warm-up phase, disables preemption during measurement, and
> uses a fixed seed for reproducible results.
> 
> This framework allows for comparing different implementations (e.g.,
> generic C vs. architecture-optimized assembly) within the KUnit
> environment.

Acked-by: Andy Shevchenko <andy@kernel.org>
A few nit-picks below.

...

> +static void *alloc_max_bench_buffer(struct kunit *test,
> +		const size_t *lens, size_t count, size_t *buf_len)
> +{
> +	size_t max_len = 0;
> +	void *buf;
> +
> +	for (size_t i = 0; i < count; i++)
> +		max_len = max(lens[i], max_len);

You also need minmax.h.

> +	/* Add space for NUL character */
> +	max_len += 1;
> +
> +	buf = kunit_kzalloc(test, max_len, GFP_KERNEL);
> +	if (!buf)
> +		return NULL;
> +
> +	if (buf_len)
> +		*buf_len = max_len;
> +
> +	return buf;
> +}

...

> +#define STRING_BENCH(iters, func, ...)					\
> +({									\
> +	/* Volatile function pointer prevents dead code elimination */	\
> +	typeof(func) (* volatile __func) = (func);			\
> +	size_t __bn_iters = (iters);					\
> +	size_t __bn_warm_iters;						\
> +	u64 __bn_t;							\


Perhaps a short comment here

	/* Use 10% of the given iterations (maximum 50) to warm up */

> +	__bn_warm_iters = max(__bn_iters / 10, 50U);			\
> +									\
> +	for (size_t __bn_i = 0; __bn_i < __bn_warm_iters; __bn_i++)	\
> +		(void)__func(__VA_ARGS__);				\
> +									\
> +	preempt_disable();						\
> +	__bn_t = ktime_get_ns();					\
> +	for (size_t __bn_i = 0; __bn_i < __bn_iters; __bn_i++)		\
> +		(void)__func(__VA_ARGS__);				\
> +	__bn_t = ktime_get_ns() - __bn_t;				\
> +	preempt_enable();						\
> +	__bn_t;								\
> +})

...

> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)		\
> +do {									\
> +	size_t buf_size, _bn_i, _bn_iters, _bn_size = 0;		\
> +	u64 _bn_t, _bn_mbps = 0, _bn_lat = 0;				\
> +	char *buf_name, *_bn_buf;					\
> +									\
> +	_bn_buf = alloc_max_bench_buffer(test, bench_lens,		\
> +			ARRAY_SIZE(bench_lens), &_bn_size);		\
> +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf);			\
> +									\
> +	fill_random_string(_bn_buf, _bn_size);				\
> +									\
> +	for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {	\
> +		buf_size = bench_lens[_bn_i];				\
> +		buf_name = _bn_buf + _bn_size - buf_size - 1;		\
> +		_bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U);	\
> +									\
> +		_bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__);	\

> +									\

Remove unneeded blank line.

> +		if (_bn_t > 0) {					\
> +			_bn_mbps = (u64)(buf_size) * _bn_iters		\

Why buf_size in the parentheses here and not anywhere else (above)?
I assume it's just an external temporary variable? But why do we need to have
it in the parameters to the macro?

> +					* (NSEC_PER_SEC / MEGA);	\

Leave '*' on the previous line.

> +			_bn_mbps = div64_u64(_bn_mbps, _bn_t);		\
> +			_bn_lat = div64_u64(_bn_t, _bn_iters);		\
> +		}							\
> +		kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n",	\
> +				buf_size, _bn_mbps, _bn_lat);		\
> +	}								\
> +} while (0)

-- 
With Best Regards,
Andy Shevchenko

Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()

Posted by Feng Jiang 1 week, 6 days ago

On 2026/1/27 16:57, Andy Shevchenko wrote:
> On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
>> Introduce a benchmarking framework to the string_kunit test suite to
>> measure the execution efficiency of string functions.
>>
>> The implementation is inspired by crc_benchmark(), measuring throughput
>> (MB/s) and latency (ns/call) across a range of string lengths. It
>> includes a warm-up phase, disables preemption during measurement, and
>> uses a fixed seed for reproducible results.
>>
>> This framework allows for comparing different implementations (e.g.,
>> generic C vs. architecture-optimized assembly) within the KUnit
>> environment.
> 
> Acked-by: Andy Shevchenko <andy@kernel.org>
> A few nit-picks below.
> 

Thanks for the Acked-by and the detailed review.

> ...
> 
>> +static void *alloc_max_bench_buffer(struct kunit *test,
>> +		const size_t *lens, size_t count, size_t *buf_len)
>> +{
>> +	size_t max_len = 0;
>> +	void *buf;
>> +
>> +	for (size_t i = 0; i < count; i++)
>> +		max_len = max(lens[i], max_len);
> 
> You also need minmax.h.

Will add.

> ...
> 
>> +#define STRING_BENCH(iters, func, ...)					\
>> +({									\
>> +	/* Volatile function pointer prevents dead code elimination */	\
>> +	typeof(func) (* volatile __func) = (func);			\
>> +	size_t __bn_iters = (iters);					\
>> +	size_t __bn_warm_iters;						\
>> +	u64 __bn_t;							\
> 
> 
> Perhaps a short comment here
> 
> 	/* Use 10% of the given iterations (maximum 50) to warm up */

Will add this comment.

>> +	__bn_warm_iters = max(__bn_iters / 10, 50U);			\
>> +									\
>> +	for (size_t __bn_i = 0; __bn_i < __bn_warm_iters; __bn_i++)	\
>> +		(void)__func(__VA_ARGS__);				\
>> +									\
>> +	preempt_disable();						\
>> +	__bn_t = ktime_get_ns();					\
>> +	for (size_t __bn_i = 0; __bn_i < __bn_iters; __bn_i++)		\
>> +		(void)__func(__VA_ARGS__);				\
>> +	__bn_t = ktime_get_ns() - __bn_t;				\
>> +	preempt_enable();						\
>> +	__bn_t;								\
>> +})
> 
> ...
> 
>> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)		\
>> +do {									\
>> +	size_t buf_size, _bn_i, _bn_iters, _bn_size = 0;		\
>> +	u64 _bn_t, _bn_mbps = 0, _bn_lat = 0;				\
>> +	char *buf_name, *_bn_buf;					\
>> +									\
>> +	_bn_buf = alloc_max_bench_buffer(test, bench_lens,		\
>> +			ARRAY_SIZE(bench_lens), &_bn_size);		\
>> +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf);			\
>> +									\
>> +	fill_random_string(_bn_buf, _bn_size);				\
>> +									\
>> +	for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {	\
>> +		buf_size = bench_lens[_bn_i];				\
>> +		buf_name = _bn_buf + _bn_size - buf_size - 1;		\
>> +		_bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U);	\
>> +									\
>> +		_bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__);	\
> 
>> +									\
> 
> Remove unneeded blank line.

Will fix.

>> +		if (_bn_t > 0) {					\
>> +			_bn_mbps = (u64)(buf_size) * _bn_iters		\
> 
> Why buf_size in the parentheses here and not anywhere else (above)?

It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.

> I assume it's just an external temporary variable? But why do we need to have
> it in the parameters to the macro?

This is necessary because buf_size often needs to be passed as an argument
to the function under test. For instance, when benchmarking strnlen, the
caller must pass the current length as an argument:
STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);

>> +					* (NSEC_PER_SEC / MEGA);	\
> > Leave '*' on the previous line.

Will fix.

>> +			_bn_mbps = div64_u64(_bn_mbps, _bn_t);		\
>> +			_bn_lat = div64_u64(_bn_t, _bn_iters);		\
>> +		}							\
>> +		kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n",	\
>> +				buf_size, _bn_mbps, _bn_lat);		\
>> +	}								\
>> +} while (0)
> 

Thank you again for your patience and for helping me improve the quality of
this patch.

-- 
With Best Regards,
Feng Jiang

Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()

Posted by Andy Shevchenko 1 week, 6 days ago

On Tue, Jan 27, 2026 at 05:33:10PM +0800, Feng Jiang wrote:
> On 2026/1/27 16:57, Andy Shevchenko wrote:
> > On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:

...

> >> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)		\
> >> +do {									\
> >> +	size_t buf_size, _bn_i, _bn_iters, _bn_size = 0;		\
> >> +	u64 _bn_t, _bn_mbps = 0, _bn_lat = 0;				\
> >> +	char *buf_name, *_bn_buf;					\
> >> +									\
> >> +	_bn_buf = alloc_max_bench_buffer(test, bench_lens,		\
> >> +			ARRAY_SIZE(bench_lens), &_bn_size);		\
> >> +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf);			\
> >> +									\
> >> +	fill_random_string(_bn_buf, _bn_size);				\
> >> +									\
> >> +	for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {	\
> >> +		buf_size = bench_lens[_bn_i];				\
> >> +		buf_name = _bn_buf + _bn_size - buf_size - 1;		\
> >> +		_bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U);	\
> >> +									\
> >> +		_bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__);	\
> > 
> >> +									\
> > 
> > Remove unneeded blank line.
> 
> Will fix.
> 
> >> +		if (_bn_t > 0) {					\
> >> +			_bn_mbps = (u64)(buf_size) * _bn_iters		\
> > 
> > Why buf_size in the parentheses here and not anywhere else (above)?
> 
> It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.
> 
> > I assume it's just an external temporary variable? But why do we need to have
> > it in the parameters to the macro?
> 
> This is necessary because buf_size often needs to be passed as an argument
> to the function under test. For instance, when benchmarking strnlen, the
> caller must pass the current length as an argument:
> STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);

Okay, and why is it needed in this macro? It get overridden immediately in
the loop. Assuming that the array size of bench lengths is not zero, this
has no visible use. Can you elaborate?

> >> +					* (NSEC_PER_SEC / MEGA);	\
> > > Leave '*' on the previous line.
> 
> Will fix.
> 
> >> +			_bn_mbps = div64_u64(_bn_mbps, _bn_t);		\
> >> +			_bn_lat = div64_u64(_bn_t, _bn_iters);		\
> >> +		}							\
> >> +		kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n",	\
> >> +				buf_size, _bn_mbps, _bn_lat);		\
> >> +	}								\
> >> +} while (0)

-- 
With Best Regards,
Andy Shevchenko

Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()

Posted by Feng Jiang 1 week, 5 days ago

On 2026/1/27 17:50, Andy Shevchenko wrote:
> On Tue, Jan 27, 2026 at 05:33:10PM +0800, Feng Jiang wrote:
>> On 2026/1/27 16:57, Andy Shevchenko wrote:
>>> On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
> 
> ...
> 
>>>> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)		\
>>>> +do {									\
>>>> +	size_t buf_size, _bn_i, _bn_iters, _bn_size = 0;		\
>>>> +	u64 _bn_t, _bn_mbps = 0, _bn_lat = 0;				\
>>>> +	char *buf_name, *_bn_buf;					\
>>>> +									\
>>>> +	_bn_buf = alloc_max_bench_buffer(test, bench_lens,		\
>>>> +			ARRAY_SIZE(bench_lens), &_bn_size);		\
>>>> +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf);			\
>>>> +									\
>>>> +	fill_random_string(_bn_buf, _bn_size);				\
>>>> +									\
>>>> +	for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {	\
>>>> +		buf_size = bench_lens[_bn_i];				\
>>>> +		buf_name = _bn_buf + _bn_size - buf_size - 1;		\
>>>> +		_bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U);	\
>>>> +									\
>>>> +		_bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__);	\
>>>
>>>> +									\
>>>
>>> Remove unneeded blank line.
>>
>> Will fix.
>>
>>>> +		if (_bn_t > 0) {					\
>>>> +			_bn_mbps = (u64)(buf_size) * _bn_iters		\
>>>
>>> Why buf_size in the parentheses here and not anywhere else (above)?
>>
>> It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.
>>
>>> I assume it's just an external temporary variable? But why do we need to have
>>> it in the parameters to the macro?
>>
>> This is necessary because buf_size often needs to be passed as an argument
>> to the function under test. For instance, when benchmarking strnlen, the
>> caller must pass the current length as an argument:
>> STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);
> 
> Okay, and why is it needed in this macro? It get overridden immediately in
> the loop. Assuming that the array size of bench lengths is not zero, this
> has no visible use. Can you elaborate?

Thank you for the explanation. I see the source of the confusion now.

In v5, buf_name and buf_size were not intended to pass external variables into
the macro. Instead, they were naming placeholders for local variables declared
inside the macro's scope. This allows the caller to define the names used in
the variadic arguments.

To resolve the logical inconsistency you pointed out, I'd like to propose two
options for v6. Which one would you prefer?

Option 1: Internal Declaration (The "Self-Contained" approach)

We declare and initialize the variables directly inside the loop. This keeps
the macro self-contained and the caller doesn't need to pre-declare anything.

for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
    size_t buf_size = bench_lens[_bn_i];
    char *buf_name = _bn_buf + _bn_size - buf_size - 1;
    ...
}

Usage:
  STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);

Option 2: External Declaration (The list.h approach)

The macro expects the caller to provide pre-declared variables, similar to
list_for_each_entry(). This removes all re-declarations inside the macro.

for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
    buf_size = bench_lens[_bn_i];
    buf_name = _bn_buf + _bn_size - buf_size - 1;
    ...
}

Usage:
  size_t my_len;
  char *my_buf;
  STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);

Please let me know which style fits the kernel's preference better, and
I will implement it in v6 along with your other suggestions.

Thanks for the catch!

> 
>>>> +					* (NSEC_PER_SEC / MEGA);	\
>>>> Leave '*' on the previous line.
>>
>> Will fix.
>>
>>>> +			_bn_mbps = div64_u64(_bn_mbps, _bn_t);		\
>>>> +			_bn_lat = div64_u64(_bn_t, _bn_iters);		\
>>>> +		}							\
>>>> +		kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n",	\
>>>> +				buf_size, _bn_mbps, _bn_lat);		\
>>>> +	}								\
>>>> +} while (0)
> 

-- 
With Best Regards,
Feng Jiang

Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()

Posted by Andy Shevchenko 1 week, 5 days ago

On Wed, Jan 28, 2026 at 09:44:40AM +0800, Feng Jiang wrote:
> On 2026/1/27 17:50, Andy Shevchenko wrote:
> > On Tue, Jan 27, 2026 at 05:33:10PM +0800, Feng Jiang wrote:
> >> On 2026/1/27 16:57, Andy Shevchenko wrote:
> >>> On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:

...

> >>>> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)		\
> >>>> +do {									\
> >>>> +	size_t buf_size, _bn_i, _bn_iters, _bn_size = 0;		\
> >>>> +	u64 _bn_t, _bn_mbps = 0, _bn_lat = 0;				\
> >>>> +	char *buf_name, *_bn_buf;					\
> >>>> +									\
> >>>> +	_bn_buf = alloc_max_bench_buffer(test, bench_lens,		\
> >>>> +			ARRAY_SIZE(bench_lens), &_bn_size);		\
> >>>> +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf);			\
> >>>> +									\
> >>>> +	fill_random_string(_bn_buf, _bn_size);				\
> >>>> +									\
> >>>> +	for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {	\
> >>>> +		buf_size = bench_lens[_bn_i];				\
> >>>> +		buf_name = _bn_buf + _bn_size - buf_size - 1;		\
> >>>> +		_bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U);	\
> >>>> +									\
> >>>> +		_bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__);	\
> >>>
> >>>> +									\
> >>>
> >>> Remove unneeded blank line.
> >>
> >> Will fix.
> >>
> >>>> +		if (_bn_t > 0) {					\
> >>>> +			_bn_mbps = (u64)(buf_size) * _bn_iters		\
> >>>
> >>> Why buf_size in the parentheses here and not anywhere else (above)?
> >>
> >> It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.
> >>
> >>> I assume it's just an external temporary variable? But why do we need to have
> >>> it in the parameters to the macro?
> >>
> >> This is necessary because buf_size often needs to be passed as an argument
> >> to the function under test. For instance, when benchmarking strnlen, the
> >> caller must pass the current length as an argument:
> >> STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);
> > 
> > Okay, and why is it needed in this macro? It get overridden immediately in
> > the loop. Assuming that the array size of bench lengths is not zero, this
> > has no visible use. Can you elaborate?
> 
> Thank you for the explanation. I see the source of the confusion now.
> 
> In v5, buf_name and buf_size were not intended to pass external variables into
> the macro. Instead, they were naming placeholders for local variables declared
> inside the macro's scope. This allows the caller to define the names used in
> the variadic arguments.
> 
> To resolve the logical inconsistency you pointed out, I'd like to propose two
> options for v6. Which one would you prefer?
> 
> Option 1: Internal Declaration (The "Self-Contained" approach)
> 
> We declare and initialize the variables directly inside the loop. This keeps
> the macro self-contained and the caller doesn't need to pre-declare anything.
> 
> for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
>     size_t buf_size = bench_lens[_bn_i];
>     char *buf_name = _bn_buf + _bn_size - buf_size - 1;
>     ...
> }
> 
> Usage:
>   STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);

This option is better as long as the user doesn't need to know the (stale) data
out of these parameters. And I think this is the case, so #1 is the winner.

> Option 2: External Declaration (The list.h approach)
> 
> The macro expects the caller to provide pre-declared variables, similar to
> list_for_each_entry(). This removes all re-declarations inside the macro.
> 
> for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
>     buf_size = bench_lens[_bn_i];
>     buf_name = _bn_buf + _bn_size - buf_size - 1;
>     ...
> }
> 
> Usage:
>   size_t my_len;
>   char *my_buf;
>   STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);
> 
> Please let me know which style fits the kernel's preference better, and
> I will implement it in v6 along with your other suggestions.
> 
> Thanks for the catch!
> 
> >>>> +					* (NSEC_PER_SEC / MEGA);	\
> >>>> Leave '*' on the previous line.
> >>
> >> Will fix.
> >>
> >>>> +			_bn_mbps = div64_u64(_bn_mbps, _bn_t);		\
> >>>> +			_bn_lat = div64_u64(_bn_t, _bn_iters);		\
> >>>> +		}							\
> >>>> +		kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n",	\
> >>>> +				buf_size, _bn_mbps, _bn_lat);		\
> >>>> +	}								\
> >>>> +} while (0)

-- 
With Best Regards,
Andy Shevchenko

Re: [PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()

Posted by Feng Jiang 1 week, 5 days ago

On 2026/1/28 16:59, Andy Shevchenko wrote:
> On Wed, Jan 28, 2026 at 09:44:40AM +0800, Feng Jiang wrote:
>> On 2026/1/27 17:50, Andy Shevchenko wrote:
>>> On Tue, Jan 27, 2026 at 05:33:10PM +0800, Feng Jiang wrote:
>>>> On 2026/1/27 16:57, Andy Shevchenko wrote:
>>>>> On Tue, Jan 27, 2026 at 09:25:54AM +0800, Feng Jiang wrote:
> 
> ...
> 
>>>>>> +#define STRING_BENCH_BUF(test, buf_name, buf_size, func, ...)		\
>>>>>> +do {									\
>>>>>> +	size_t buf_size, _bn_i, _bn_iters, _bn_size = 0;		\
>>>>>> +	u64 _bn_t, _bn_mbps = 0, _bn_lat = 0;				\
>>>>>> +	char *buf_name, *_bn_buf;					\
>>>>>> +									\
>>>>>> +	_bn_buf = alloc_max_bench_buffer(test, bench_lens,		\
>>>>>> +			ARRAY_SIZE(bench_lens), &_bn_size);		\
>>>>>> +	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, _bn_buf);			\
>>>>>> +									\
>>>>>> +	fill_random_string(_bn_buf, _bn_size);				\
>>>>>> +									\
>>>>>> +	for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {	\
>>>>>> +		buf_size = bench_lens[_bn_i];				\
>>>>>> +		buf_name = _bn_buf + _bn_size - buf_size - 1;		\
>>>>>> +		_bn_iters = STRING_BENCH_WORKLOAD / max(buf_size, 1U);	\
>>>>>> +									\
>>>>>> +		_bn_t = STRING_BENCH(_bn_iters, func, ##__VA_ARGS__);	\
>>>>>
>>>>>> +									\
>>>>>
>>>>> Remove unneeded blank line.
>>>>
>>>> Will fix.
>>>>
>>>>>> +		if (_bn_t > 0) {					\
>>>>>> +			_bn_mbps = (u64)(buf_size) * _bn_iters		\
>>>>>
>>>>> Why buf_size in the parentheses here and not anywhere else (above)?
>>>>
>>>> It was a bit inconsistent. I'll remove the unneeded parentheses for buf_size.
>>>>
>>>>> I assume it's just an external temporary variable? But why do we need to have
>>>>> it in the parameters to the macro?
>>>>
>>>> This is necessary because buf_size often needs to be passed as an argument
>>>> to the function under test. For instance, when benchmarking strnlen, the
>>>> caller must pass the current length as an argument:
>>>> STRING_BENCH_BUF(test, buf, len, strnlen, buf, len);
>>>
>>> Okay, and why is it needed in this macro? It get overridden immediately in
>>> the loop. Assuming that the array size of bench lengths is not zero, this
>>> has no visible use. Can you elaborate?
>>
>> Thank you for the explanation. I see the source of the confusion now.
>>
>> In v5, buf_name and buf_size were not intended to pass external variables into
>> the macro. Instead, they were naming placeholders for local variables declared
>> inside the macro's scope. This allows the caller to define the names used in
>> the variadic arguments.
>>
>> To resolve the logical inconsistency you pointed out, I'd like to propose two
>> options for v6. Which one would you prefer?
>>
>> Option 1: Internal Declaration (The "Self-Contained" approach)
>>
>> We declare and initialize the variables directly inside the loop. This keeps
>> the macro self-contained and the caller doesn't need to pre-declare anything.
>>
>> for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
>>     size_t buf_size = bench_lens[_bn_i];
>>     char *buf_name = _bn_buf + _bn_size - buf_size - 1;
>>     ...
>> }
>>
>> Usage:
>>   STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);
> 
> This option is better as long as the user doesn't need to know the (stale) data
> out of these parameters. And I think this is the case, so #1 is the winner.

Thanks for the feedback. I'll incorporate this change, along with the other
improvements we discussed, and send out v6 shortly.

>> Option 2: External Declaration (The list.h approach)
>>
>> The macro expects the caller to provide pre-declared variables, similar to
>> list_for_each_entry(). This removes all re-declarations inside the macro.
>>
>> for (_bn_i = 0; _bn_i < ARRAY_SIZE(bench_lens); _bn_i++) {
>>     buf_size = bench_lens[_bn_i];
>>     buf_name = _bn_buf + _bn_size - buf_size - 1;
>>     ...
>> }
>>
>> Usage:
>>   size_t my_len;
>>   char *my_buf;
>>   STRING_BENCH_BUF(test, my_buf, my_len, strnlen, my_buf, my_len);
>>
>> Please let me know which style fits the kernel's preference better, and
>> I will implement it in v6 along with your other suggestions.
>>
>> Thanks for the catch!
>>
>>>>>> +					* (NSEC_PER_SEC / MEGA);	\
>>>>>> Leave '*' on the previous line.
>>>>
>>>> Will fix.
>>>>
>>>>>> +			_bn_mbps = div64_u64(_bn_mbps, _bn_t);		\
>>>>>> +			_bn_lat = div64_u64(_bn_t, _bn_iters);		\
>>>>>> +		}							\
>>>>>> +		kunit_info(test, "len=%zu: %llu MB/s (%llu ns/call)\n",	\
>>>>>> +				buf_size, _bn_mbps, _bn_lat);		\
>>>>>> +	}								\
>>>>>> +} while (0)
> 

-- 
With Best Regards,
Feng Jiang

[PATCH v5 1/8] lib/string_kunit: add correctness test for strlen()
[PATCH v5 2/8] lib/string_kunit: add correctness test for strnlen()
[PATCH v5 3/8] lib/string_kunit: add correctness test for strrchr()
[PATCH v5 4/8] lib/string_kunit: add performance benchmark for strlen()
[PATCH v5 5/8] lib/string_kunit: extend benchmarks to strnlen() and chr searches
[PATCH v5 6/8] riscv: lib: add strnlen() implementation
[PATCH v5 7/8] riscv: lib: add strchr() implementation
[PATCH v5 8/8] riscv: lib: add strrchr() implementation