lib/Kconfig.debug | 9 ++++ lib/tests/memcpy_kunit.c | 107 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+)
Add optional benchmarks for memcpy() and memmove() functions.
Each benchmark is run with different sizes and with both aligned and
unaligned buffers, to spot alignment issues on platforms where unaligned
accesses have a noticeable performance impact.
Sample output on a RISC-V machine:
# modprobe memcpy_kunit
KTAP version 1
1..1
KTAP version 1
# Subtest: memcpy
# module: memcpy_kunit
1..8
[...]
# memcpy_bench_test: memcpy: aligned copy of len 2: 6 MB/s
# memcpy_bench_test: memcpy: unaligned copy of len 2: 6 MB/s
# memcpy_bench_test: memcpy: aligned copy of len 64: 179 MB/s
# memcpy_bench_test: memcpy: unaligned copy of len 64: 170 MB/s
# memcpy_bench_test: memcpy: aligned copy of len 256: 697 MB/s
# memcpy_bench_test: memcpy: unaligned copy of len 256: 421 MB/s
# memcpy_bench_test: memcpy: aligned copy of len 4194304: 935 MB/s
# memcpy_bench_test: memcpy: unaligned copy of len 4194304: 333 MB/s
# memcpy_bench_test.speed: slow
ok 7 memcpy_bench_test
# memmove_bench_test: memmove: aligned move of len 64: 162 MB/s
# memmove_bench_test: memmove: unaligned move of len 64: 162 MB/s
# memmove_bench_test: memmove: aligned move of len 256: 647 MB/s
# memmove_bench_test: memmove: unaligned move of len 256: 647 MB/s
# memmove_bench_test: memmove: aligned move of len 4194304: 1540 MB/s
# memmove_bench_test: memmove: unaligned move of len 4194304: 1557 MB/s
# memmove_bench_test.speed: slow
ok 8 memmove_bench_test
# memcpy: pass:8 fail:0 skip:0 total:8
# Totals: pass:8 fail:0 skip:0 total:8
ok 1 memcpy
Signed-off-by: Matteo Croce <teknoraver@meta.com>
---
lib/Kconfig.debug | 9 ++++
lib/tests/memcpy_kunit.c | 107 +++++++++++++++++++++++++++++++++++++++
2 files changed, 116 insertions(+)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ba36939fda79..02868c4397cb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2880,6 +2880,15 @@ config MEMCPY_KUNIT_TEST
If unsure, say N.
+config MEMCPY_KUNIT_BENCHMARK
+ bool "Benchmark string functions"
+ depends on MEMCPY_KUNIT_TEST
+ help
+ A benchmark for memcpy() and memmove() functions,
+ with both aligned and unaligned buffers.
+
+ If unsure, say N.
+
config IS_SIGNED_TYPE_KUNIT_TEST
tristate "Test is_signed_type() macro" if !KUNIT_ALL_TESTS
depends on KUNIT
diff --git a/lib/tests/memcpy_kunit.c b/lib/tests/memcpy_kunit.c
index d36933554e46..e7588b868125 100644
--- a/lib/tests/memcpy_kunit.c
+++ b/lib/tests/memcpy_kunit.c
@@ -493,6 +493,109 @@ static void memmove_overlap_test(struct kunit *test)
}
}
+#ifdef CONFIG_MEMCPY_KUNIT_BENCHMARK
+
+#define COPIES_NUM 100
+
+/*
+ * Time COPIES_NUM memcpy() calls of @size bytes and report the average
+ * throughput in MB/s.  When @unalign is true the destination is offset
+ * by one byte so the copy is misaligned.
+ */
+static void memcpy_bench_size_align(struct kunit *test, int size, bool unalign)
+{
+	u64 start, end, total_ns = 0;
+	char *dst, *src;
+
+	/*
+	 * kunit-managed allocations are freed automatically when the test
+	 * ends, including on assertion failure, so no cleanup path is needed.
+	 */
+	dst = kunit_kzalloc(test, size, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, dst);
+
+	src = kunit_kzalloc(test, size, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, src);
+
+	for (int i = 0; i < COPIES_NUM; i++) {
+		/* Keep IRQs off around the timed region to reduce jitter. */
+		local_irq_disable();
+		start = ktime_get_ns();
+		memcpy(dst + unalign, src, size - unalign);
+		end = ktime_get_ns();
+		local_irq_enable();
+		total_ns += end - start;
+	}
+
+	/* Avoid division by zero */
+	if (!total_ns)
+		total_ns = 1;
+
+	/* Account only the size - unalign bytes actually copied per pass. */
+	kunit_info(test, "memcpy: %saligned copy of len %d: %lld MB/s\n",
+		   unalign ? "un" : "", size,
+		   (COPIES_NUM * (u64)(size - unalign) * 1000000000ULL / total_ns) / (1024 * 1024));
+}
+
+/* Run the memcpy benchmark for @size, once aligned and once unaligned. */
+static void memcpy_bench_size(struct kunit *test, int size)
+{
+	for (int unalign = 0; unalign <= 1; unalign++)
+		memcpy_bench_size_align(test, size, unalign);
+}
+
+/* Benchmark memcpy() across a spread of small, medium and huge sizes. */
+static void memcpy_bench_test(struct kunit *test)
+{
+	const int sizes[] = { 2, 64, 256, PAGE_SIZE << MAX_PAGE_ORDER };
+
+	for (int i = 0; i < ARRAY_SIZE(sizes); i++)
+		memcpy_bench_size(test, sizes[i]);
+}
+
+/*
+ * Time COPIES_NUM overlapping memmove() calls within a @size byte buffer
+ * and report the average throughput in MB/s.  The destination overlaps
+ * the source by size - shift bytes; @unalign offsets it by one more byte.
+ */
+static void memmove_bench_size_align(struct kunit *test, int size, bool unalign)
+{
+	u64 start, end, total_ns = 0;
+	char *buf;
+	const int shift = size / 10;
+	/* Bytes actually moved per iteration. */
+	const int len = size - shift - unalign;
+
+	/* kunit-managed: freed automatically, even on assertion failure. */
+	buf = kunit_kzalloc(test, size, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, buf);
+
+	for (int i = 0; i < COPIES_NUM; i++) {
+		/* Keep IRQs off around the timed region to reduce jitter. */
+		local_irq_disable();
+		start = ktime_get_ns();
+		memmove(buf + shift + unalign, buf, len);
+		end = ktime_get_ns();
+		local_irq_enable();
+		total_ns += end - start;
+	}
+
+	/* Avoid division by zero */
+	if (!total_ns)
+		total_ns = 1;
+
+	/* Account only the len bytes actually moved per pass. */
+	kunit_info(test, "memmove: %saligned move of len %d: %lld MB/s\n",
+		   unalign ? "un" : "", size,
+		   (COPIES_NUM * (u64)len * 1000000000ULL / total_ns) / (1024 * 1024));
+}
+
+/* Run the memmove benchmark for @size, once aligned and once unaligned. */
+static void memmove_bench_size(struct kunit *test, int size)
+{
+	for (int unalign = 0; unalign <= 1; unalign++)
+		memmove_bench_size_align(test, size, unalign);
+}
+
+/* Benchmark memmove() across small, medium and huge buffer sizes. */
+static void memmove_bench_test(struct kunit *test)
+{
+	const int sizes[] = { 64, 256, PAGE_SIZE << MAX_PAGE_ORDER };
+
+	for (int i = 0; i < ARRAY_SIZE(sizes); i++)
+		memmove_bench_size(test, sizes[i]);
+}
+#endif
+
static struct kunit_case memcpy_test_cases[] = {
KUNIT_CASE(memset_test),
KUNIT_CASE(memcpy_test),
@@ -500,6 +603,10 @@ static struct kunit_case memcpy_test_cases[] = {
KUNIT_CASE_SLOW(memmove_test),
KUNIT_CASE_SLOW(memmove_large_test),
KUNIT_CASE_SLOW(memmove_overlap_test),
+#ifdef CONFIG_MEMCPY_KUNIT_BENCHMARK
+ KUNIT_CASE_SLOW(memcpy_bench_test),
+ KUNIT_CASE_SLOW(memmove_bench_test),
+#endif
{}
};
--
2.52.0
© 2016 - 2026 Red Hat, Inc.