From: sunliming <sunliming@kylinos.cn>
Optimize the time taken by raid6_select_algo(). In raid6_select_algo(), a raid6
gen algorithm is first selected quickly through synchronous processing, while
the time-consuming process of selecting the optimal algorithm via benchmarking
is handled asynchronously. This approach speeds up the overall startup time
and ultimately ensures the selection of an optimal algorithm.
Signed-off-by: sunliming <sunliming@kylinos.cn>
---
lib/raid6/algos.c | 44 ++++++++++++++++++++++++--------------------
1 file changed, 24 insertions(+), 20 deletions(-)
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index ac6a77b0ae1d..c92168d59df2 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -12,6 +12,7 @@
*/
#include <linux/raid/pq.h>
+#include <linux/workqueue.h>
#ifndef __KERNEL__
#include <sys/mman.h>
#include <stdio.h>
@@ -168,7 +169,7 @@ static inline const struct raid6_calls *raid6_choose_gen_fast(void)
if (best) {
raid6_call = *best;
- pr_info("raid6: skipped pq benchmark and selected %s\n",
+ pr_info("raid6: fast selected %s, async benchmark pending\n",
best->name);
} else {
pr_err("raid6: No valid algorithm found even for fast selection!\n");
@@ -177,7 +178,7 @@ static inline const struct raid6_calls *raid6_choose_gen_fast(void)
return best;
}
-static inline const struct raid6_calls *raid6_gen_benchmark(
+static inline void raid6_gen_benchmark(
void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
{
unsigned long perf, bestgenperf, j0, j1;
@@ -214,12 +215,11 @@ static inline const struct raid6_calls *raid6_gen_benchmark(
}
if (!best) {
- pr_err("raid6: Yikes! No algorithm found!\n");
- goto out;
+ pr_err("raid6: async benchmark failed to find any algorithm\n");
+ return;
}
raid6_call = *best;
-
pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
best->name,
(bestgenperf * HZ * (disks - 2)) >>
@@ -244,14 +244,11 @@ static inline const struct raid6_calls *raid6_gen_benchmark(
(perf * HZ * (disks - 2)) >>
(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
}
-
-out:
- return best;
}
/* Try to pick the best algorithm */
/* This code uses the gfmul table as convenient data set to abuse */
-static int raid6_choose_gen_benmark(const struct raid6_calls **gen_best)
+static int raid6_choose_gen_benmark(void)
{
const int disks = RAID6_TEST_DISKS;
char *disk_ptr, *p;
@@ -278,32 +275,39 @@ static int raid6_choose_gen_benmark(const struct raid6_calls **gen_best)
if ((disks - 2) * PAGE_SIZE % 65536)
memcpy(p, raid6_gfmul, (disks - 2) * PAGE_SIZE % 65536);
- *gen_best = raid6_gen_benchmark(&dptrs, disks);
+ raid6_gen_benchmark(&dptrs, disks);
free_pages((unsigned long)disk_ptr, RAID6_TEST_DISKS_ORDER);
return 0;
}
+/*
+ * Work item for the deferred gen() benchmark.
+ *
+ * Neither the work item nor its handler may be placed in an init section:
+ * the work is queued with schedule_work() and may still be pending or
+ * running after raid6_select_algo() has returned and the init sections
+ * have been discarded, at which point the handler would execute freed
+ * (int3-poisoned) text.
+ *
+ * NOTE(review): the exit path should also cancel_work_sync() this item so
+ * it cannot run after module unload - confirm against raid6_exit().
+ */
+static struct work_struct raid6_benchmark_work;
+
+/* Workqueue handler: run the full benchmark-based gen() selection. */
+static void benchmark_work_func(struct work_struct *work)
+{
+	raid6_choose_gen_benmark();
+}
+
int __init raid6_select_algo(void)
{
- int ret;
const struct raid6_calls *gen_best = NULL;
const struct raid6_recov_calls *rec_best = NULL;
- /* select raid gen_syndrome functions */
- if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK))
- gen_best = raid6_choose_gen_fast();
- else {
- ret = raid6_choose_gen_benmark(&gen_best);
- if (ret < 0)
- return ret;
- }
+ /* phase 1: synchronous fast selection generation algorithm */
+ gen_best = raid6_choose_gen_fast();
/* select raid recover functions */
rec_best = raid6_choose_recov();
- return gen_best && rec_best ? 0 : -EINVAL;
+ if (!gen_best || !rec_best)
+ return -EINVAL;
+
+ /* phase 2: asynchronous performance benchmarking */
+ INIT_WORK(&raid6_benchmark_work, benchmark_work_func);
+ schedule_work(&raid6_benchmark_work);
+
+ return 0;
}
static void raid6_exit(void)
--
2.25.1
Hello,
kernel test robot noticed "Oops:int3:#[##]SMP_KASAN_NOPTI" on:
commit: e806b74f91ad2c995a3313d4cc369c85fab1da5f ("[PATCH 2/3] lib/raid6: Optimizing the raid6_select_algo time through asynchronous processing")
url: https://github.com/intel-lab-lkp/linux/commits/sunliming-linux-dev/lib-raid6-Divide-the-raid6-algorithm-selection-process-into-two-parts/20260128-185709
base: https://git.kernel.org/cgit/linux/kernel/git/akpm/mm.git mm-nonmm-unstable
patch link: https://lore.kernel.org/all/20260128104923.338443-3-sunliming@linux.dev/
patch subject: [PATCH 2/3] lib/raid6: Optimizing the raid6_select_algo time through asynchronous processing
in testcase: perf-sanity-tests
version:
with following parameters:
perf_compiler: gcc
group: group-01
config: x86_64-rhel-9.4-bpf
compiler: gcc-14
test machine: 16 threads 1 sockets Intel(R) Xeon(R) E-2278G CPU @ 3.40GHz (Coffee Lake-E) with 32G memory
(please refer to attached dmesg/kmsg for entire log/backtrace)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202602032100.d8e49b6f-lkp@intel.com
[ 81.117339][ T479] xor: automatically using best checksumming function avx
[ 81.122258][ T109] raid6: avx2x2 gen() 17654 MB/s
[ 81.157936][ T109] raid6: avx2x1 gen() 10371 MB/s
[ 81.172813][ T109] raid6: using algorithm avx2x2 gen() 17654 MB/s
[ 81.205659][ T109] raid6: .... xor() 12927 MB/s, rmw enabled
[ 81.221136][ C15] Oops: int3: 0000 [#1] SMP KASAN NOPTI
[ 81.221144][ C15] CPU: 15 UID: 0 PID: 109 Comm: kworker/15:0 Tainted: G S 6.19.0-rc6-00161-ge806b74f91ad #1 PREEMPT(full)
[ 81.221149][ C15] Tainted: [S]=CPU_OUT_OF_SPEC
[ 81.221150][ C15] Hardware name: Intel Corporation Mehlow UP Server Platform/Moss Beach Server, BIOS CNLSE2R1.R00.X188.B13.1903250419 03/25/2019
[ 81.221153][ C15] Workqueue: events benchmark_work_func [raid6_pq]
[ 81.221160][ C15] RIP: 0010:benchmark_work_func (recov_avx512.c:?) raid6_pq
[ 81.221163][ C15] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc <cc> cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
All code
========
0: cc int3
1: cc int3
2: cc int3
3: cc int3
4: cc int3
5: cc int3
6: cc int3
7: cc int3
8: cc int3
9: cc int3
a: cc int3
b: cc int3
c: cc int3
d: cc int3
e: cc int3
f: cc int3
10: cc int3
11: cc int3
12: cc int3
13: cc int3
14: cc int3
15: cc int3
16: cc int3
17: cc int3
18: cc int3
19: cc int3
1a: cc int3
1b: cc int3
1c: cc int3
1d: cc int3
1e: cc int3
1f: cc int3
20: cc int3
21: cc int3
22: cc int3
23: cc int3
24: cc int3
25: cc int3
26: cc int3
27: cc int3
28: cc int3
29: cc int3
2a:* cc int3 <-- trapping instruction
2b: cc int3
2c: cc int3
2d: cc int3
2e: cc int3
2f: cc int3
30: cc int3
31: cc int3
32: cc int3
33: cc int3
34: cc int3
35: cc int3
36: cc int3
37: cc int3
38: cc int3
39: cc int3
3a: cc int3
3b: cc int3
3c: cc int3
3d: cc int3
3e: cc int3
3f: cc int3
Code starting with the faulting instruction
===========================================
0: cc int3
1: cc int3
2: cc int3
3: cc int3
4: cc int3
5: cc int3
6: cc int3
7: cc int3
8: cc int3
9: cc int3
a: cc int3
b: cc int3
c: cc int3
d: cc int3
e: cc int3
f: cc int3
10: cc int3
11: cc int3
12: cc int3
13: cc int3
14: cc int3
15: cc int3
[ 81.221166][ C15] RSP: 0018:ffff888101557b58 EFLAGS: 00000282
[ 81.221169][ C15] RAX: 0000000000000029 RBX: 1ffff110202aaf6b RCX: 0000000000000000
[ 81.221171][ C15] RDX: 0000000000000029 RSI: ffffffff81513d9c RDI: ffffed10202aaf5e
[ 81.221173][ C15] RBP: ffff88836aad0000 R08: 0000000000000000 R09: fffffbfff0b21df4
[ 81.221175][ C15] R10: ffffffff8590efa7 R11: 0000000000000001 R12: ffff888101557bb8
[ 81.221178][ C15] R13: ffff88836aad8000 R14: ffff888101557bb8 R15: ffff8887887c33c0
[ 81.221180][ C15] FS: 0000000000000000(0000) GS:ffff888801e6a000(0000) knlGS:0000000000000000
[ 81.221182][ C15] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 81.221185][ C15] CR2: 00007fbfb077d000 CR3: 000000084d2f6001 CR4: 00000000003726f0
[ 81.221187][ C15] Call Trace:
[ 81.221189][ C15] <TASK>
[ 81.221194][ C15] ? rcu_is_watching (kbuild/src/consumer/arch/x86/include/asm/atomic.h:23 kbuild/src/consumer/include/linux/atomic/atomic-arch-fallback.h:457 kbuild/src/consumer/include/linux/context_tracking.h:128 kbuild/src/consumer/kernel/rcu/tree.c:751)
[ 81.221199][ C15] ? rcu_is_watching (kbuild/src/consumer/arch/x86/include/asm/atomic.h:23 kbuild/src/consumer/include/linux/atomic/atomic-arch-fallback.h:457 kbuild/src/consumer/include/linux/context_tracking.h:128 kbuild/src/consumer/kernel/rcu/tree.c:751)
[ 81.221203][ C15] ? lock_acquire (kbuild/src/consumer/include/trace/events/lock.h:24 (discriminator 2) kbuild/src/consumer/kernel/locking/lockdep.c:5831 (discriminator 2))
[ 81.221208][ C15] ? process_one_work (kbuild/src/consumer/arch/x86/include/asm/jump_label.h:37 kbuild/src/consumer/include/trace/events/workqueue.h:110 kbuild/src/consumer/kernel/workqueue.c:3262)
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20260203/202602032100.d8e49b6f-lkp@intel.com
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2026 Red Hat, Inc.