[PATCH 2/3] lib/raid6: Optimizing the raid6_select_algo time through asynchronous processing

sunliming@linux.dev posted 3 patches 1 week, 2 days ago
There is a newer version of this series
[PATCH 2/3] lib/raid6: Optimizing the raid6_select_algo time through asynchronous processing
Posted by sunliming@linux.dev 1 week, 2 days ago
From: sunliming <sunliming@kylinos.cn>

Optimize the time spent in raid6_select_algo(). In raid6_select_algo(), a
raid6 gen algorithm is first selected quickly through synchronous processing,
while the time-consuming process of selecting the optimal algorithm via
benchmarking is handled asynchronously. This approach speeds up the overall
startup time and ultimately ensures the selection of an optimal algorithm.

Signed-off-by: sunliming <sunliming@kylinos.cn>
---
 lib/raid6/algos.c | 44 ++++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index ac6a77b0ae1d..c92168d59df2 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/raid/pq.h>
+#include <linux/workqueue.h>
 #ifndef __KERNEL__
 #include <sys/mman.h>
 #include <stdio.h>
@@ -168,7 +169,7 @@ static inline const struct raid6_calls *raid6_choose_gen_fast(void)
 
 	if (best) {
 		raid6_call = *best;
-		pr_info("raid6: skipped pq benchmark and selected %s\n",
+		pr_info("raid6: fast selected %s, async benchmark pending\n",
 			best->name);
 	} else {
 		pr_err("raid6: No valid algorithm found even for fast selection!\n");
@@ -177,7 +178,7 @@ static inline const struct raid6_calls *raid6_choose_gen_fast(void)
 	return best;
 }
 
-static inline const struct raid6_calls *raid6_gen_benchmark(
+static inline void raid6_gen_benchmark(
 		void *(*const dptrs)[RAID6_TEST_DISKS], const int disks)
 {
 	unsigned long perf, bestgenperf, j0, j1;
@@ -214,12 +215,11 @@ static inline const struct raid6_calls *raid6_gen_benchmark(
 	}
 
 	if (!best) {
-		pr_err("raid6: Yikes! No algorithm found!\n");
-		goto out;
+		pr_err("raid6: async benchmark failed to find any algorithm\n");
+		return;
 	}
 
 	raid6_call = *best;
-
 	pr_info("raid6: using algorithm %s gen() %ld MB/s\n",
 		best->name,
 		(bestgenperf * HZ * (disks - 2)) >>
@@ -244,14 +244,11 @@ static inline const struct raid6_calls *raid6_gen_benchmark(
 			(perf * HZ * (disks - 2)) >>
 			(20 - PAGE_SHIFT + RAID6_TIME_JIFFIES_LG2 + 1));
 	}
-
-out:
-	return best;
 }
 
 /* Try to pick the best algorithm */
 /* This code uses the gfmul table as convenient data set to abuse */
-static int raid6_choose_gen_benmark(const struct raid6_calls **gen_best)
+static int raid6_choose_gen_benmark(void)
 {
 	const int disks = RAID6_TEST_DISKS;
 	char *disk_ptr, *p;
@@ -278,32 +275,39 @@ static int raid6_choose_gen_benmark(const struct raid6_calls **gen_best)
 	if ((disks - 2) * PAGE_SIZE % 65536)
 		memcpy(p, raid6_gfmul, (disks - 2) * PAGE_SIZE % 65536);
 
-	*gen_best = raid6_gen_benchmark(&dptrs, disks);
+	raid6_gen_benchmark(&dptrs, disks);
 
 	free_pages((unsigned long)disk_ptr, RAID6_TEST_DISKS_ORDER);
 
 	return 0;
 }
 
+static struct work_struct raid6_benchmark_work __initdata;
+
+static __init void benchmark_work_func(struct work_struct *work)
+{
+	raid6_choose_gen_benmark();
+}
+
 int __init raid6_select_algo(void)
 {
-	int ret;
 	const struct raid6_calls *gen_best = NULL;
 	const struct raid6_recov_calls *rec_best = NULL;
 
-	/* select raid gen_syndrome functions */
-	if (!IS_ENABLED(CONFIG_RAID6_PQ_BENCHMARK))
-		gen_best = raid6_choose_gen_fast();
-	else {
-		ret = raid6_choose_gen_benmark(&gen_best);
-		if (ret < 0)
-			return ret;
-	}
+	/* phase 1: synchronous fast selection generation algorithm */
+	gen_best = raid6_choose_gen_fast();
 
 	/* select raid recover functions */
 	rec_best = raid6_choose_recov();
 
-	return gen_best && rec_best ? 0 : -EINVAL;
+	if (!gen_best || !rec_best)
+		return -EINVAL;
+
+	/* phase 2: asynchronous performance benchmarking */
+	INIT_WORK(&raid6_benchmark_work, benchmark_work_func);
+	schedule_work(&raid6_benchmark_work);
+
+	return 0;
 }
 
 static void raid6_exit(void)
-- 
2.25.1
Re: [PATCH 2/3] lib/raid6: Optimizing the raid6_select_algo time through asynchronous processing
Posted by kernel test robot 3 days, 19 hours ago

Hello,

kernel test robot noticed "Oops:int3:#[##]SMP_KASAN_NOPTI" on:

commit: e806b74f91ad2c995a3313d4cc369c85fab1da5f ("[PATCH 2/3] lib/raid6: Optimizing the raid6_select_algo time through asynchronous processing")
url: https://github.com/intel-lab-lkp/linux/commits/sunliming-linux-dev/lib-raid6-Divide-the-raid6-algorithm-selection-process-into-two-parts/20260128-185709
base: https://git.kernel.org/cgit/linux/kernel/git/akpm/mm.git mm-nonmm-unstable
patch link: https://lore.kernel.org/all/20260128104923.338443-3-sunliming@linux.dev/
patch subject: [PATCH 2/3] lib/raid6: Optimizing the raid6_select_algo time through asynchronous processing

in testcase: perf-sanity-tests
version: 
with following parameters:

	perf_compiler: gcc
	group: group-01



config: x86_64-rhel-9.4-bpf
compiler: gcc-14
test machine: 16 threads 1 sockets Intel(R) Xeon(R) E-2278G CPU @ 3.40GHz (Coffee Lake-E) with 32G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202602032100.d8e49b6f-lkp@intel.com




[   81.117339][  T479] xor: automatically using best checksumming function   avx
[   81.122258][  T109] raid6: avx2x2   gen() 17654 MB/s
[   81.157936][  T109] raid6: avx2x1   gen() 10371 MB/s
[   81.172813][  T109] raid6: using algorithm avx2x2 gen() 17654 MB/s
[   81.205659][  T109] raid6: .... xor() 12927 MB/s, rmw enabled
[   81.221136][   C15] Oops: int3: 0000 [#1] SMP KASAN NOPTI
[   81.221144][   C15] CPU: 15 UID: 0 PID: 109 Comm: kworker/15:0 Tainted: G S                  6.19.0-rc6-00161-ge806b74f91ad #1 PREEMPT(full)
[   81.221149][   C15] Tainted: [S]=CPU_OUT_OF_SPEC
[   81.221150][   C15] Hardware name: Intel Corporation Mehlow UP Server Platform/Moss Beach Server, BIOS CNLSE2R1.R00.X188.B13.1903250419 03/25/2019
[   81.221153][   C15] Workqueue: events benchmark_work_func [raid6_pq]
[   81.221160][   C15] RIP: 0010:benchmark_work_func (recov_avx512.c:?) raid6_pq
[   81.221163][   C15] Code: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc <cc> cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc
All code
========
   0:	cc                   	int3
   1:	cc                   	int3
   2:	cc                   	int3
   3:	cc                   	int3
   4:	cc                   	int3
   5:	cc                   	int3
   6:	cc                   	int3
   7:	cc                   	int3
   8:	cc                   	int3
   9:	cc                   	int3
   a:	cc                   	int3
   b:	cc                   	int3
   c:	cc                   	int3
   d:	cc                   	int3
   e:	cc                   	int3
   f:	cc                   	int3
  10:	cc                   	int3
  11:	cc                   	int3
  12:	cc                   	int3
  13:	cc                   	int3
  14:	cc                   	int3
  15:	cc                   	int3
  16:	cc                   	int3
  17:	cc                   	int3
  18:	cc                   	int3
  19:	cc                   	int3
  1a:	cc                   	int3
  1b:	cc                   	int3
  1c:	cc                   	int3
  1d:	cc                   	int3
  1e:	cc                   	int3
  1f:	cc                   	int3
  20:	cc                   	int3
  21:	cc                   	int3
  22:	cc                   	int3
  23:	cc                   	int3
  24:	cc                   	int3
  25:	cc                   	int3
  26:	cc                   	int3
  27:	cc                   	int3
  28:	cc                   	int3
  29:	cc                   	int3
  2a:*	cc                   	int3		<-- trapping instruction
  2b:	cc                   	int3
  2c:	cc                   	int3
  2d:	cc                   	int3
  2e:	cc                   	int3
  2f:	cc                   	int3
  30:	cc                   	int3
  31:	cc                   	int3
  32:	cc                   	int3
  33:	cc                   	int3
  34:	cc                   	int3
  35:	cc                   	int3
  36:	cc                   	int3
  37:	cc                   	int3
  38:	cc                   	int3
  39:	cc                   	int3
  3a:	cc                   	int3
  3b:	cc                   	int3
  3c:	cc                   	int3
  3d:	cc                   	int3
  3e:	cc                   	int3
  3f:	cc                   	int3

Code starting with the faulting instruction
===========================================
   0:	cc                   	int3
   1:	cc                   	int3
   2:	cc                   	int3
   3:	cc                   	int3
   4:	cc                   	int3
   5:	cc                   	int3
   6:	cc                   	int3
   7:	cc                   	int3
   8:	cc                   	int3
   9:	cc                   	int3
   a:	cc                   	int3
   b:	cc                   	int3
   c:	cc                   	int3
   d:	cc                   	int3
   e:	cc                   	int3
   f:	cc                   	int3
  10:	cc                   	int3
  11:	cc                   	int3
  12:	cc                   	int3
  13:	cc                   	int3
  14:	cc                   	int3
  15:	cc                   	int3
[   81.221166][   C15] RSP: 0018:ffff888101557b58 EFLAGS: 00000282
[   81.221169][   C15] RAX: 0000000000000029 RBX: 1ffff110202aaf6b RCX: 0000000000000000
[   81.221171][   C15] RDX: 0000000000000029 RSI: ffffffff81513d9c RDI: ffffed10202aaf5e
[   81.221173][   C15] RBP: ffff88836aad0000 R08: 0000000000000000 R09: fffffbfff0b21df4
[   81.221175][   C15] R10: ffffffff8590efa7 R11: 0000000000000001 R12: ffff888101557bb8
[   81.221178][   C15] R13: ffff88836aad8000 R14: ffff888101557bb8 R15: ffff8887887c33c0
[   81.221180][   C15] FS:  0000000000000000(0000) GS:ffff888801e6a000(0000) knlGS:0000000000000000
[   81.221182][   C15] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   81.221185][   C15] CR2: 00007fbfb077d000 CR3: 000000084d2f6001 CR4: 00000000003726f0
[   81.221187][   C15] Call Trace:
[   81.221189][   C15]  <TASK>
[   81.221194][   C15]  ? rcu_is_watching (kbuild/src/consumer/arch/x86/include/asm/atomic.h:23 kbuild/src/consumer/include/linux/atomic/atomic-arch-fallback.h:457 kbuild/src/consumer/include/linux/context_tracking.h:128 kbuild/src/consumer/kernel/rcu/tree.c:751)
[   81.221199][   C15]  ? rcu_is_watching (kbuild/src/consumer/arch/x86/include/asm/atomic.h:23 kbuild/src/consumer/include/linux/atomic/atomic-arch-fallback.h:457 kbuild/src/consumer/include/linux/context_tracking.h:128 kbuild/src/consumer/kernel/rcu/tree.c:751)
[   81.221203][   C15]  ? lock_acquire (kbuild/src/consumer/include/trace/events/lock.h:24 (discriminator 2) kbuild/src/consumer/kernel/locking/lockdep.c:5831 (discriminator 2))
[   81.221208][   C15]  ? process_one_work (kbuild/src/consumer/arch/x86/include/asm/jump_label.h:37 kbuild/src/consumer/include/trace/events/workqueue.h:110 kbuild/src/consumer/kernel/workqueue.c:3262)


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20260203/202602032100.d8e49b6f-lkp@intel.com



-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki