Creates an "rcuscale" folder in debugfs and creates a "writer_durations"
file in the folder. This file is in CSV format. Each line represents
one duration record, with columns defined as:
writer_id,duration
Added an option "writer_no_print" to skip printing writer durations on
cleanup.
This allows external tools to read structured data and also drastically
improves cleanup performance on large core count machines.
On a 256C 512T machines running nreaders=1 nwriters=511:
Before:
$ time modprobe -r rcuscale; modprobe -r torture
real 3m17.349s
user 0m0.000s
sys 3m15.288s
After:
$ time cat /sys/kernel/debug/rcuscale/writer_durations > durations.csv
real 0m0.005s
user 0m0.000s
sys 0m0.005s
$ time modprobe -r rcuscale; modprobe -r torture
real 0m0.388s
user 0m0.000s
sys 0m0.335s
Signed-off-by: Yuzhuo Jing <yuzhuo@google.com>
---
.../admin-guide/kernel-parameters.txt | 5 +
kernel/rcu/rcuscale.c | 142 +++++++++++++++++-
2 files changed, 139 insertions(+), 8 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..7b62a84a19d4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5583,6 +5583,11 @@
periods, but in jiffies. The default of zero
says no holdoff.
+ rcuscale.writer_no_print= [KNL]
+ Do not print writer durations to kernel ring buffer.
+ Instead, users can read them from the
+ rcuscale/writer_durations file in debugfs.
+
rcutorture.fqs_duration= [KNL]
Set duration of force_quiescent_state bursts
in microseconds.
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index b521d0455992..ad10b42be6fc 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -40,6 +40,8 @@
#include <linux/vmalloc.h>
#include <linux/rcupdate_trace.h>
#include <linux/sched/debug.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include "rcu.h"
@@ -97,6 +99,7 @@ torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
+torture_param(bool, writer_no_print, false, "Do not print writer durations to ring buffer");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
@@ -138,6 +141,9 @@ static u64 t_rcu_scale_writer_finished;
static unsigned long b_rcu_gp_test_started;
static unsigned long b_rcu_gp_test_finished;
+static struct dentry *debugfs_dir;
+static struct dentry *debugfs_writer_durations;
+
#define MAX_MEAS 10000
#define MIN_MEAS 100
@@ -607,6 +613,7 @@ rcu_scale_writer(void *arg)
t = ktime_get_mono_fast_ns();
*wdp = t - *wdp;
i_max = i;
+ writer_n_durations[me] = i_max + 1;
if (!started &&
atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
started = true;
@@ -620,6 +627,7 @@ rcu_scale_writer(void *arg)
nrealwriters) {
schedule_timeout_interruptible(10);
rcu_ftrace_dump(DUMP_ALL);
+ WRITE_ONCE(test_complete, true);
SCALEOUT_STRING("Test complete");
t_rcu_scale_writer_finished = t;
if (gp_exp) {
@@ -666,7 +674,6 @@ rcu_scale_writer(void *arg)
rcu_scale_free(wmbp);
cur_ops->gp_barrier();
}
- writer_n_durations[me] = i_max + 1;
torture_kthread_stopping("rcu_scale_writer");
return 0;
}
@@ -941,6 +948,117 @@ kfree_scale_init(void)
return firsterr;
}
+/*
+ * A seq_file for writer_durations. Content is only visible when all writers
+ * finish. Element i of the sequence is writer_durations + i.
+ */
+static void *writer_durations_start(struct seq_file *m, loff_t *pos)
+{
+ loff_t writer_id = *pos;
+
+ if (!test_complete || writer_id < 0 || writer_id >= nrealwriters)
+ return NULL;
+
+ return writer_durations + writer_id;
+}
+
+static void *writer_durations_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return writer_durations_start(m, pos);
+}
+
+static void writer_durations_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * Each element in the seq_file is an array of one writer's durations.
+ * Each element prints writer_n_durations[writer_id] lines, and each line
+ * contains one duration record, in CSV format:
+ * writer_id,duration
+ */
+static int writer_durations_show(struct seq_file *m, void *v)
+{
+ u64 **durations = v;
+ loff_t writer_id = durations - writer_durations;
+
+ for (int i = 0; i < writer_n_durations[writer_id]; ++i)
+ seq_printf(m, "%lld,%lld\n", writer_id, durations[0][i]);
+
+ return 0;
+}
+
+static const struct seq_operations writer_durations_op = {
+ .start = writer_durations_start,
+ .next = writer_durations_next,
+ .stop = writer_durations_stop,
+ .show = writer_durations_show
+};
+
+static int writer_durations_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &writer_durations_op);
+}
+
+static const struct file_operations writer_durations_fops = {
+ .owner = THIS_MODULE,
+ .open = writer_durations_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * Create an rcuscale directory exposing run states and results.
+ */
+static int register_debugfs(void)
+{
+#define try_create_file(variable, name, mode, parent, data, fops) \
+({ \
+ variable = debugfs_create_file((name), (mode), (parent), (data), (fops)); \
+ err = PTR_ERR_OR_ZERO(variable); \
+ err; \
+})
+
+ int err;
+
+ debugfs_dir = debugfs_create_dir("rcuscale", NULL);
+ err = PTR_ERR_OR_ZERO(debugfs_dir);
+ if (err)
+ goto fail;
+
+ if (try_create_file(debugfs_writer_durations, "writer_durations", 0444,
+ debugfs_dir, NULL, &writer_durations_fops))
+ goto fail;
+
+ return 0;
+fail:
+ pr_err("rcu-scale: Failed to create debugfs file.");
+ /* unregister_debugfs is called by rcu_scale_cleanup, avoid
+ * calling it twice.
+ */
+ return err;
+#undef try_create_file
+}
+
+static void unregister_debugfs(void)
+{
+#define try_remove(variable) \
+do { \
+ if (!IS_ERR_OR_NULL(variable)) \
+ debugfs_remove(variable); \
+ variable = NULL; \
+} while (0)
+
+ try_remove(debugfs_writer_durations);
+
+ /* Remove directory after files. */
+ try_remove(debugfs_dir);
+
+#undef try_remove
+}
+
static void
rcu_scale_cleanup(void)
{
@@ -961,6 +1079,8 @@ rcu_scale_cleanup(void)
if (gp_exp && gp_async)
SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
+ unregister_debugfs();
+
// If built-in, just report all of the GP kthread's CPU time.
if (IS_BUILTIN(CONFIG_RCU_SCALE_TEST) && !kthread_tp && cur_ops->rso_gp_kthread)
kthread_tp = cur_ops->rso_gp_kthread();
@@ -1020,13 +1140,15 @@ rcu_scale_cleanup(void)
wdpp = writer_durations[i];
if (!wdpp)
continue;
- for (j = 0; j < writer_n_durations[i]; j++) {
- wdp = &wdpp[j];
- pr_alert("%s%s %4d writer-duration: %5d %llu\n",
- scale_type, SCALE_FLAG,
- i, j, *wdp);
- if (j % 100 == 0)
- schedule_timeout_uninterruptible(1);
+ if (!writer_no_print) {
+ for (j = 0; j < writer_n_durations[i]; j++) {
+ wdp = &wdpp[j];
+ pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+ scale_type, SCALE_FLAG,
+ i, j, *wdp);
+ if (j % 100 == 0)
+ schedule_timeout_uninterruptible(1);
+ }
}
kfree(writer_durations[i]);
if (writer_freelists) {
@@ -1202,6 +1324,10 @@ rcu_scale_init(void)
if (torture_init_error(firsterr))
goto unwind;
}
+
+ if (register_debugfs())
+ goto unwind;
+
torture_init_end();
return 0;
--
2.50.1.552.g942d659e1b-goog
Hi Yuzhuo, kernel test robot noticed the following build errors: [auto build test ERROR on rcu/rcu/dev] [also build test ERROR on linus/master v6.16 next-20250731] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Yuzhuo-Jing/rcuscale-Create-debugfs-file-for-writer-durations/20250730-102613 base: https://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev patch link: https://lore.kernel.org/r/20250730022347.71722-2-yuzhuo%40google.com patch subject: [PATCH v1 1/4] rcuscale: Create debugfs file for writer durations config: i386-randconfig-002-20250731 (https://download.01.org/0day-ci/archive/20250731/202507311504.ttQGhW04-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250731/202507311504.ttQGhW04-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202507311504.ttQGhW04-lkp@intel.com/ All errors (new ones prefixed by >>): In file included from <command-line>: kernel/rcu/rcuscale.c: In function 'rcu_scale_writer': >> kernel/rcu/rcuscale.c:630:44: error: 'test_complete' undeclared (first use in this function); did you mean 'complete'? 630 | WRITE_ONCE(test_complete, true); | ^~~~~~~~~~~~~ include/linux/compiler_types.h:548:23: note: in definition of macro '__compiletime_assert' 548 | if (!(condition)) \ | ^~~~~~~~~ include/linux/compiler_types.h:568:9: note: in expansion of macro '_compiletime_assert' 568 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:9: note: in expansion of macro 'compiletime_assert' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:28: note: in expansion of macro '__native_word' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~ include/asm-generic/rwonce.h:60:9: note: in expansion of macro 'compiletime_assert_rwonce_type' 60 | compiletime_assert_rwonce_type(x); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/rcu/rcuscale.c:630:33: note: in expansion of macro 'WRITE_ONCE' 630 | WRITE_ONCE(test_complete, true); | ^~~~~~~~~~ kernel/rcu/rcuscale.c:630:44: note: each undeclared identifier is reported only once for each function it appears in 630 | WRITE_ONCE(test_complete, true); | ^~~~~~~~~~~~~ include/linux/compiler_types.h:548:23: note: in definition of macro '__compiletime_assert' 548 | if (!(condition)) \ | ^~~~~~~~~ include/linux/compiler_types.h:568:9: note: in expansion of macro '_compiletime_assert' 568 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) | ^~~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:9: note: in expansion of macro 'compiletime_assert' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~~~~~~ include/asm-generic/rwonce.h:36:28: note: in expansion of macro '__native_word' 36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ | ^~~~~~~~~~~~~ include/asm-generic/rwonce.h:60:9: note: in expansion of macro 'compiletime_assert_rwonce_type' 60 | compiletime_assert_rwonce_type(x); \ | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/rcu/rcuscale.c:630:33: note: in expansion of macro 'WRITE_ONCE' 630 | WRITE_ONCE(test_complete, true); | ^~~~~~~~~~ kernel/rcu/rcuscale.c: In function 'writer_durations_start': kernel/rcu/rcuscale.c:959:14: error: 'test_complete' undeclared (first use in this function); did you mean 'complete'? 959 | if (!test_complete || writer_id < 0 || writer_id >= nrealwriters) | ^~~~~~~~~~~~~ | complete vim +630 kernel/rcu/rcuscale.c 534 535 /* 536 * RCU scale writer kthread. Repeatedly does a grace period. 537 */ 538 static int 539 rcu_scale_writer(void *arg) 540 { 541 int i = 0; 542 int i_max; 543 unsigned long jdone; 544 long me = (long)arg; 545 bool selfreport = false; 546 bool started = false, done = false, alldone = false; 547 u64 t; 548 DEFINE_TORTURE_RANDOM(tr); 549 u64 *wdp; 550 u64 *wdpp = writer_durations[me]; 551 struct writer_freelist *wflp = &writer_freelists[me]; 552 struct writer_mblock *wmbp = NULL; 553 554 VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started"); 555 WARN_ON(!wdpp); 556 set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)); 557 current->flags |= PF_NO_SETAFFINITY; 558 sched_set_fifo_low(current); 559 560 if (holdoff) 561 schedule_timeout_idle(holdoff * HZ); 562 563 /* 564 * Wait until rcu_end_inkernel_boot() is called for normal GP tests 565 * so that RCU is not always expedited for normal GP tests. 566 * The system_state test is approximate, but works well in practice. 567 */ 568 while (!gp_exp && system_state != SYSTEM_RUNNING) 569 schedule_timeout_uninterruptible(1); 570 571 t = ktime_get_mono_fast_ns(); 572 if (atomic_inc_return(&n_rcu_scale_writer_started) >= nrealwriters) { 573 t_rcu_scale_writer_started = t; 574 if (gp_exp) { 575 b_rcu_gp_test_started = 576 cur_ops->exp_completed() / 2; 577 } else { 578 b_rcu_gp_test_started = cur_ops->get_gp_seq(); 579 } 580 } 581 582 jdone = jiffies + minruntime * HZ; 583 do { 584 bool gp_succeeded = false; 585 586 if (writer_holdoff) 587 udelay(writer_holdoff); 588 if (writer_holdoff_jiffies) 589 schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1); 590 wdp = &wdpp[i]; 591 *wdp = ktime_get_mono_fast_ns(); 592 if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) { 593 if (!wmbp) 594 wmbp = rcu_scale_alloc(me); 595 if (wmbp && atomic_read(&wflp->ws_inflight) < gp_async_max) { 596 atomic_inc(&wflp->ws_inflight); 597 cur_ops->async(&wmbp->wmb_rh, rcu_scale_async_cb); 598 wmbp = NULL; 599 gp_succeeded = true; 600 } else if (!kthread_should_stop()) { 601 cur_ops->gp_barrier(); 602 } else { 603 rcu_scale_free(wmbp); /* Because we are stopping. */ 604 wmbp = NULL; 605 } 606 } else if (gp_exp) { 607 cur_ops->exp_sync(); 608 gp_succeeded = true; 609 } else { 610 cur_ops->sync(); 611 gp_succeeded = true; 612 } 613 t = ktime_get_mono_fast_ns(); 614 *wdp = t - *wdp; 615 i_max = i; 616 writer_n_durations[me] = i_max + 1; 617 if (!started && 618 atomic_read(&n_rcu_scale_writer_started) >= nrealwriters) 619 started = true; 620 if (!done && i >= MIN_MEAS && time_after(jiffies, jdone)) { 621 done = true; 622 WRITE_ONCE(writer_done[me], true); 623 sched_set_normal(current, 0); 624 pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n", 625 scale_type, SCALE_FLAG, me, MIN_MEAS); 626 if (atomic_inc_return(&n_rcu_scale_writer_finished) >= 627 nrealwriters) { 628 schedule_timeout_interruptible(10); 629 rcu_ftrace_dump(DUMP_ALL); > 630 WRITE_ONCE(test_complete, true); 631 SCALEOUT_STRING("Test complete"); 632 t_rcu_scale_writer_finished = t; 633 if (gp_exp) { 634 b_rcu_gp_test_finished = 635 cur_ops->exp_completed() / 2; 636 } else { 637 b_rcu_gp_test_finished = 638 cur_ops->get_gp_seq(); 639 } 640 if (shutdown) { 641 smp_mb(); /* Assign before wake. */ 642 wake_up(&shutdown_wq); 643 } 644 } 645 } 646 if (done && !alldone && 647 atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters) 648 alldone = true; 649 if (done && !alldone && time_after(jiffies, jdone + HZ * 60)) { 650 static atomic_t dumped; 651 int i; 652 653 if (!atomic_xchg(&dumped, 1)) { 654 for (i = 0; i < nrealwriters; i++) { 655 if (writer_done[i]) 656 continue; 657 pr_info("%s: Task %ld flags writer %d:\n", __func__, me, i); 658 sched_show_task(writer_tasks[i]); 659 } 660 if (cur_ops->stats) 661 cur_ops->stats(); 662 } 663 } 664 if (!selfreport && time_after(jiffies, jdone + HZ * (70 + me))) { 665 pr_info("%s: Writer %ld self-report: started %d done %d/%d->%d i %d jdone %lu.\n", 666 __func__, me, started, done, writer_done[me], atomic_read(&n_rcu_scale_writer_finished), i, jiffies - jdone); 667 selfreport = true; 668 } 669 if (gp_succeeded && started && !alldone && i < MAX_MEAS - 1) 670 i++; 671 rcu_scale_wait_shutdown(); 672 } while (!torture_must_stop()); 673 if (gp_async && cur_ops->async) { 674 rcu_scale_free(wmbp); 675 cur_ops->gp_barrier(); 676 } 677 torture_kthread_stopping("rcu_scale_writer"); 678 return 0; 679 } 680 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.