Creates an "rcuscale" folder in debugfs and creates a "writer_durations"
file in the folder. This file is in CSV format. Each line represents
one duration record, with columns defined as:
writer_id,duration
Added an option "writer_no_print" to skip printing writer durations on
cleanup.
This allows external tools to read structured data and also drastically
improves cleanup performance on large core count machines.
On a 256C 512T machines running nreaders=1 nwriters=511:
Before:
$ time modprobe -r rcuscale; modprobe -r torture
real 3m17.349s
user 0m0.000s
sys 3m15.288s
After:
$ time cat /sys/kernel/debug/rcuscale/writer_durations > durations.csv
real 0m0.005s
user 0m0.000s
sys 0m0.005s
$ time modprobe -r rcuscale; modprobe -r torture
real 0m0.388s
user 0m0.000s
sys 0m0.335s
Signed-off-by: Yuzhuo Jing <yuzhuo@google.com>
---
.../admin-guide/kernel-parameters.txt | 5 +
kernel/rcu/rcuscale.c | 142 +++++++++++++++++-
2 files changed, 139 insertions(+), 8 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1f2c0874da9..7b62a84a19d4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5583,6 +5583,11 @@
periods, but in jiffies. The default of zero
says no holdoff.
+ rcuscale.writer_no_print= [KNL]
+ Do not print writer durations to kernel ring buffer.
+ Instead, users can read them from the
+ rcuscale/writer_durations file in debugfs.
+
rcutorture.fqs_duration= [KNL]
Set duration of force_quiescent_state bursts
in microseconds.
diff --git a/kernel/rcu/rcuscale.c b/kernel/rcu/rcuscale.c
index b521d0455992..ad10b42be6fc 100644
--- a/kernel/rcu/rcuscale.c
+++ b/kernel/rcu/rcuscale.c
@@ -40,6 +40,8 @@
#include <linux/vmalloc.h>
#include <linux/rcupdate_trace.h>
#include <linux/sched/debug.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
#include "rcu.h"
@@ -97,6 +99,7 @@ torture_param(bool, shutdown, RCUSCALE_SHUTDOWN,
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, writer_holdoff_jiffies, 0, "Holdoff (jiffies) between GPs, zero to disable");
+torture_param(bool, writer_no_print, false, "Do not print writer durations to ring buffer");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() scale test?");
torture_param(int, kfree_mult, 1, "Multiple of kfree_obj size to allocate.");
torture_param(int, kfree_by_call_rcu, 0, "Use call_rcu() to emulate kfree_rcu()?");
@@ -138,6 +141,9 @@ static u64 t_rcu_scale_writer_finished;
static unsigned long b_rcu_gp_test_started;
static unsigned long b_rcu_gp_test_finished;
+static struct dentry *debugfs_dir;
+static struct dentry *debugfs_writer_durations;
+
#define MAX_MEAS 10000
#define MIN_MEAS 100
@@ -607,6 +613,7 @@ rcu_scale_writer(void *arg)
t = ktime_get_mono_fast_ns();
*wdp = t - *wdp;
i_max = i;
+ writer_n_durations[me] = i_max + 1;
if (!started &&
atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
started = true;
@@ -620,6 +627,7 @@ rcu_scale_writer(void *arg)
nrealwriters) {
schedule_timeout_interruptible(10);
rcu_ftrace_dump(DUMP_ALL);
+ WRITE_ONCE(test_complete, true);
SCALEOUT_STRING("Test complete");
t_rcu_scale_writer_finished = t;
if (gp_exp) {
@@ -666,7 +674,6 @@ rcu_scale_writer(void *arg)
rcu_scale_free(wmbp);
cur_ops->gp_barrier();
}
- writer_n_durations[me] = i_max + 1;
torture_kthread_stopping("rcu_scale_writer");
return 0;
}
@@ -941,6 +948,117 @@ kfree_scale_init(void)
return firsterr;
}
+/*
+ * A seq_file for writer_durations. Content is only visible when all writers
+ * finish. Element i of the sequence is writer_durations + i.
+ */
+static void *writer_durations_start(struct seq_file *m, loff_t *pos)
+{
+ loff_t writer_id = *pos;
+
+ if (!test_complete || writer_id < 0 || writer_id >= nrealwriters)
+ return NULL;
+
+ return writer_durations + writer_id;
+}
+
+static void *writer_durations_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return writer_durations_start(m, pos);
+}
+
+static void writer_durations_stop(struct seq_file *m, void *v)
+{
+}
+
+/*
+ * Each element in the seq_file is an array of one writer's durations.
+ * Each element prints writer_n_durations[writer_id] lines, and each line
+ * contains one duration record, in CSV format:
+ * writer_id,duration
+ */
+static int writer_durations_show(struct seq_file *m, void *v)
+{
+ u64 **durations = v;
+ loff_t writer_id = durations - writer_durations;
+
+ for (int i = 0; i < writer_n_durations[writer_id]; ++i)
+ seq_printf(m, "%lld,%lld\n", writer_id, durations[0][i]);
+
+ return 0;
+}
+
+static const struct seq_operations writer_durations_op = {
+ .start = writer_durations_start,
+ .next = writer_durations_next,
+ .stop = writer_durations_stop,
+ .show = writer_durations_show
+};
+
+static int writer_durations_open(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &writer_durations_op);
+}
+
+static const struct file_operations writer_durations_fops = {
+ .owner = THIS_MODULE,
+ .open = writer_durations_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*
+ * Create an rcuscale directory exposing run states and results.
+ */
+static int register_debugfs(void)
+{
+#define try_create_file(variable, name, mode, parent, data, fops) \
+({ \
+ variable = debugfs_create_file((name), (mode), (parent), (data), (fops)); \
+ err = PTR_ERR_OR_ZERO(variable); \
+ err; \
+})
+
+ int err;
+
+ debugfs_dir = debugfs_create_dir("rcuscale", NULL);
+ err = PTR_ERR_OR_ZERO(debugfs_dir);
+ if (err)
+ goto fail;
+
+ if (try_create_file(debugfs_writer_durations, "writer_durations", 0444,
+ debugfs_dir, NULL, &writer_durations_fops))
+ goto fail;
+
+ return 0;
+fail:
+ pr_err("rcu-scale: Failed to create debugfs file.");
+ /* unregister_debugfs is called by rcu_scale_cleanup, avoid
+ * calling it twice.
+ */
+ return err;
+#undef try_create_file
+}
+
+static void unregister_debugfs(void)
+{
+#define try_remove(variable) \
+do { \
+ if (!IS_ERR_OR_NULL(variable)) \
+ debugfs_remove(variable); \
+ variable = NULL; \
+} while (0)
+
+ try_remove(debugfs_writer_durations);
+
+ /* Remove directory after files. */
+ try_remove(debugfs_dir);
+
+#undef try_remove
+}
+
static void
rcu_scale_cleanup(void)
{
@@ -961,6 +1079,8 @@ rcu_scale_cleanup(void)
if (gp_exp && gp_async)
SCALEOUT_ERRSTRING("No expedited async GPs, so went with async!");
+ unregister_debugfs();
+
// If built-in, just report all of the GP kthread's CPU time.
if (IS_BUILTIN(CONFIG_RCU_SCALE_TEST) && !kthread_tp && cur_ops->rso_gp_kthread)
kthread_tp = cur_ops->rso_gp_kthread();
@@ -1020,13 +1140,15 @@ rcu_scale_cleanup(void)
wdpp = writer_durations[i];
if (!wdpp)
continue;
- for (j = 0; j < writer_n_durations[i]; j++) {
- wdp = &wdpp[j];
- pr_alert("%s%s %4d writer-duration: %5d %llu\n",
- scale_type, SCALE_FLAG,
- i, j, *wdp);
- if (j % 100 == 0)
- schedule_timeout_uninterruptible(1);
+ if (!writer_no_print) {
+ for (j = 0; j < writer_n_durations[i]; j++) {
+ wdp = &wdpp[j];
+ pr_alert("%s%s %4d writer-duration: %5d %llu\n",
+ scale_type, SCALE_FLAG,
+ i, j, *wdp);
+ if (j % 100 == 0)
+ schedule_timeout_uninterruptible(1);
+ }
}
kfree(writer_durations[i]);
if (writer_freelists) {
@@ -1202,6 +1324,10 @@ rcu_scale_init(void)
if (torture_init_error(firsterr))
goto unwind;
}
+
+ if (register_debugfs())
+ goto unwind;
+
torture_init_end();
return 0;
--
2.50.1.552.g942d659e1b-goog
Hi Yuzhuo,
kernel test robot noticed the following build errors:
[auto build test ERROR on rcu/rcu/dev]
[also build test ERROR on linus/master v6.16 next-20250731]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Yuzhuo-Jing/rcuscale-Create-debugfs-file-for-writer-durations/20250730-102613
base: https://git.kernel.org/pub/scm/linux/kernel/git/rcu/linux.git rcu/dev
patch link: https://lore.kernel.org/r/20250730022347.71722-2-yuzhuo%40google.com
patch subject: [PATCH v1 1/4] rcuscale: Create debugfs file for writer durations
config: i386-randconfig-002-20250731 (https://download.01.org/0day-ci/archive/20250731/202507311504.ttQGhW04-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14+deb12u1) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250731/202507311504.ttQGhW04-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202507311504.ttQGhW04-lkp@intel.com/
All errors (new ones prefixed by >>):
In file included from <command-line>:
kernel/rcu/rcuscale.c: In function 'rcu_scale_writer':
>> kernel/rcu/rcuscale.c:630:44: error: 'test_complete' undeclared (first use in this function); did you mean 'complete'?
630 | WRITE_ONCE(test_complete, true);
| ^~~~~~~~~~~~~
include/linux/compiler_types.h:548:23: note: in definition of macro '__compiletime_assert'
548 | if (!(condition)) \
| ^~~~~~~~~
include/linux/compiler_types.h:568:9: note: in expansion of macro '_compiletime_assert'
568 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^~~~~~~~~~~~~~~~~~~
include/asm-generic/rwonce.h:36:9: note: in expansion of macro 'compiletime_assert'
36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
| ^~~~~~~~~~~~~~~~~~
include/asm-generic/rwonce.h:36:28: note: in expansion of macro '__native_word'
36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
| ^~~~~~~~~~~~~
include/asm-generic/rwonce.h:60:9: note: in expansion of macro 'compiletime_assert_rwonce_type'
60 | compiletime_assert_rwonce_type(x); \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
kernel/rcu/rcuscale.c:630:33: note: in expansion of macro 'WRITE_ONCE'
630 | WRITE_ONCE(test_complete, true);
| ^~~~~~~~~~
kernel/rcu/rcuscale.c:630:44: note: each undeclared identifier is reported only once for each function it appears in
630 | WRITE_ONCE(test_complete, true);
| ^~~~~~~~~~~~~
include/linux/compiler_types.h:548:23: note: in definition of macro '__compiletime_assert'
548 | if (!(condition)) \
| ^~~~~~~~~
include/linux/compiler_types.h:568:9: note: in expansion of macro '_compiletime_assert'
568 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| ^~~~~~~~~~~~~~~~~~~
include/asm-generic/rwonce.h:36:9: note: in expansion of macro 'compiletime_assert'
36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
| ^~~~~~~~~~~~~~~~~~
include/asm-generic/rwonce.h:36:28: note: in expansion of macro '__native_word'
36 | compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \
| ^~~~~~~~~~~~~
include/asm-generic/rwonce.h:60:9: note: in expansion of macro 'compiletime_assert_rwonce_type'
60 | compiletime_assert_rwonce_type(x); \
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
kernel/rcu/rcuscale.c:630:33: note: in expansion of macro 'WRITE_ONCE'
630 | WRITE_ONCE(test_complete, true);
| ^~~~~~~~~~
kernel/rcu/rcuscale.c: In function 'writer_durations_start':
kernel/rcu/rcuscale.c:959:14: error: 'test_complete' undeclared (first use in this function); did you mean 'complete'?
959 | if (!test_complete || writer_id < 0 || writer_id >= nrealwriters)
| ^~~~~~~~~~~~~
| complete
vim +630 kernel/rcu/rcuscale.c
534
535 /*
536 * RCU scale writer kthread. Repeatedly does a grace period.
537 */
538 static int
539 rcu_scale_writer(void *arg)
540 {
541 int i = 0;
542 int i_max;
543 unsigned long jdone;
544 long me = (long)arg;
545 bool selfreport = false;
546 bool started = false, done = false, alldone = false;
547 u64 t;
548 DEFINE_TORTURE_RANDOM(tr);
549 u64 *wdp;
550 u64 *wdpp = writer_durations[me];
551 struct writer_freelist *wflp = &writer_freelists[me];
552 struct writer_mblock *wmbp = NULL;
553
554 VERBOSE_SCALEOUT_STRING("rcu_scale_writer task started");
555 WARN_ON(!wdpp);
556 set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
557 current->flags |= PF_NO_SETAFFINITY;
558 sched_set_fifo_low(current);
559
560 if (holdoff)
561 schedule_timeout_idle(holdoff * HZ);
562
563 /*
564 * Wait until rcu_end_inkernel_boot() is called for normal GP tests
565 * so that RCU is not always expedited for normal GP tests.
566 * The system_state test is approximate, but works well in practice.
567 */
568 while (!gp_exp && system_state != SYSTEM_RUNNING)
569 schedule_timeout_uninterruptible(1);
570
571 t = ktime_get_mono_fast_ns();
572 if (atomic_inc_return(&n_rcu_scale_writer_started) >= nrealwriters) {
573 t_rcu_scale_writer_started = t;
574 if (gp_exp) {
575 b_rcu_gp_test_started =
576 cur_ops->exp_completed() / 2;
577 } else {
578 b_rcu_gp_test_started = cur_ops->get_gp_seq();
579 }
580 }
581
582 jdone = jiffies + minruntime * HZ;
583 do {
584 bool gp_succeeded = false;
585
586 if (writer_holdoff)
587 udelay(writer_holdoff);
588 if (writer_holdoff_jiffies)
589 schedule_timeout_idle(torture_random(&tr) % writer_holdoff_jiffies + 1);
590 wdp = &wdpp[i];
591 *wdp = ktime_get_mono_fast_ns();
592 if (gp_async && !WARN_ON_ONCE(!cur_ops->async)) {
593 if (!wmbp)
594 wmbp = rcu_scale_alloc(me);
595 if (wmbp && atomic_read(&wflp->ws_inflight) < gp_async_max) {
596 atomic_inc(&wflp->ws_inflight);
597 cur_ops->async(&wmbp->wmb_rh, rcu_scale_async_cb);
598 wmbp = NULL;
599 gp_succeeded = true;
600 } else if (!kthread_should_stop()) {
601 cur_ops->gp_barrier();
602 } else {
603 rcu_scale_free(wmbp); /* Because we are stopping. */
604 wmbp = NULL;
605 }
606 } else if (gp_exp) {
607 cur_ops->exp_sync();
608 gp_succeeded = true;
609 } else {
610 cur_ops->sync();
611 gp_succeeded = true;
612 }
613 t = ktime_get_mono_fast_ns();
614 *wdp = t - *wdp;
615 i_max = i;
616 writer_n_durations[me] = i_max + 1;
617 if (!started &&
618 atomic_read(&n_rcu_scale_writer_started) >= nrealwriters)
619 started = true;
620 if (!done && i >= MIN_MEAS && time_after(jiffies, jdone)) {
621 done = true;
622 WRITE_ONCE(writer_done[me], true);
623 sched_set_normal(current, 0);
624 pr_alert("%s%s rcu_scale_writer %ld has %d measurements\n",
625 scale_type, SCALE_FLAG, me, MIN_MEAS);
626 if (atomic_inc_return(&n_rcu_scale_writer_finished) >=
627 nrealwriters) {
628 schedule_timeout_interruptible(10);
629 rcu_ftrace_dump(DUMP_ALL);
> 630 WRITE_ONCE(test_complete, true);
631 SCALEOUT_STRING("Test complete");
632 t_rcu_scale_writer_finished = t;
633 if (gp_exp) {
634 b_rcu_gp_test_finished =
635 cur_ops->exp_completed() / 2;
636 } else {
637 b_rcu_gp_test_finished =
638 cur_ops->get_gp_seq();
639 }
640 if (shutdown) {
641 smp_mb(); /* Assign before wake. */
642 wake_up(&shutdown_wq);
643 }
644 }
645 }
646 if (done && !alldone &&
647 atomic_read(&n_rcu_scale_writer_finished) >= nrealwriters)
648 alldone = true;
649 if (done && !alldone && time_after(jiffies, jdone + HZ * 60)) {
650 static atomic_t dumped;
651 int i;
652
653 if (!atomic_xchg(&dumped, 1)) {
654 for (i = 0; i < nrealwriters; i++) {
655 if (writer_done[i])
656 continue;
657 pr_info("%s: Task %ld flags writer %d:\n", __func__, me, i);
658 sched_show_task(writer_tasks[i]);
659 }
660 if (cur_ops->stats)
661 cur_ops->stats();
662 }
663 }
664 if (!selfreport && time_after(jiffies, jdone + HZ * (70 + me))) {
665 pr_info("%s: Writer %ld self-report: started %d done %d/%d->%d i %d jdone %lu.\n",
666 __func__, me, started, done, writer_done[me], atomic_read(&n_rcu_scale_writer_finished), i, jiffies - jdone);
667 selfreport = true;
668 }
669 if (gp_succeeded && started && !alldone && i < MAX_MEAS - 1)
670 i++;
671 rcu_scale_wait_shutdown();
672 } while (!torture_must_stop());
673 if (gp_async && cur_ops->async) {
674 rcu_scale_free(wmbp);
675 cur_ops->gp_barrier();
676 }
677 torture_kthread_stopping("rcu_scale_writer");
678 return 0;
679 }
680
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2026 Red Hat, Inc.