kernel/sched/debug.c | 52 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 3 deletions(-)
The debug files under sched/domains can take a long time to regenerate,
especially when updates are done one at a time. Move these files under
the sched verbose debug flag. Allow changes to verbose to trigger
generation of the files. This lets a user batch the updates but still
have the information available. The detailed topology printk messages
are also under verbose.
Discussion that led to this approach can be found in the link below.
Simplified code to maintain use of debugfs bool routines suggested by
Michael Ellerman <mpe@ellerman.id.au>.
Signed-off-by: Phil Auld <pauld@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Vishal Chourasia <vishalc@linux.vnet.ibm.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/all/Y01UWQL2y2r69sBX@li-05afa54c-330e-11b2-a85c-e3f3aa0db1e9.ibm.com/
---
v2: fix comment typo and use cpumask_empty()
kernel/sched/debug.c | 52 +++++++++++++++++++++++++++++++++++++++++---
1 file changed, 49 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 1637b65ba07a..0b2340a79b65 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -280,6 +280,45 @@ static const struct file_operations sched_dynamic_fops = {
__read_mostly bool sched_debug_verbose;
+#ifdef CONFIG_SMP
+static struct dentry *sd_dentry;
+
+
+static ssize_t sched_verbose_write(struct file *filp, const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ ssize_t result;
+ bool orig;
+
+ cpus_read_lock();
+ mutex_lock(&sched_domains_mutex);
+
+ orig = sched_debug_verbose;
+ result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
+
+ if (sched_debug_verbose && !orig)
+ update_sched_domain_debugfs();
+ else if (!sched_debug_verbose && orig) {
+ debugfs_remove(sd_dentry);
+ sd_dentry = NULL;
+ }
+
+ mutex_unlock(&sched_domains_mutex);
+ cpus_read_unlock();
+
+ return result;
+}
+#else
+#define sched_verbose_write debugfs_write_file_bool
+#endif
+
+static const struct file_operations sched_verbose_fops = {
+ .read = debugfs_read_file_bool,
+ .write = sched_verbose_write,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
static const struct seq_operations sched_debug_sops;
static int sched_debug_open(struct inode *inode, struct file *filp)
@@ -303,7 +342,7 @@ static __init int sched_init_debug(void)
debugfs_sched = debugfs_create_dir("sched", NULL);
debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops);
- debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose);
+ debugfs_create_file_unsafe("verbose", 0644, debugfs_sched, &sched_debug_verbose, &sched_verbose_fops);
#ifdef CONFIG_PREEMPT_DYNAMIC
debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
#endif
@@ -345,7 +384,6 @@ late_initcall(sched_init_debug);
#ifdef CONFIG_SMP
static cpumask_var_t sd_sysctl_cpus;
-static struct dentry *sd_dentry;
static int sd_flags_show(struct seq_file *m, void *v)
{
@@ -402,15 +440,23 @@ void update_sched_domain_debugfs(void)
if (!debugfs_sched)
return;
+ if (!sched_debug_verbose)
+ return;
+
if (!cpumask_available(sd_sysctl_cpus)) {
if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
return;
cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
}
- if (!sd_dentry)
+ if (!sd_dentry) {
sd_dentry = debugfs_create_dir("domains", debugfs_sched);
+ /* rebuild sd_sysctl_cpus if empty since it gets cleared below */
+ if (cpumask_empty(sd_sysctl_cpus))
+ cpumask_copy(sd_sysctl_cpus, cpu_online_mask);
+ }
+
for_each_cpu(cpu, sd_sysctl_cpus) {
struct sched_domain *sd;
struct dentry *d_cpu;
--
2.31.1
On Fri, Jan 20, 2023 at 11:33:30AM -0500, Phil Auld wrote: > The debug files under sched/domains can take a long time to regenerate, > especially when updates are done one at a time. Move these files under > the sched verbose debug flag. Allow changes to verbose to trigger > generation of the files. This lets a user batch the updates but still > have the information available. The detailed topology printk messages > are also under verbose. > > Discussion that lead to this approach can be found in the link below. > > Simplified code to maintain use of debugfs bool routines suggested by > Michael Ellerman <mpe@ellerman.id.au>. > > Signed-off-by: Phil Auld <pauld@redhat.com> > Cc: Michael Ellerman <mpe@ellerman.id.au> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: Peter Zijlstra (Intel) <peterz@infradead.org> > Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> > Cc: Valentin Schneider <vschneid@redhat.com> > Cc: Vishal Chourasia <vishalc@linux.vnet.ibm.com> > Cc: Vincent Guittot <vincent.guittot@linaro.org> > Link: https://lore.kernel.org/all/Y01UWQL2y2r69sBX@li-05afa54c-330e-11b2-a85c-e3f3aa0db1e9.ibm.com/ > --- > > v2: fix comment typo and use cpumask_empty() > > kernel/sched/debug.c | 52 +++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 49 insertions(+), 3 deletions(-) Thanks for the Patch. Tested-by: Vishal Chourasia <vishalc@linux.vnet.ibm.com> Base Tag : v6.2-rc8 Patch : [V2] sched/debug: Put sched/domains files under the verbose flag # time ppc64_cpu --smt=off | When kernel is built | with the patch, time to real 17m54.246s | smt=off is 17m54s. user 0m0.013s | sys 0m4.661s | System Configuration: Architecture : ppc64le CPUS : 1600 Machine : Power10 Thread per core : 8 Sockets : 15 - debug files are not created for the case where kernel command line string lacks sched_verbose parameter. 
- When sched_verbose is passed as kernel commandline parameter, kernel exports /sys/kernel/debug/sched/verbose file which controls creation of the debug files. + setting Y/N to this file enables/disables creation of debug files during CPU hotplug operation. Below is the table listing time taken to perform smt=off operation under different settings. +--------------------------+---------------+---------+------------+ | kernel | sched_verbose | verbose | time | +--------------------------+---------------+---------+------------+ | 6.2.0-rc8-patch-applied+ | YES | N | 16m58.196s | | 6.2.0-rc8-patch-applied+ | YES | Y | 33m21.774s | | 6.2.0-rc8-patch-applied+ | NO | NA | 17m54.246s | | 6.2.0-rc8 | NO | NA | 30m11.344s | +--------------------------+---------------+---------+------------+ -- vishal.c
Hi Ingo, Peter, On Fri, Jan 20, 2023 at 11:33:30AM -0500 Phil Auld wrote: > The debug files under sched/domains can take a long time to regenerate, > especially when updates are done one at a time. Move these files under > the sched verbose debug flag. Allow changes to verbose to trigger > generation of the files. This lets a user batch the updates but still > have the information available. The detailed topology printk messages > are also under verbose. > > Discussion that lead to this approach can be found in the link below. > > Simplified code to maintain use of debugfs bool routines suggested by > Michael Ellerman <mpe@ellerman.id.au>. > > Signed-off-by: Phil Auld <pauld@redhat.com> > Cc: Michael Ellerman <mpe@ellerman.id.au> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: Peter Zijlstra (Intel) <peterz@infradead.org> > Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> > Cc: Valentin Schneider <vschneid@redhat.com> > Cc: Vishal Chourasia <vishalc@linux.vnet.ibm.com> > Cc: Vincent Guittot <vincent.guittot@linaro.org> > Link: https://lore.kernel.org/all/Y01UWQL2y2r69sBX@li-05afa54c-330e-11b2-a85c-e3f3aa0db1e9.ibm.com/ > --- Thoughts on this one? 
Thanks, Phil > > v2: fix comment typo and use cpumask_empty() > > kernel/sched/debug.c | 52 +++++++++++++++++++++++++++++++++++++++++--- > 1 file changed, 49 insertions(+), 3 deletions(-) > > diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c > index 1637b65ba07a..0b2340a79b65 100644 > --- a/kernel/sched/debug.c > +++ b/kernel/sched/debug.c > @@ -280,6 +280,45 @@ static const struct file_operations sched_dynamic_fops = { > > __read_mostly bool sched_debug_verbose; > > +#ifdef CONFIG_SMP > +static struct dentry *sd_dentry; > + > + > +static ssize_t sched_verbose_write(struct file *filp, const char __user *ubuf, > + size_t cnt, loff_t *ppos) > +{ > + ssize_t result; > + bool orig; > + > + cpus_read_lock(); > + mutex_lock(&sched_domains_mutex); > + > + orig = sched_debug_verbose; > + result = debugfs_write_file_bool(filp, ubuf, cnt, ppos); > + > + if (sched_debug_verbose && !orig) > + update_sched_domain_debugfs(); > + else if (!sched_debug_verbose && orig) { > + debugfs_remove(sd_dentry); > + sd_dentry = NULL; > + } > + > + mutex_unlock(&sched_domains_mutex); > + cpus_read_unlock(); > + > + return result; > +} > +#else > +#define sched_verbose_write debugfs_write_file_bool > +#endif > + > +static const struct file_operations sched_verbose_fops = { > + .read = debugfs_read_file_bool, > + .write = sched_verbose_write, > + .open = simple_open, > + .llseek = default_llseek, > +}; > + > static const struct seq_operations sched_debug_sops; > > static int sched_debug_open(struct inode *inode, struct file *filp) > @@ -303,7 +342,7 @@ static __init int sched_init_debug(void) > debugfs_sched = debugfs_create_dir("sched", NULL); > > debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); > - debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose); > + debugfs_create_file_unsafe("verbose", 0644, debugfs_sched, &sched_debug_verbose, &sched_verbose_fops); > #ifdef CONFIG_PREEMPT_DYNAMIC > debugfs_create_file("preempt", 0644, 
debugfs_sched, NULL, &sched_dynamic_fops); > #endif > @@ -345,7 +384,6 @@ late_initcall(sched_init_debug); > #ifdef CONFIG_SMP > > static cpumask_var_t sd_sysctl_cpus; > -static struct dentry *sd_dentry; > > static int sd_flags_show(struct seq_file *m, void *v) > { > @@ -402,15 +440,23 @@ void update_sched_domain_debugfs(void) > if (!debugfs_sched) > return; > > + if (!sched_debug_verbose) > + return; > + > if (!cpumask_available(sd_sysctl_cpus)) { > if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL)) > return; > cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); > } > > - if (!sd_dentry) > + if (!sd_dentry) { > sd_dentry = debugfs_create_dir("domains", debugfs_sched); > > + /* rebuild sd_sysctl_cpus if empty since it gets cleared below */ > + if (cpumask_empty(sd_sysctl_cpus)) > + cpumask_copy(sd_sysctl_cpus, cpu_online_mask); > + } > + > for_each_cpu(cpu, sd_sysctl_cpus) { > struct sched_domain *sd; > struct dentry *d_cpu; > -- > 2.31.1 > --
* Phil Auld <pauld@redhat.com> [2023-01-20 11:33:30]: > The debug files under sched/domains can take a long time to regenerate, > especially when updates are done one at a time. Move these files under > the sched verbose debug flag. Allow changes to verbose to trigger > generation of the files. This lets a user batch the updates but still > have the information available. The detailed topology printk messages > are also under verbose. > > Discussion that lead to this approach can be found in the link below. > > Simplified code to maintain use of debugfs bool routines suggested by > Michael Ellerman <mpe@ellerman.id.au>. > > Signed-off-by: Phil Auld <pauld@redhat.com> > Cc: Michael Ellerman <mpe@ellerman.id.au> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: Peter Zijlstra (Intel) <peterz@infradead.org> > Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> > Cc: Valentin Schneider <vschneid@redhat.com> > Cc: Vishal Chourasia <vishalc@linux.vnet.ibm.com> > Cc: Vincent Guittot <vincent.guittot@linaro.org> > Link: https://lore.kernel.org/all/Y01UWQL2y2r69sBX@li-05afa54c-330e-11b2-a85c-e3f3aa0db1e9.ibm.com/ > --- > Looks good to me. Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com> -- Thanks and Regards Srikar Dronamraju
On 20/01/23 11:33, Phil Auld wrote: > The debug files under sched/domains can take a long time to regenerate, > especially when updates are done one at a time. Move these files under > the sched verbose debug flag. Allow changes to verbose to trigger > generation of the files. This lets a user batch the updates but still > have the information available. The detailed topology printk messages > are also under verbose. > > Discussion that lead to this approach can be found in the link below. > > Simplified code to maintain use of debugfs bool routines suggested by > Michael Ellerman <mpe@ellerman.id.au>. > > Signed-off-by: Phil Auld <pauld@redhat.com> > Cc: Michael Ellerman <mpe@ellerman.id.au> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: Peter Zijlstra (Intel) <peterz@infradead.org> > Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> > Cc: Valentin Schneider <vschneid@redhat.com> > Cc: Vishal Chourasia <vishalc@linux.vnet.ibm.com> > Cc: Vincent Guittot <vincent.guittot@linaro.org> > Link: https://lore.kernel.org/all/Y01UWQL2y2r69sBX@li-05afa54c-330e-11b2-a85c-e3f3aa0db1e9.ibm.com/ Reviewed-by: Valentin Schneider <vschneid@redhat.com>
On Fri, Jan 20, 2023 at 11:33:30AM -0500, Phil Auld wrote: > The debug files under sched/domains can take a long time to regenerate, > especially when updates are done one at a time. Move these files under > the sched verbose debug flag. Allow changes to verbose to trigger > generation of the files. This lets a user batch the updates but still > have the information available. The detailed topology printk messages > are also under verbose. > > Discussion that lead to this approach can be found in the link below. > > Simplified code to maintain use of debugfs bool routines suggested by > Michael Ellerman <mpe@ellerman.id.au>. > > Signed-off-by: Phil Auld <pauld@redhat.com> > Cc: Michael Ellerman <mpe@ellerman.id.au> > Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> > Cc: Ingo Molnar <mingo@redhat.com> > Cc: Peter Zijlstra (Intel) <peterz@infradead.org> > Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> > Cc: Valentin Schneider <vschneid@redhat.com> > Cc: Vishal Chourasia <vishalc@linux.vnet.ibm.com> > Cc: Vincent Guittot <vincent.guittot@linaro.org> > Link: https://lore.kernel.org/all/Y01UWQL2y2r69sBX@li-05afa54c-330e-11b2-a85c-e3f3aa0db1e9.ibm.com/ > --- > > v2: fix comment typo and use cpumask_empty() Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
© 2016 - 2025 Red Hat, Inc.