"sched_itmt_enabled" was only introduced as a debug toggle for any funky
ITMT behavior. Move the sysctl controlled from
"/proc/sys/kernel/sched_itmt_enabled" to debugfs at
"/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a
cat on the file will return "Y" or "N" instead of "1" or "0" to
indicate that feature is enabled or disabled respectively.
Since ITMT is x86 specific (and PowerPC uses SD_ASYM_PACKING too), the
toggle was moved to "/sys/kernel/debug/x86/" as opposed to
"/sys/kernel/debug/sched/"
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
arch/x86/kernel/itmt.c | 56 ++++++++++++++++++++----------------------
1 file changed, 27 insertions(+), 29 deletions(-)
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index ee43d1bd41d0..9cea1fc36c18 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -19,6 +19,7 @@
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
+#include <linux/debugfs.h>
#include <linux/mutex.h>
#include <linux/sysctl.h>
#include <linux/nodemask.h>
@@ -34,45 +35,38 @@ static bool __read_mostly sched_itmt_capable;
* of higher turbo frequency for cpus supporting Intel Turbo Boost Max
* Technology 3.0.
*
- * It can be set via /proc/sys/kernel/sched_itmt_enabled
+ * It can be set via /sys/kernel/debug/x86/sched_itmt_enabled
*/
bool __read_mostly sysctl_sched_itmt_enabled;
-static int sched_itmt_update_handler(const struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
+static ssize_t sched_itmt_enabled_write(struct file *filp,
+ const char __user *ubuf,
+ size_t cnt, loff_t *ppos)
{
- unsigned int old_sysctl;
- int ret;
+ ssize_t result;
+ bool orig;
guard(mutex)(&itmt_update_mutex);
- if (!sched_itmt_capable)
- return -EINVAL;
-
- old_sysctl = sysctl_sched_itmt_enabled;
- ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ orig = sysctl_sched_itmt_enabled;
+ result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
- if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
+ if (sysctl_sched_itmt_enabled != orig) {
x86_topology_update = true;
rebuild_sched_domains();
}
- return ret;
+ return result;
}
-static struct ctl_table itmt_kern_table[] = {
- {
- .procname = "sched_itmt_enabled",
- .data = &sysctl_sched_itmt_enabled,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = sched_itmt_update_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
+static const struct file_operations dfs_sched_itmt_fops = {
+ .read = debugfs_read_file_bool,
+ .write = sched_itmt_enabled_write,
+ .open = simple_open,
+ .llseek = default_llseek,
};
-static struct ctl_table_header *itmt_sysctl_header;
+static struct dentry *dfs_sched_itmt;
/**
* sched_set_itmt_support() - Indicate platform supports ITMT
@@ -98,9 +92,15 @@ int sched_set_itmt_support(void)
if (sched_itmt_capable)
return 0;
- itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table);
- if (!itmt_sysctl_header)
+ dfs_sched_itmt = debugfs_create_file_unsafe("sched_itmt_enabled",
+ 0644,
+ arch_debugfs_dir,
+ &sysctl_sched_itmt_enabled,
+ &dfs_sched_itmt_fops);
+ if (IS_ERR_OR_NULL(dfs_sched_itmt)) {
+ dfs_sched_itmt = NULL;
return -ENOMEM;
+ }
sched_itmt_capable = true;
@@ -131,10 +131,8 @@ void sched_clear_itmt_support(void)
sched_itmt_capable = false;
- if (itmt_sysctl_header) {
- unregister_sysctl_table(itmt_sysctl_header);
- itmt_sysctl_header = NULL;
- }
+ debugfs_remove(dfs_sched_itmt);
+ dfs_sched_itmt = NULL;
if (sysctl_sched_itmt_enabled) {
/* disable sched_itmt if we are no longer ITMT capable */
--
2.34.1
On Wed, 2024-12-11 at 18:55 +0000, K Prateek Nayak wrote:
> "sched_itmt_enabled" was only introduced as a debug toggle for any funky
> ITMT behavior. Move the sysctl controlled from
> "/proc/sys/kernel/sched_itmt_enabled" to debugfs at
> "/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a
> cat on the file will return "Y" or "N" instead of "1" or "0" to
> indicate that feature is enabled or disabled respectively.
>
Valid values of setting "sched_itmt_enabled" likewise change from "1" or "0"
to "Y" or "N".
> Since ITMT is x86 specific (and PowerPC uses SD_ASYM_PACKING too), the
> toggle was moved to "/sys/kernel/debug/x86/" as opposed to
> "/sys/kernel/debug/sched/"
>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Tim
> ---
> arch/x86/kernel/itmt.c | 56 ++++++++++++++++++++----------------------
> 1 file changed, 27 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
> index ee43d1bd41d0..9cea1fc36c18 100644
> --- a/arch/x86/kernel/itmt.c
> +++ b/arch/x86/kernel/itmt.c
> @@ -19,6 +19,7 @@
> #include <linux/sched.h>
> #include <linux/cpumask.h>
> #include <linux/cpuset.h>
> +#include <linux/debugfs.h>
> #include <linux/mutex.h>
> #include <linux/sysctl.h>
> #include <linux/nodemask.h>
> @@ -34,45 +35,38 @@ static bool __read_mostly sched_itmt_capable;
> * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
> * Technology 3.0.
> *
> - * It can be set via /proc/sys/kernel/sched_itmt_enabled
> + * It can be set via /sys/kernel/debug/x86/sched_itmt_enabled
> */
> bool __read_mostly sysctl_sched_itmt_enabled;
>
> -static int sched_itmt_update_handler(const struct ctl_table *table, int write,
> - void *buffer, size_t *lenp, loff_t *ppos)
> +static ssize_t sched_itmt_enabled_write(struct file *filp,
> + const char __user *ubuf,
> + size_t cnt, loff_t *ppos)
> {
> - unsigned int old_sysctl;
> - int ret;
> + ssize_t result;
> + bool orig;
>
> guard(mutex)(&itmt_update_mutex);
>
> - if (!sched_itmt_capable)
> - return -EINVAL;
> -
> - old_sysctl = sysctl_sched_itmt_enabled;
> - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
> + orig = sysctl_sched_itmt_enabled;
> + result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
>
> - if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
> + if (sysctl_sched_itmt_enabled != orig) {
> x86_topology_update = true;
> rebuild_sched_domains();
> }
>
> - return ret;
> + return result;
> }
>
> -static struct ctl_table itmt_kern_table[] = {
> - {
> - .procname = "sched_itmt_enabled",
> - .data = &sysctl_sched_itmt_enabled,
> - .maxlen = sizeof(unsigned int),
> - .mode = 0644,
> - .proc_handler = sched_itmt_update_handler,
> - .extra1 = SYSCTL_ZERO,
> - .extra2 = SYSCTL_ONE,
> - },
> +static const struct file_operations dfs_sched_itmt_fops = {
> + .read = debugfs_read_file_bool,
> + .write = sched_itmt_enabled_write,
> + .open = simple_open,
> + .llseek = default_llseek,
> };
>
> -static struct ctl_table_header *itmt_sysctl_header;
> +static struct dentry *dfs_sched_itmt;
>
> /**
> * sched_set_itmt_support() - Indicate platform supports ITMT
> @@ -98,9 +92,15 @@ int sched_set_itmt_support(void)
> if (sched_itmt_capable)
> return 0;
>
> - itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table);
> - if (!itmt_sysctl_header)
> + dfs_sched_itmt = debugfs_create_file_unsafe("sched_itmt_enabled",
> + 0644,
> + arch_debugfs_dir,
> + &sysctl_sched_itmt_enabled,
> + &dfs_sched_itmt_fops);
> + if (IS_ERR_OR_NULL(dfs_sched_itmt)) {
> + dfs_sched_itmt = NULL;
> return -ENOMEM;
> + }
>
> sched_itmt_capable = true;
>
> @@ -131,10 +131,8 @@ void sched_clear_itmt_support(void)
>
> sched_itmt_capable = false;
>
> - if (itmt_sysctl_header) {
> - unregister_sysctl_table(itmt_sysctl_header);
> - itmt_sysctl_header = NULL;
> - }
> + debugfs_remove(dfs_sched_itmt);
> + dfs_sched_itmt = NULL;
>
> if (sysctl_sched_itmt_enabled) {
> /* disable sched_itmt if we are no longer ITMT capable */
On Thu, Dec 12, 2024 at 11:15:43AM -0800, Tim Chen wrote: > On Wed, 2024-12-11 at 18:55 +0000, K Prateek Nayak wrote: > > "sched_itmt_enabled" was only introduced as a debug toggle for any funky > > ITMT behavior. Move the sysctl controlled from > > "/proc/sys/kernel/sched_itmt_enabled" to debugfs at > > "/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a > > cat on the file will return "Y" or "N" instead of "1" or "0" to > > indicate that feature is enabled or disabled respectively. > > > > Valid values of setting "sched_itmt_enabled" likewise change from "1" or "0" > to "Y" or "N". Nope, kstrtobool() accepts a ton of options, very much including 1,0. It's just the print side that has to pick one and went with the Y,N thing. Look at the function and marvel how you can even write: oNcology,oFficious
Hello Tim,
Thank you for reviewing the series.
On 12/13/2024 12:45 AM, Tim Chen wrote:
> On Wed, 2024-12-11 at 18:55 +0000, K Prateek Nayak wrote:
>> "sched_itmt_enabled" was only introduced as a debug toggle for any funky
>> ITMT behavior. Move the sysctl controlled from
>> "/proc/sys/kernel/sched_itmt_enabled" to debugfs at
>> "/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a
>> cat on the file will return "Y" or "N" instead of "1" or "0" to
>> indicate that feature is enabled or disabled respectively.
>>
>
> Valid values of setting "sched_itmt_enabled" likewise change from "1" or "0"
> to "Y" or "N".
Turns out you can still use "1" and "0". Running:
echo Y > /sys/kernel/debug/sched/verbose
echo "sched_itmt_enabled: $(cat /sys/kernel/debug/x86/sched_itmt_enabled)";
for i in 0 1 N Y;
do
echo "Writing $i to /sys/kernel/debug/x86/sched_itmt_enabled";
echo $i > /sys/kernel/debug/x86/sched_itmt_enabled;
echo "sched_itmt_enabled: $(cat /sys/kernel/debug/x86/sched_itmt_enabled)";
echo "sched domain flags:";
cat /sys/kernel/debug/sched/domains/cpu0/domain*/flags;
echo;
done
Yields the following output on my system:
sched_itmt_enabled: Y
Writing 0 to /sys/kernel/debug/x86/sched_itmt_enabled
sched_itmt_enabled: N
sched domain flags:
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_PREFER_SIBLING
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_PREFER_SIBLING
Writing 1 to /sys/kernel/debug/x86/sched_itmt_enabled
sched_itmt_enabled: Y
sched domain flags:
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_ASYM_PACKING SD_PREFER_SIBLING
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_PACKING SD_PREFER_SIBLING
Writing N to /sys/kernel/debug/x86/sched_itmt_enabled
sched_itmt_enabled: N
sched domain flags:
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_PREFER_SIBLING
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_PREFER_SIBLING
Writing Y to /sys/kernel/debug/x86/sched_itmt_enabled
sched_itmt_enabled: Y
sched domain flags:
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_ASYM_PACKING SD_PREFER_SIBLING
SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_PACKING SD_PREFER_SIBLING
Would you like me to extend that note as:
... with a notable change that a
cat on the file will return "Y" or "N" instead of "1" or "0" to
indicate that feature is enabled or disabled respectively. User can
either write "0" or "1" to toggle the feature off when enabled, or
"1" or "Y" to toggle the feature on when disabled.
for the record?
>
>> Since ITMT is x86 specific (and PowerPC uses SD_ASYM_PACKING too), the
>> toggle was moved to "/sys/kernel/debug/x86/" as opposed to
>> "/sys/kernel/debug/sched/"
>>
>> Suggested-by: Peter Zijlstra <peterz@infradead.org>
>> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
>
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Thank you.
--
Thanks and Regards,
Prateek
>
> Tim
>
> [..snip..]
>
On Fri, 2024-12-13 at 09:31 +0530, K Prateek Nayak wrote: > Hello Tim, > > Thank you for reviewing the series. > > On 12/13/2024 12:45 AM, Tim Chen wrote: > > On Wed, 2024-12-11 at 18:55 +0000, K Prateek Nayak wrote: > > > "sched_itmt_enabled" was only introduced as a debug toggle for any funky > > > ITMT behavior. Move the sysctl controlled from > > > "/proc/sys/kernel/sched_itmt_enabled" to debugfs at > > > "/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a > > > cat on the file will return "Y" or "N" instead of "1" or "0" to > > > indicate that feature is enabled or disabled respectively. > > > > > > > Valid values of setting "sched_itmt_enabled" likewise change from "1" or "0" > > to "Y" or "N". > > Turns out you can still use "1" and "0". Running: > > echo Y > /sys/kernel/debug/sched/verbose > echo "sched_itmt_enabled: $(cat /sys/kernel/debug/x86/sched_itmt_enabled)"; > for i in 0 1 N Y; > do > echo "Writing $i to /sys/kernel/debug/x86/sched_itmt_enabled"; > echo $i > /sys/kernel/debug/x86/sched_itmt_enabled; > echo "sched_itmt_enabled: $(cat /sys/kernel/debug/x86/sched_itmt_enabled)"; > echo "sched domain flags:"; > cat /sys/kernel/debug/sched/domains/cpu0/domain*/flags; > echo; > done > > Yields the following output on my system: > > sched_itmt_enabled: Y > > Writing 0 to /sys/kernel/debug/x86/sched_itmt_enabled > sched_itmt_enabled: N > sched domain flags: > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_PREFER_SIBLING > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_PREFER_SIBLING > > Writing 1 to /sys/kernel/debug/x86/sched_itmt_enabled > sched_itmt_enabled: Y > sched domain flags: > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_ASYM_PACKING SD_PREFER_SIBLING > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_PACKING SD_PREFER_SIBLING > > Writing N to /sys/kernel/debug/x86/sched_itmt_enabled > sched_itmt_enabled: N > sched domain flags: > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_PREFER_SIBLING > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_PREFER_SIBLING > > Writing Y to /sys/kernel/debug/x86/sched_itmt_enabled > sched_itmt_enabled: Y > sched domain flags: > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_ASYM_PACKING SD_PREFER_SIBLING > SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_PACKING SD_PREFER_SIBLING > > Would you like me to extend that note as: > > ... with a notable change that a > cat on the file will return "Y" or "N" instead of "1" or "0" to > indicate that feature is enabled or disabled respectively. User can > either write "0" or "1" to toggle the feature off when enabled, or > "1" or "Y" to toggle the feature on when disabled. > > for the record? > > Sure. Thanks. Tim
© 2016 - 2025 Red Hat, Inc.