[PATCH 3/8] x86/itmt: Move the "sched_itmt_enabled" sysctl to debugfs

K Prateek Nayak posted 8 patches 1 year ago
There is a newer version of this series
[PATCH 3/8] x86/itmt: Move the "sched_itmt_enabled" sysctl to debugfs
Posted by K Prateek Nayak 1 year ago
"sched_itmt_enabled" was only introduced as a debug toggle for any funky
ITMT behavior. Move the sysctl controlled from
"/proc/sys/kernel/sched_itmt_enabled" to debugfs at
"/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a
cat on the file will return "Y" or "N" instead of "1" or "0" to
indicate that feature is enabled or disabled respectively.

Since ITMT is x86 specific (and PowerPC uses SD_ASYM_PACKING too), the
toggle was moved to "/sys/kernel/debug/x86/" as opposed to
"/sys/kernel/debug/sched/"

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
 arch/x86/kernel/itmt.c | 56 ++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index ee43d1bd41d0..9cea1fc36c18 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/cpumask.h>
 #include <linux/cpuset.h>
+#include <linux/debugfs.h>
 #include <linux/mutex.h>
 #include <linux/sysctl.h>
 #include <linux/nodemask.h>
@@ -34,45 +35,38 @@ static bool __read_mostly sched_itmt_capable;
  * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
  * Technology 3.0.
  *
- * It can be set via /proc/sys/kernel/sched_itmt_enabled
+ * It can be set via /sys/kernel/debug/x86/sched_itmt_enabled
  */
 bool __read_mostly sysctl_sched_itmt_enabled;
 
-static int sched_itmt_update_handler(const struct ctl_table *table, int write,
-				     void *buffer, size_t *lenp, loff_t *ppos)
+static ssize_t sched_itmt_enabled_write(struct file *filp,
+					const char __user *ubuf,
+					size_t cnt, loff_t *ppos)
 {
-	unsigned int old_sysctl;
-	int ret;
+	ssize_t result;
+	bool orig;
 
 	guard(mutex)(&itmt_update_mutex);
 
-	if (!sched_itmt_capable)
-		return -EINVAL;
-
-	old_sysctl = sysctl_sched_itmt_enabled;
-	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	orig = sysctl_sched_itmt_enabled;
+	result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
 
-	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
+	if (sysctl_sched_itmt_enabled != orig) {
 		x86_topology_update = true;
 		rebuild_sched_domains();
 	}
 
-	return ret;
+	return result;
 }
 
-static struct ctl_table itmt_kern_table[] = {
-	{
-		.procname	= "sched_itmt_enabled",
-		.data		= &sysctl_sched_itmt_enabled,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_itmt_update_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
+static const struct file_operations dfs_sched_itmt_fops = {
+	.read =         debugfs_read_file_bool,
+	.write =        sched_itmt_enabled_write,
+	.open =         simple_open,
+	.llseek =       default_llseek,
 };
 
-static struct ctl_table_header *itmt_sysctl_header;
+static struct dentry *dfs_sched_itmt;
 
 /**
  * sched_set_itmt_support() - Indicate platform supports ITMT
@@ -98,9 +92,15 @@ int sched_set_itmt_support(void)
 	if (sched_itmt_capable)
 		return 0;
 
-	itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table);
-	if (!itmt_sysctl_header)
+	dfs_sched_itmt = debugfs_create_file_unsafe("sched_itmt_enabled",
+						    0644,
+						    arch_debugfs_dir,
+						    &sysctl_sched_itmt_enabled,
+						    &dfs_sched_itmt_fops);
+	if (IS_ERR_OR_NULL(dfs_sched_itmt)) {
+		dfs_sched_itmt = NULL;
 		return -ENOMEM;
+	}
 
 	sched_itmt_capable = true;
 
@@ -131,10 +131,8 @@ void sched_clear_itmt_support(void)
 
 	sched_itmt_capable = false;
 
-	if (itmt_sysctl_header) {
-		unregister_sysctl_table(itmt_sysctl_header);
-		itmt_sysctl_header = NULL;
-	}
+	debugfs_remove(dfs_sched_itmt);
+	dfs_sched_itmt = NULL;
 
 	if (sysctl_sched_itmt_enabled) {
 		/* disable sched_itmt if we are no longer ITMT capable */
-- 
2.34.1
Re: [PATCH 3/8] x86/itmt: Move the "sched_itmt_enabled" sysctl to debugfs
Posted by Tim Chen 1 year ago
On Wed, 2024-12-11 at 18:55 +0000, K Prateek Nayak wrote:
> "sched_itmt_enabled" was only introduced as a debug toggle for any funky
> ITMT behavior. Move the sysctl controlled from
> "/proc/sys/kernel/sched_itmt_enabled" to debugfs at
> "/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a
> cat on the file will return "Y" or "N" instead of "1" or "0" to
> indicate that feature is enabled or disabled respectively.
> 

Valid values of setting "sched_itmt_enabled" likewise change from "1" or "0"
to "Y" or "N". 

> Since ITMT is x86 specific (and PowerPC uses SD_ASYM_PACKING too), the
> toggle was moved to "/sys/kernel/debug/x86/" as opposed to
> "/sys/kernel/debug/sched/"
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>

Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>

Tim

> ---
>  arch/x86/kernel/itmt.c | 56 ++++++++++++++++++++----------------------
>  1 file changed, 27 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
> index ee43d1bd41d0..9cea1fc36c18 100644
> --- a/arch/x86/kernel/itmt.c
> +++ b/arch/x86/kernel/itmt.c
> @@ -19,6 +19,7 @@
>  #include <linux/sched.h>
>  #include <linux/cpumask.h>
>  #include <linux/cpuset.h>
> +#include <linux/debugfs.h>
>  #include <linux/mutex.h>
>  #include <linux/sysctl.h>
>  #include <linux/nodemask.h>
> @@ -34,45 +35,38 @@ static bool __read_mostly sched_itmt_capable;
>   * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
>   * Technology 3.0.
>   *
> - * It can be set via /proc/sys/kernel/sched_itmt_enabled
> + * It can be set via /sys/kernel/debug/x86/sched_itmt_enabled
>   */
>  bool __read_mostly sysctl_sched_itmt_enabled;
>  
> -static int sched_itmt_update_handler(const struct ctl_table *table, int write,
> -				     void *buffer, size_t *lenp, loff_t *ppos)
> +static ssize_t sched_itmt_enabled_write(struct file *filp,
> +					const char __user *ubuf,
> +					size_t cnt, loff_t *ppos)
>  {
> -	unsigned int old_sysctl;
> -	int ret;
> +	ssize_t result;
> +	bool orig;
>  
>  	guard(mutex)(&itmt_update_mutex);
>  
> -	if (!sched_itmt_capable)
> -		return -EINVAL;
> -
> -	old_sysctl = sysctl_sched_itmt_enabled;
> -	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
> +	orig = sysctl_sched_itmt_enabled;
> +	result = debugfs_write_file_bool(filp, ubuf, cnt, ppos);
>  
> -	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
> +	if (sysctl_sched_itmt_enabled != orig) {
>  		x86_topology_update = true;
>  		rebuild_sched_domains();
>  	}
>  
> -	return ret;
> +	return result;
>  }
>  
> -static struct ctl_table itmt_kern_table[] = {
> -	{
> -		.procname	= "sched_itmt_enabled",
> -		.data		= &sysctl_sched_itmt_enabled,
> -		.maxlen		= sizeof(unsigned int),
> -		.mode		= 0644,
> -		.proc_handler	= sched_itmt_update_handler,
> -		.extra1		= SYSCTL_ZERO,
> -		.extra2		= SYSCTL_ONE,
> -	},
> +static const struct file_operations dfs_sched_itmt_fops = {
> +	.read =         debugfs_read_file_bool,
> +	.write =        sched_itmt_enabled_write,
> +	.open =         simple_open,
> +	.llseek =       default_llseek,
>  };
>  
> -static struct ctl_table_header *itmt_sysctl_header;
> +static struct dentry *dfs_sched_itmt;
>  
>  /**
>   * sched_set_itmt_support() - Indicate platform supports ITMT
> @@ -98,9 +92,15 @@ int sched_set_itmt_support(void)
>  	if (sched_itmt_capable)
>  		return 0;
>  
> -	itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table);
> -	if (!itmt_sysctl_header)
> +	dfs_sched_itmt = debugfs_create_file_unsafe("sched_itmt_enabled",
> +						    0644,
> +						    arch_debugfs_dir,
> +						    &sysctl_sched_itmt_enabled,
> +						    &dfs_sched_itmt_fops);
> +	if (IS_ERR_OR_NULL(dfs_sched_itmt)) {
> +		dfs_sched_itmt = NULL;
>  		return -ENOMEM;
> +	}
>  
>  	sched_itmt_capable = true;
>  
> @@ -131,10 +131,8 @@ void sched_clear_itmt_support(void)
>  
>  	sched_itmt_capable = false;
>  
> -	if (itmt_sysctl_header) {
> -		unregister_sysctl_table(itmt_sysctl_header);
> -		itmt_sysctl_header = NULL;
> -	}
> +	debugfs_remove(dfs_sched_itmt);
> +	dfs_sched_itmt = NULL;
>  
>  	if (sysctl_sched_itmt_enabled) {
>  		/* disable sched_itmt if we are no longer ITMT capable */
Re: [PATCH 3/8] x86/itmt: Move the "sched_itmt_enabled" sysctl to debugfs
Posted by Peter Zijlstra 1 year ago
On Thu, Dec 12, 2024 at 11:15:43AM -0800, Tim Chen wrote:
> On Wed, 2024-12-11 at 18:55 +0000, K Prateek Nayak wrote:
> > "sched_itmt_enabled" was only introduced as a debug toggle for any funky
> > ITMT behavior. Move the sysctl controlled from
> > "/proc/sys/kernel/sched_itmt_enabled" to debugfs at
> > "/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a
> > cat on the file will return "Y" or "N" instead of "1" or "0" to
> > indicate that feature is enabled or disabled respectively.
> > 
> 
> Valid values of setting "sched_itmt_enabled" likewise change from "1" or "0"
> to "Y" or "N". 

Nope, kstrtobool() accepts a ton of options, very much including 1,0.
It's just the print side that has to pick one and went with the Y,N
thing.

Look at the function and marvel how you can even write: oNcology,oFficious
Re: [PATCH 3/8] x86/itmt: Move the "sched_itmt_enabled" sysctl to debugfs
Posted by K Prateek Nayak 1 year ago
Hello Tim,

Thank you for reviewing the series.

On 12/13/2024 12:45 AM, Tim Chen wrote:
> On Wed, 2024-12-11 at 18:55 +0000, K Prateek Nayak wrote:
>> "sched_itmt_enabled" was only introduced as a debug toggle for any funky
>> ITMT behavior. Move the sysctl controlled from
>> "/proc/sys/kernel/sched_itmt_enabled" to debugfs at
>> "/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a
>> cat on the file will return "Y" or "N" instead of "1" or "0" to
>> indicate that feature is enabled or disabled respectively.
>>
> 
> Valid values of setting "sched_itmt_enabled" likewise change from "1" or "0"
> to "Y" or "N".

Turns out you can still use "1" and "0". Running:

     echo Y > /sys/kernel/debug/sched/verbose
     echo "sched_itmt_enabled: $(cat /sys/kernel/debug/x86/sched_itmt_enabled)";
     for i in 0 1 N Y;
     do
         echo "Writing $i to /sys/kernel/debug/x86/sched_itmt_enabled";
         echo $i > /sys/kernel/debug/x86/sched_itmt_enabled;
         echo "sched_itmt_enabled: $(cat /sys/kernel/debug/x86/sched_itmt_enabled)";
         echo "sched domain flags:";
         cat /sys/kernel/debug/sched/domains/cpu0/domain*/flags;
         echo;
     done

Yields the following output on my system:

     sched_itmt_enabled: Y

     Writing 0 to /sys/kernel/debug/x86/sched_itmt_enabled
     sched_itmt_enabled: N
     sched domain flags:
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_PREFER_SIBLING
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_PREFER_SIBLING

     Writing 1 to /sys/kernel/debug/x86/sched_itmt_enabled
     sched_itmt_enabled: Y
     sched domain flags:
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_ASYM_PACKING SD_PREFER_SIBLING
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_PACKING SD_PREFER_SIBLING

     Writing N to /sys/kernel/debug/x86/sched_itmt_enabled
     sched_itmt_enabled: N
     sched domain flags:
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_PREFER_SIBLING
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_PREFER_SIBLING

     Writing Y to /sys/kernel/debug/x86/sched_itmt_enabled
     sched_itmt_enabled: Y
     sched domain flags:
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_ASYM_PACKING SD_PREFER_SIBLING
     SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_PACKING SD_PREFER_SIBLING

Would you like me to extend that note as:

     ... with a notable change that a
     cat on the file will return "Y" or "N" instead of "1" or "0" to
     indicate that feature is enabled or disabled respectively. User can
     either write "0" or "1" to toggle the feature off when enabled, or
     "1" or "Y" to toggle the feature on when disabled.

for the record?

> 
>> Since ITMT is x86 specific (and PowerPC uses SD_ASYM_PACKING too), the
>> toggle was moved to "/sys/kernel/debug/x86/" as opposed to
>> "/sys/kernel/debug/sched/"
>>
>> Suggested-by: Peter Zijlstra <peterz@infradead.org>
>> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
> 
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>

Thank you.

-- 
Thanks and Regards,
Prateek

> 
> Tim
> 
> [..snip..]
>
Re: [PATCH 3/8] x86/itmt: Move the "sched_itmt_enabled" sysctl to debugfs
Posted by Tim Chen 1 year ago
On Fri, 2024-12-13 at 09:31 +0530, K Prateek Nayak wrote:
> Hello Tim,
> 
> Thank you for reviewing the series.
> 
> On 12/13/2024 12:45 AM, Tim Chen wrote:
> > On Wed, 2024-12-11 at 18:55 +0000, K Prateek Nayak wrote:
> > > "sched_itmt_enabled" was only introduced as a debug toggle for any funky
> > > ITMT behavior. Move the sysctl controlled from
> > > "/proc/sys/kernel/sched_itmt_enabled" to debugfs at
> > > "/sys/kernel/debug/x86/sched_itmt_enabled" with a notable change that a
> > > cat on the file will return "Y" or "N" instead of "1" or "0" to
> > > indicate that feature is enabled or disabled respectively.
> > > 
> > 
> > Valid values of setting "sched_itmt_enabled" likewise change from "1" or "0"
> > to "Y" or "N".
> 
> Turns out you can still use "1" and "0". Running:
> 
>      echo Y > /sys/kernel/debug/sched/verbose
>      echo "sched_itmt_enabled: $(cat /sys/kernel/debug/x86/sched_itmt_enabled)";
>      for i in 0 1 N Y;
>      do
>          echo "Writing $i to /sys/kernel/debug/x86/sched_itmt_enabled";
>          echo $i > /sys/kernel/debug/x86/sched_itmt_enabled;
>          echo "sched_itmt_enabled: $(cat /sys/kernel/debug/x86/sched_itmt_enabled)";
>          echo "sched domain flags:";
>          cat /sys/kernel/debug/sched/domains/cpu0/domain*/flags;
>          echo;
>      done
> 
> Yields the following output on my system:
> 
>      sched_itmt_enabled: Y
> 
>      Writing 0 to /sys/kernel/debug/x86/sched_itmt_enabled
>      sched_itmt_enabled: N
>      sched domain flags:
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_PREFER_SIBLING
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_PREFER_SIBLING
> 
>      Writing 1 to /sys/kernel/debug/x86/sched_itmt_enabled
>      sched_itmt_enabled: Y
>      sched domain flags:
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_ASYM_PACKING SD_PREFER_SIBLING
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_PACKING SD_PREFER_SIBLING
> 
>      Writing N to /sys/kernel/debug/x86/sched_itmt_enabled
>      sched_itmt_enabled: N
>      sched domain flags:
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_PREFER_SIBLING
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_PREFER_SIBLING
> 
>      Writing Y to /sys/kernel/debug/x86/sched_itmt_enabled
>      sched_itmt_enabled: Y
>      sched domain flags:
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_CPUCAPACITY SD_SHARE_LLC SD_PREFER_SIBLING
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_SHARE_LLC SD_ASYM_PACKING SD_PREFER_SIBLING
>      SD_BALANCE_NEWIDLE SD_BALANCE_EXEC SD_BALANCE_FORK SD_WAKE_AFFINE SD_ASYM_PACKING SD_PREFER_SIBLING
> 
> Would you like me to extend that note as:
> 
>      ... with a notable change that a
>      cat on the file will return "Y" or "N" instead of "1" or "0" to
>      indicate that feature is enabled or disabled respectively. User can
>      either write "0" or "1" to toggle the feature off when enabled, or
>      "1" or "Y" to toggle the feature on when disabled.
> 
> for the record?
> 
> 
Sure.  Thanks.

Tim