[PATCH v15 24/32] x86/resctrl: Add energy/perf choices to rdt boot option

Tony Luck posted 32 patches 2 weeks ago
There is a newer version of this series
[PATCH v15 24/32] x86/resctrl: Add energy/perf choices to rdt boot option
Posted by Tony Luck 2 weeks ago
Legacy resctrl features are enumerated by X86_FEATURE_* flags. These may be
overridden by quirks to disable features in the case of errata.  Users can
use kernel command line options to either disable a feature, or to force
enable a feature that was disabled by a quirk.

A different approach is needed for hardware features that do not have an
X86_FEATURE_* flag.

Update the parse loop of the "rdt=" boot option with a call to intel_aet_option()
to handle the "perf" and "energy" options. Prefixing an option with "!" force
disables a feature. A ":guid" suffix allows for fine-grained control per guid.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 .../admin-guide/kernel-parameters.txt         |  7 +++-
 arch/x86/kernel/cpu/resctrl/internal.h        |  2 ++
 arch/x86/kernel/cpu/resctrl/core.c            |  2 ++
 arch/x86/kernel/cpu/resctrl/intel_aet.c       | 34 +++++++++++++++++++
 4 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2b465eab41a1..cc9d2800abeb 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6217,9 +6217,14 @@
 	rdt=		[HW,X86,RDT]
 			Turn on/off individual RDT features. List is:
 			cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
-			mba, smba, bmec, abmc, sdciae.
+			mba, smba, bmec, abmc, sdciae, energy[:guid],
+			perf[:guid].
 			E.g. to turn on cmt and turn off mba use:
 				rdt=cmt,!mba
+			To turn off all energy telemetry monitoring and ensure that
+			perf telemetry monitoring associated with guid 0x12345
+			is enabled use:
+				rdt=!energy,perf:0x12345
 
 	reboot=		[KNL]
 			Format (x86 or x86_64):
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 3b228b241fb2..df09091f7c6c 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -236,6 +236,7 @@ void __exit intel_aet_exit(void);
 int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val);
 void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
 				struct list_head *add_pos);
+bool intel_aet_option(bool force_off, char *tok);
 #else
 static inline bool intel_aet_get_events(void) { return false; }
 static inline void __exit intel_aet_exit(void) { }
@@ -246,6 +247,7 @@ static inline int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64
 
 static inline void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
 					      struct list_head *add_pos) { }
+static inline bool intel_aet_option(bool force_off, char *tok) { return false; }
 #endif
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 283d653002a2..960974ffa866 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -820,6 +820,8 @@ static int __init set_rdt_options(char *str)
 		force_off = *tok == '!';
 		if (force_off)
 			tok++;
+		if (intel_aet_option(force_off, tok))
+			continue;
 		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
 			if (strcmp(tok, o->name) == 0) {
 				if (force_off)
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 8fcd72fca81f..fec4bb781f82 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -59,6 +59,10 @@ struct pmt_event {
  *			data for all telemetry regions of type @pfname.
  *			Valid if the system supports the event group,
  *			NULL otherwise.
+ * @force_off:		True when "rdt" command line disables this @guid
+ *			or architecture code disables this @guid.
+ * @force_on:		True when "rdt" command line overrides disable of
+ *			this @guid.
  * @guid:		Unique number per XML description file.
  * @mmio_size:		Number of bytes of MMIO registers for this group.
  * @num_events:		Number of events in this group.
@@ -68,6 +72,7 @@ struct event_group {
 	/* Data fields for additional structures to manage this group. */
 	const char			*pfname;
 	struct pmt_feature_group	*pfg;
+	bool				force_off, force_on;
 
 	/* Remaining fields initialized from XML file. */
 	u32				guid;
@@ -122,6 +127,32 @@ static struct event_group *known_event_groups[] = {
 	     _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)];	\
 	     _peg++)
 
+bool intel_aet_option(bool force_off, char *tok)
+{
+	struct event_group **peg;
+	bool ret = false;
+	u32 guid = 0;
+	char *name;
+
+	name = strsep(&tok, ":");
+	if (tok && kstrtou32(tok, 16, &guid))
+		return false;
+
+	for_each_event_group(peg) {
+		if (strcmp(name, (*peg)->pfname))
+			continue;
+		if (guid && (*peg)->guid != guid)
+			continue;
+		if (force_off)
+			(*peg)->force_off = true;
+		else
+			(*peg)->force_on = true;
+		ret = true;
+	}
+
+	return ret;
+}
+
 /*
  * Clear the address field of regions that did not pass the checks in
  * skip_telem_region() so they will not be used by intel_aet_read_event().
@@ -173,6 +204,9 @@ static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
 	int skipped_events = 0;
 
+	if (e->force_off)
+		return false;
+
 	if (!group_has_usable_regions(e, p))
 		return false;
 
-- 
2.51.1
Re: [PATCH v15 24/32] x86/resctrl: Add energy/perf choices to rdt boot option
Posted by Reinette Chatre 1 week, 2 days ago
Hi Tony,

On 12/4/25 12:53 PM, Tony Luck wrote:
> Legacy resctrl features are enumerated by X86_FEATURE_* flags. These may be
> overridden by quirks to disable features in the case of errata.  Users can
> use kernel command line options to either disable a feature, or to force
> enable a feature that was disabled by a quirk.
> 
> A different approach is needed for hardware features that do not have an
> X86_FEATURE_* flag.
> 
> Update the parse loop of the "rdt=" boot option with a call to intel_aet_option()
> to handles "perf" and "energy" options. Prefixing an option with "!" force

"to handles" -> "to handle"? ... although ... "Update the parse loop ... with a call to
intel_aet_option()" just describes what can be seen from the patch. Could this instead
describe at a higher level what this code accomplishes? Consider, for example:

	Update parsing of the "rdt=" boot parameter to call the telemetry driver
	directly to handle new "perf" and "energy" options that control activation
	of telemetry monitoring of the named type. By itself a "perf" or "energy" option
	controls the forced enabling or disabling (with ! prefix) of all event groups of
	the named type. A ":guid" suffix allows for fine-grained control per event group.

> disables a feature. A ":guid" suffix allows for fine grain control per-guid.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  .../admin-guide/kernel-parameters.txt         |  7 +++-
>  arch/x86/kernel/cpu/resctrl/internal.h        |  2 ++
>  arch/x86/kernel/cpu/resctrl/core.c            |  2 ++
>  arch/x86/kernel/cpu/resctrl/intel_aet.c       | 34 +++++++++++++++++++
>  4 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 2b465eab41a1..cc9d2800abeb 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6217,9 +6217,14 @@
>  	rdt=		[HW,X86,RDT]
>  			Turn on/off individual RDT features. List is:
>  			cmt, mbmtotal, mbmlocal, l3cat, l3cdp, l2cat, l2cdp,
> -			mba, smba, bmec, abmc, sdciae.
> +			mba, smba, bmec, abmc, sdciae, energy[:guid],
> +			perf[:guid].
>  			E.g. to turn on cmt and turn off mba use:
>  				rdt=cmt,!mba
> +			To turn off all energy telemetry monitoring and ensure that
> +			perf telemetry monitoring associated with guid 0x12345
> +			is enabled use:
> +				rdt=!energy,perf:0x12345
>  
>  	reboot=		[KNL]
>  			Format (x86 or x86_64):
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 3b228b241fb2..df09091f7c6c 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -236,6 +236,7 @@ void __exit intel_aet_exit(void);
>  int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64 *val);
>  void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
>  				struct list_head *add_pos);
> +bool intel_aet_option(bool force_off, char *tok);
>  #else
>  static inline bool intel_aet_get_events(void) { return false; }
>  static inline void __exit intel_aet_exit(void) { }
> @@ -246,6 +247,7 @@ static inline int intel_aet_read_event(int domid, u32 rmid, void *arch_priv, u64
>  
>  static inline void intel_aet_mon_domain_setup(int cpu, int id, struct rdt_resource *r,
>  					      struct list_head *add_pos) { }
> +static inline bool intel_aet_option(bool force_off, char *tok) { return false; }
>  #endif
>  
>  #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
> diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
> index 283d653002a2..960974ffa866 100644
> --- a/arch/x86/kernel/cpu/resctrl/core.c
> +++ b/arch/x86/kernel/cpu/resctrl/core.c
> @@ -820,6 +820,8 @@ static int __init set_rdt_options(char *str)
>  		force_off = *tok == '!';
>  		if (force_off)
>  			tok++;
> +		if (intel_aet_option(force_off, tok))
> +			continue;
>  		for (o = rdt_options; o < &rdt_options[NUM_RDT_OPTIONS]; o++) {
>  			if (strcmp(tok, o->name) == 0) {
>  				if (force_off)
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index 8fcd72fca81f..fec4bb781f82 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -59,6 +59,10 @@ struct pmt_event {
>   *			data for all telemetry regions of type @pfname.
>   *			Valid if the system supports the event group,
>   *			NULL otherwise.
> + * @force_off:		True when "rdt" command line disables this @guid
> + *			or architecture code disables this @guid.

"disables this @guid" is written twice. Even so, is the scope not event group? That is, would
it not be more accurate to say:
		True when "rdt" command line or architecture disables this event group.

> + * @force_on:		True when "rdt" command line overrides disable of
> + *			this @guid.

		"True when "rdt" command line overrides disable of this event group."?

>   * @guid:		Unique number per XML description file.
>   * @mmio_size:		Number of bytes of MMIO registers for this group.
>   * @num_events:		Number of events in this group.
> @@ -68,6 +72,7 @@ struct event_group {
>  	/* Data fields for additional structures to manage this group. */
>  	const char			*pfname;
>  	struct pmt_feature_group	*pfg;
> +	bool				force_off, force_on;
>  
>  	/* Remaining fields initialized from XML file. */
>  	u32				guid;
> @@ -122,6 +127,32 @@ static struct event_group *known_event_groups[] = {
>  	     _peg < &known_event_groups[ARRAY_SIZE(known_event_groups)];	\
>  	     _peg++)
>  
> +bool intel_aet_option(bool force_off, char *tok)
> +{
> +	struct event_group **peg;
> +	bool ret = false;
> +	u32 guid = 0;
> +	char *name;
> +

Could you please add a NULL check for tok? 

> +	name = strsep(&tok, ":");
> +	if (tok && kstrtou32(tok, 16, &guid))
> +		return false;
> +
> +	for_each_event_group(peg) {
> +		if (strcmp(name, (*peg)->pfname))
> +			continue;
> +		if (guid && (*peg)->guid != guid)
> +			continue;
> +		if (force_off)
> +			(*peg)->force_off = true;
> +		else
> +			(*peg)->force_on = true;
> +		ret = true;
> +	}
> +
> +	return ret;
> +}
> +
>  /*
>   * Clear the address field of regions that did not pass the checks in
>   * skip_telem_region() so they will not be used by intel_aet_read_event().
> @@ -173,6 +204,9 @@ static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
>  	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_PERF_PKG].r_resctrl;
>  	int skipped_events = 0;
>  
> +	if (e->force_off)
> +		return false;
> +
>  	if (!group_has_usable_regions(e, p))
>  		return false;
>  

Reinette