[PATCH v5 01/29] x86,fs/resctrl: Consolidate monitor event descriptions

Tony Luck posted 29 patches 6 months, 3 weeks ago
[PATCH v5 01/29] x86,fs/resctrl: Consolidate monitor event descriptions
Posted by Tony Luck 6 months, 3 weeks ago
There are currently only three monitor events, all associated with
the RDT_RESOURCE_L3 resource. Growing support for additional events
will be easier with some restructuring to have a single point in
file system code where all attributes of all events are defined.

Place all event descriptions into an array mon_event_all[]. Doing
this has the beneficial side effect of removing the need for
rdt_resource::evt_list.

Drop the code that builds evt_list and change the two places where
the list is scanned to scan mon_event_all[] instead.

Architecture code now informs file system code which events are
available  with resctrl_enable_mon_event().

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 include/linux/resctrl.h            |  4 +-
 fs/resctrl/internal.h              | 10 +++--
 arch/x86/kernel/cpu/resctrl/core.c | 12 ++++--
 fs/resctrl/monitor.c               | 63 +++++++++++++++---------------
 fs/resctrl/rdtgroup.c              | 11 +++---
 5 files changed, 55 insertions(+), 45 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 9ba771f2ddea..014cc6fe4a9b 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -269,7 +269,6 @@ enum resctrl_schema_fmt {
  * @mon_domains:	RCU list of all monitor domains for this resource
  * @name:		Name to use in "schemata" file.
  * @schema_fmt:		Which format string and parser is used for this schema.
- * @evt_list:		List of monitoring events
  * @mbm_cfg_mask:	Bandwidth sources that can be tracked when bandwidth
  *			monitoring events can be configured.
  * @cdp_capable:	Is the CDP feature available on this resource
@@ -287,7 +286,6 @@ struct rdt_resource {
 	struct list_head	mon_domains;
 	char			*name;
 	enum resctrl_schema_fmt	schema_fmt;
-	struct list_head	evt_list;
 	unsigned int		mbm_cfg_mask;
 	bool			cdp_capable;
 };
@@ -372,6 +370,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
 u32 resctrl_arch_system_num_rmid_idx(void);
 int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
 
+void resctrl_enable_mon_event(enum resctrl_event_id evtid);
+
 bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
 
 /**
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 9a8cf6f11151..94e635656261 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -52,19 +52,23 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
 }
 
 /**
- * struct mon_evt - Entry in the event list of a resource
+ * struct mon_evt - Description of a monitor event
  * @evtid:		event id
+ * @rid:		index of the resource for this event
  * @name:		name of the event
  * @configurable:	true if the event is configurable
- * @list:		entry in &rdt_resource->evt_list
+ * @enabled:		true if the event is enabled
  */
 struct mon_evt {
 	enum resctrl_event_id	evtid;
+	enum resctrl_res_level	rid;
 	char			*name;
 	bool			configurable;
-	struct list_head	list;
+	bool			enabled;
 };
 
+extern struct mon_evt mon_event_all[QOS_NUM_EVENTS];
+
 /**
  * struct mon_data - Monitoring details for each event file.
  * @list:            Member of the global @mon_data_kn_priv_list list.
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 224bed28f341..3d74c2d3dcea 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -861,12 +861,18 @@ static __init bool get_rdt_mon_resources(void)
 {
 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
 
-	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC))
+	if (rdt_cpu_has(X86_FEATURE_CQM_OCCUP_LLC)) {
+		resctrl_enable_mon_event(QOS_L3_OCCUP_EVENT_ID);
 		rdt_mon_features |= (1 << QOS_L3_OCCUP_EVENT_ID);
-	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL))
+	}
+	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
+		resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID);
 		rdt_mon_features |= (1 << QOS_L3_MBM_TOTAL_EVENT_ID);
-	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL))
+	}
+	if (rdt_cpu_has(X86_FEATURE_CQM_MBM_LOCAL)) {
+		resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID);
 		rdt_mon_features |= (1 << QOS_L3_MBM_LOCAL_EVENT_ID);
+	}
 
 	if (!rdt_mon_features)
 		return false;
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index bde2801289d3..31c81d703ff4 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -842,38 +842,39 @@ static void dom_data_exit(struct rdt_resource *r)
 	mutex_unlock(&rdtgroup_mutex);
 }
 
-static struct mon_evt llc_occupancy_event = {
-	.name		= "llc_occupancy",
-	.evtid		= QOS_L3_OCCUP_EVENT_ID,
-};
-
-static struct mon_evt mbm_total_event = {
-	.name		= "mbm_total_bytes",
-	.evtid		= QOS_L3_MBM_TOTAL_EVENT_ID,
-};
-
-static struct mon_evt mbm_local_event = {
-	.name		= "mbm_local_bytes",
-	.evtid		= QOS_L3_MBM_LOCAL_EVENT_ID,
-};
-
 /*
- * Initialize the event list for the resource.
- *
- * Note that MBM events are also part of RDT_RESOURCE_L3 resource
- * because as per the SDM the total and local memory bandwidth
- * are enumerated as part of L3 monitoring.
+ * All available events. Architecture code marks the ones that
+ * are supported by a system using resctrl_enable_mon_event()
+ * to set .enabled.
  */
-static void l3_mon_evt_init(struct rdt_resource *r)
+struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
+	[QOS_L3_OCCUP_EVENT_ID] = {
+		.name	= "llc_occupancy",
+		.evtid	= QOS_L3_OCCUP_EVENT_ID,
+		.rid	= RDT_RESOURCE_L3,
+	},
+	[QOS_L3_MBM_TOTAL_EVENT_ID] = {
+		.name	= "mbm_total_bytes",
+		.evtid	= QOS_L3_MBM_TOTAL_EVENT_ID,
+		.rid	= RDT_RESOURCE_L3,
+	},
+	[QOS_L3_MBM_LOCAL_EVENT_ID] = {
+		.name	= "mbm_local_bytes",
+		.evtid	= QOS_L3_MBM_LOCAL_EVENT_ID,
+		.rid	= RDT_RESOURCE_L3,
+	},
+};
+
+void resctrl_enable_mon_event(enum resctrl_event_id evtid)
 {
-	INIT_LIST_HEAD(&r->evt_list);
+	if (WARN_ON_ONCE(evtid >= QOS_NUM_EVENTS))
+		return;
+	if (mon_event_all[evtid].enabled) {
+		pr_warn("Duplicate enable for event %d\n", evtid);
+		return;
+	}
 
-	if (resctrl_arch_is_llc_occupancy_enabled())
-		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
-	if (resctrl_arch_is_mbm_total_enabled())
-		list_add_tail(&mbm_total_event.list, &r->evt_list);
-	if (resctrl_arch_is_mbm_local_enabled())
-		list_add_tail(&mbm_local_event.list, &r->evt_list);
+	mon_event_all[evtid].enabled = true;
 }
 
 /**
@@ -900,15 +901,13 @@ int resctrl_mon_resource_init(void)
 	if (ret)
 		return ret;
 
-	l3_mon_evt_init(r);
-
 	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
-		mbm_total_event.configurable = true;
+		mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
 		resctrl_file_fflags_init("mbm_total_bytes_config",
 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
 	}
 	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
-		mbm_local_event.configurable = true;
+		mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
 		resctrl_file_fflags_init("mbm_local_bytes_config",
 					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
 	}
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index cc37f58b47dd..69e0d40c4449 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -1150,7 +1150,9 @@ static int rdt_mon_features_show(struct kernfs_open_file *of,
 	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
 	struct mon_evt *mevt;
 
-	list_for_each_entry(mevt, &r->evt_list, list) {
+	for (mevt = &mon_event_all[0]; mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++) {
+		if (mevt->rid != r->rid || !mevt->enabled)
+			continue;
 		seq_printf(seq, "%s\n", mevt->name);
 		if (mevt->configurable)
 			seq_printf(seq, "%s_config\n", mevt->name);
@@ -3055,10 +3057,9 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
 	struct mon_evt *mevt;
 	int ret, domid;
 
-	if (WARN_ON(list_empty(&r->evt_list)))
-		return -EPERM;
-
-	list_for_each_entry(mevt, &r->evt_list, list) {
+	for (mevt = &mon_event_all[0]; mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++) {
+		if (mevt->rid != r->rid || !mevt->enabled)
+			continue;
 		domid = do_sum ? d->ci->id : d->hdr.id;
 		priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
 		if (WARN_ON_ONCE(!priv))
-- 
2.49.0
Re: [PATCH v5 01/29] x86,fs/resctrl: Consolidate monitor event descriptions
Posted by Reinette Chatre 6 months, 1 week ago
Hi Tony,

On 5/21/25 3:50 PM, Tony Luck wrote:
> There are currently only three monitor events, all associated with
> the RDT_RESOURCE_L3 resource. Growing support for additional events
> will be easier with some restructuring to have a single point in
> file system code where all attributes of all events are defined.
> 
> Place all event descriptions into an array mon_event_all[]. Doing
> this has the beneficial side effect of removing the need for
> rdt_resource::evt_list.
> 
> Drop the code that builds evt_list and change the two places where
> the list is scanned to scan mon_event_all[] instead.
> 
> Architecture code now informs file system code which events are
> available  with resctrl_enable_mon_event().

nit: extra space above

> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---

...

> @@ -372,6 +370,8 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r);
>  u32 resctrl_arch_system_num_rmid_idx(void);
>  int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid);
>  
> +void resctrl_enable_mon_event(enum resctrl_event_id evtid);
> +
>  bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);

nit: When code is consistent in name use it is easier to read. 
Above there is already resctrl_arch_is_evt_configurable() that uses "evt"
as parameter name so naming the new parameter "evt" instead of "evtid"
will be much easier on the eye to make clear that this is the "same thing".
Also later, when resctrl_is_mbm_event() is moved it will be clean to have
it also use "evt" as parameter name and not end up with three different
"evtid", "evt",  and "e" for these related functions.

>  
>  /**
> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> index 9a8cf6f11151..94e635656261 100644
> --- a/fs/resctrl/internal.h
> +++ b/fs/resctrl/internal.h
> @@ -52,19 +52,23 @@ static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
>  }
>  
>  /**
> - * struct mon_evt - Entry in the event list of a resource
> + * struct mon_evt - Description of a monitor event

nit: "Description" -> "Properties"?

>   * @evtid:		event id
> + * @rid:		index of the resource for this event
>   * @name:		name of the event
>   * @configurable:	true if the event is configurable
> - * @list:		entry in &rdt_resource->evt_list
> + * @enabled:		true if the event is enabled
>   */
>  struct mon_evt {
>  	enum resctrl_event_id	evtid;
> +	enum resctrl_res_level	rid;
>  	char			*name;
>  	bool			configurable;
> -	struct list_head	list;
> +	bool			enabled;
>  };
>  

...

> -static void l3_mon_evt_init(struct rdt_resource *r)
> +struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
> +	[QOS_L3_OCCUP_EVENT_ID] = {
> +		.name	= "llc_occupancy",
> +		.evtid	= QOS_L3_OCCUP_EVENT_ID,
> +		.rid	= RDT_RESOURCE_L3,
> +	},
> +	[QOS_L3_MBM_TOTAL_EVENT_ID] = {
> +		.name	= "mbm_total_bytes",
> +		.evtid	= QOS_L3_MBM_TOTAL_EVENT_ID,
> +		.rid	= RDT_RESOURCE_L3,
> +	},
> +	[QOS_L3_MBM_LOCAL_EVENT_ID] = {
> +		.name	= "mbm_local_bytes",
> +		.evtid	= QOS_L3_MBM_LOCAL_EVENT_ID,
> +		.rid	= RDT_RESOURCE_L3,
> +	},
> +};
> +
> +void resctrl_enable_mon_event(enum resctrl_event_id evtid)
>  {
> -	INIT_LIST_HEAD(&r->evt_list);
> +	if (WARN_ON_ONCE(evtid >= QOS_NUM_EVENTS))

If the goal is range checking then there should be a lower limit also.
With the event IDs starting at 1 it could be useful to ensure that
the range check considers that. To help with this I think it will be
useful to introduce a new enum value, for example QOS_FIRST_EVENT,
that is the same value as QOS_L3_OCCUP_EVENT_ID, and use it in
range checking (more below).

> +		return;
> +	if (mon_event_all[evtid].enabled) {
> +		pr_warn("Duplicate enable for event %d\n", evtid);
> +		return;
> +	}
>  
> -	if (resctrl_arch_is_llc_occupancy_enabled())
> -		list_add_tail(&llc_occupancy_event.list, &r->evt_list);
> -	if (resctrl_arch_is_mbm_total_enabled())
> -		list_add_tail(&mbm_total_event.list, &r->evt_list);
> -	if (resctrl_arch_is_mbm_local_enabled())
> -		list_add_tail(&mbm_local_event.list, &r->evt_list);
> +	mon_event_all[evtid].enabled = true;
>  }
>  
>  /**
> @@ -900,15 +901,13 @@ int resctrl_mon_resource_init(void)
>  	if (ret)
>  		return ret;
>  
> -	l3_mon_evt_init(r);
> -
>  	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
> -		mbm_total_event.configurable = true;
> +		mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
>  		resctrl_file_fflags_init("mbm_total_bytes_config",
>  					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
>  	}
>  	if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
> -		mbm_local_event.configurable = true;
> +		mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
>  		resctrl_file_fflags_init("mbm_local_bytes_config",
>  					 RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
>  	}
> diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
> index cc37f58b47dd..69e0d40c4449 100644
> --- a/fs/resctrl/rdtgroup.c
> +++ b/fs/resctrl/rdtgroup.c
> @@ -1150,7 +1150,9 @@ static int rdt_mon_features_show(struct kernfs_open_file *of,
>  	struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
>  	struct mon_evt *mevt;
>  
> -	list_for_each_entry(mevt, &r->evt_list, list) {
> +	for (mevt = &mon_event_all[0]; mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++) {

This looks risky to have the pattern start with mon_event_all[0] when that
array entry is not fully initialized. With a first array entry of all zeroes
a blind copy of this pattern may trip on a false positive with 0 being a
valid resource ID. Here also I think it will help to have a new enum
value of, for example, QOS_FIRST_EVENT, and have the iteration start with it.
This could also be simplified with a helper, for example for_each_mon_evt()
or for_each_mon_event(), that can be used instead of open coding this. If you
do decide to do this it may be useful to rename for_each_mbm_event() to, for example,
for_each_mbm_event_id() to better reflect how that helper is different.


> +		if (mevt->rid != r->rid || !mevt->enabled)
> +			continue;
>  		seq_printf(seq, "%s\n", mevt->name);
>  		if (mevt->configurable)
>  			seq_printf(seq, "%s_config\n", mevt->name);
> @@ -3055,10 +3057,9 @@ static int mon_add_all_files(struct kernfs_node *kn, struct rdt_mon_domain *d,
>  	struct mon_evt *mevt;
>  	int ret, domid;
>  
> -	if (WARN_ON(list_empty(&r->evt_list)))
> -		return -EPERM;
> -
> -	list_for_each_entry(mevt, &r->evt_list, list) {
> +	for (mevt = &mon_event_all[0]; mevt < &mon_event_all[QOS_NUM_EVENTS]; mevt++) {
> +		if (mevt->rid != r->rid || !mevt->enabled)
> +			continue;
>  		domid = do_sum ? d->ci->id : d->hdr.id;
>  		priv = mon_get_kn_priv(r->rid, domid, mevt, do_sum);
>  		if (WARN_ON_ONCE(!priv))

Reinette
Re: [PATCH v5 01/29] x86,fs/resctrl: Consolidate monitor event descriptions
Posted by Luck, Tony 6 months, 1 week ago
On Tue, Jun 03, 2025 at 08:25:56PM -0700, Reinette Chatre wrote:
> Hi Tony,
> > +void resctrl_enable_mon_event(enum resctrl_event_id evtid);
> > +
> >  bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
> 
> nit: When code is consistent in name use it is easier to read. 
> Above there is already resctrl_arch_is_evt_configurable() that uses "evt"
> as parameter name so naming the new parameter "evt" instead of "evtid"
> will be much easier on the eye to make clear that this is the "same thing".
> Also later, when resctrl_is_mbm_event() is moved it will be clean to have
> it also use "evt" as parameter name and not end up with three different
> "evtid", "evt",  and "e" for these related functions.

Should I also clean up existing muddled naming?  Upstream has the
following names for parameters and local variables of type enum
resctrl_event_id (counts for number of occurrences of each):

      6 eventid
      2 evt
      1 evt_id
      3 evtid
      2 mba_mbps_default_event
      1 mba_mbps_event

It seems that "eventid" is the most popular of existing uses.

Also seems the most descriptive.

Perhaps "mevt" would be a good standard choice for "struct mon_evt *mevt"?
Upstream uses this three times, but I add some extra using "*evt".

-Tony
Re: [PATCH v5 01/29] x86,fs/resctrl: Consolidate monitor event descriptions
Posted by Reinette Chatre 6 months, 1 week ago
Hi Tony,

On 6/4/25 9:33 AM, Luck, Tony wrote:
> On Tue, Jun 03, 2025 at 08:25:56PM -0700, Reinette Chatre wrote:
>> Hi Tony,
>>> +void resctrl_enable_mon_event(enum resctrl_event_id evtid);
>>> +
>>>  bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt);
>>
>> nit: When code is consistent in name use it is easier to read. 
>> Above there is already resctrl_arch_is_evt_configurable() that uses "evt"
>> as parameter name so naming the new parameter "evt" instead of "evtid"
>> will be much easier on the eye to make clear that this is the "same thing".
>> Also later, when resctrl_is_mbm_event() is moved it will be clean to have
>> it also use "evt" as parameter name and not end up with three different
>> "evtid", "evt",  and "e" for these related functions.
> 
> Should I also clean up existing muddled naming?  

I think that matching code in same area and not making things more messy is
in the scope of this series. Cleaning up resctrl's variable names is not related
to this work. If you instead find that doing this cleanup simplifies your
contribution then you are welcome to add such changes.

Reinette