[PATCH v12 13/31] x86,fs/resctrl: Add and initialize rdt_resource for package scope monitor

Tony Luck posted 31 patches 2 months ago
There is a newer version of this series
[PATCH v12 13/31] x86,fs/resctrl: Add and initialize rdt_resource for package scope monitor
Posted by Tony Luck 2 months ago
Add a new PERF_PKG resource and introduce package level scope for
monitoring telemetry events so that CPU hot plug notifiers can build
domains at the package granularity.

Use the physical package ID available via topology_physical_package_id()
to identify the monitoring domains with package level scope. This enables
user space to use:
 /sys/devices/system/cpu/cpuX/topology/physical_package_id
to identify the monitoring domain a CPU is associated with.

Now there is a new monitoring resource, add a WARN to places that
implicitly assume RDT_RESOURCE_L3.

Update some kerneldoc to point out L3 dependencies.

Signed-off-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>
---
 include/linux/resctrl.h            |  2 ++
 fs/resctrl/internal.h              |  6 ++++--
 arch/x86/kernel/cpu/resctrl/core.c | 10 ++++++++++
 fs/resctrl/ctrlmondata.c           |  3 +++
 fs/resctrl/monitor.c               |  3 +++
 fs/resctrl/rdtgroup.c              |  5 ++++-
 6 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 6350064ac8be..ff67224b80c8 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -53,6 +53,7 @@ enum resctrl_res_level {
 	RDT_RESOURCE_L2,
 	RDT_RESOURCE_MBA,
 	RDT_RESOURCE_SMBA,
+	RDT_RESOURCE_PERF_PKG,
 
 	/* Must be the last */
 	RDT_NUM_RESOURCES,
@@ -267,6 +268,7 @@ enum resctrl_scope {
 	RESCTRL_L2_CACHE = 2,
 	RESCTRL_L3_CACHE = 3,
 	RESCTRL_L3_NODE,
+	RESCTRL_PACKAGE,
 };
 
 /**
diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index f5189b6771a0..39bdaf45fa2a 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -92,8 +92,8 @@ extern struct mon_evt mon_event_all[QOS_NUM_EVENTS];
  * @list:            Member of the global @mon_data_kn_priv_list list.
  * @rid:             Resource id associated with the event file.
  * @evt:             Event structure associated with the event file.
- * @sum:             Set when event must be summed across multiple
- *                   domains.
+ * @sum:             Set for RDT_RESOURCE_L3 when event must be summed
+ *                   across multiple domains.
  * @domid:           When @sum is zero this is the domain to which
  *                   the event file belongs. When @sum is one this
  *                   is the id of the L3 cache that all domains to be
@@ -255,6 +255,8 @@ struct rdtgroup {
 
 #define RFTYPE_ASSIGN_CONFIG		BIT(11)
 
+#define RFTYPE_RES_PERF_PKG		BIT(11)
+
 #define RFTYPE_CTRL_INFO		(RFTYPE_INFO | RFTYPE_CTRL)
 
 #define RFTYPE_MON_INFO			(RFTYPE_INFO | RFTYPE_MON)
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 5b6af3b48de3..57a328fdde59 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -100,6 +100,14 @@ struct rdt_hw_resource rdt_resources_all[RDT_NUM_RESOURCES] = {
 			.schema_fmt		= RESCTRL_SCHEMA_RANGE,
 		},
 	},
+	[RDT_RESOURCE_PERF_PKG] =
+	{
+		.r_resctrl = {
+			.name			= "PERF_PKG",
+			.mon_scope		= RESCTRL_PACKAGE,
+			.mon_domains		= mon_domain_init(RDT_RESOURCE_PERF_PKG),
+		},
+	},
 };
 
 u32 resctrl_arch_system_num_rmid_idx(void)
@@ -435,6 +443,8 @@ static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope)
 		return get_cpu_cacheinfo_id(cpu, scope);
 	case RESCTRL_L3_NODE:
 		return cpu_to_node(cpu);
+	case RESCTRL_PACKAGE:
+		return topology_physical_package_id(cpu);
 	default:
 		break;
 	}
diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
index ae43e09fa5e5..d681b71e6eca 100644
--- a/fs/resctrl/ctrlmondata.c
+++ b/fs/resctrl/ctrlmondata.c
@@ -712,6 +712,9 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
 	if (md->sum) {
 		struct rdt_l3_mon_domain *d;
 
+		if (WARN_ON_ONCE(resid != RDT_RESOURCE_L3))
+			return -EINVAL;
+
 		/*
 		 * This file requires summing across all domains that share
 		 * the L3 cache id that was provided in the "domid" field of the
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index 255d4bad24cb..4c984dc6784e 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -501,6 +501,9 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
 	 * all domains fail for any reason.
 	 */
 	ret = -EINVAL;
+	if (WARN_ON_ONCE(rr->r->rid != RDT_RESOURCE_L3))
+		return ret;
+
 	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
 		if (d->ci_id != rr->ci->id)
 			continue;
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index f9bfbdeecf94..c9d2cc1fd8bf 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -2330,6 +2330,8 @@ static unsigned long fflags_from_resource(struct rdt_resource *r)
 	case RDT_RESOURCE_MBA:
 	case RDT_RESOURCE_SMBA:
 		return RFTYPE_RES_MB;
+	case RDT_RESOURCE_PERF_PKG:
+		return RFTYPE_RES_PERF_PKG;
 	}
 
 	return WARN_ON_ONCE(1);
@@ -3028,7 +3030,8 @@ static void rmdir_all_sub(void)
  * @rid:    The resource id for the event file being created.
  * @domid:  The domain id for the event file being created.
  * @mevt:   The type of event file being created.
- * @do_sum: Whether SNC summing monitors are being created.
+ * @do_sum: Whether SNC summing monitors are being created. Only set
+ *	    when @rid == RDT_RESOURCE_L3.
  */
 static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid,
 					struct mon_evt *mevt,
-- 
2.51.0
Re: [PATCH v12 13/31] x86,fs/resctrl: Add and initialize rdt_resource for package scope monitor
Posted by Reinette Chatre 1 month, 3 weeks ago
Hi Tony,

On 10/13/25 3:33 PM, Tony Luck wrote:
> Add a new PERF_PKG resource and introduce package level scope for
> monitoring telemetry events so that CPU hot plug notifiers can build
> domains at the package granularity.
> 
> Use the physical package ID available via topology_physical_package_id()
> to identify the monitoring domains with package level scope. This enables
> user space to use:
>  /sys/devices/system/cpu/cpuX/topology/physical_package_id
> to identify the monitoring domain a CPU is associated with.
> 
> Now there is a new monitoring resource, add a WARN to places that
> implicitly assume RDT_RESOURCE_L3.
> 
> Update some kerneldoc to point out L3 dependencies.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com>

I do not believe that the changes added since my RB tag fit with this patch.

As mentioned in patch #5 I find that (all but one of) the new changes added
to this patch can be merged with those stray hunks found in patch #5 to form one
logical change. 

Below is a high level of what I have in mind presented as a draft changelog
for that new patch:

	The feature to sum event data across multiple domains supports systems
	with Sub-NUMA Cluster (SNC) mode enabled. The top-level monitoring files
	in each "mon_L3_XX" directory provide the sum of data across all SNC
	nodes sharing an L3 cache instance while the "mon_sub_L3_YY" sub-directories
	provide the event data of the individual nodes.

	SNC is only associated with the L3 resource and domains and as a result
	the flow handling the sum of event data implicitly assumes it is
	working with the L3 resource and domains.

	Reading of telemetry events do not require to sum event	data so this
	feature can remain dedicated to SNC and keep the implicit assumption
	of working with the L3 resource and domains.

	Add a WARN to where the implicit assumption of working with the	L3 resource
	is made and add comments on how the structure controlling the event sum
	feature is used.

> ---
> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> index f5189b6771a0..39bdaf45fa2a 100644
> --- a/fs/resctrl/internal.h
> +++ b/fs/resctrl/internal.h
> @@ -92,8 +92,8 @@ extern struct mon_evt mon_event_all[QOS_NUM_EVENTS];
>   * @list:            Member of the global @mon_data_kn_priv_list list.
>   * @rid:             Resource id associated with the event file.
>   * @evt:             Event structure associated with the event file.
> - * @sum:             Set when event must be summed across multiple
> - *                   domains.
> + * @sum:             Set for RDT_RESOURCE_L3 when event must be summed
> + *                   across multiple domains.
>   * @domid:           When @sum is zero this is the domain to which
>   *                   the event file belongs. When @sum is one this
>   *                   is the id of the L3 cache that all domains to be

above can move to the new patch

...

> diff --git a/fs/resctrl/ctrlmondata.c b/fs/resctrl/ctrlmondata.c
> index ae43e09fa5e5..d681b71e6eca 100644
> --- a/fs/resctrl/ctrlmondata.c
> +++ b/fs/resctrl/ctrlmondata.c
> @@ -712,6 +712,9 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg)
>  	if (md->sum) {
>  		struct rdt_l3_mon_domain *d;
>  
> +		if (WARN_ON_ONCE(resid != RDT_RESOURCE_L3))
> +			return -EINVAL;
> +
>  		/*
>  		 * This file requires summing across all domains that share
>  		 * the L3 cache id that was provided in the "domid" field of the

above can move to the new patch ... it can form a new hunk with the hunk from
patch #5 like:


  	if (md->sum) {
 + 		struct rdt_l3_mon_domain *d;
 + 
 +		if (WARN_ON_ONCE(resid != RDT_RESOURCE_L3))
 +			FIXME
 +
  		/*
  		 * This file requires summing across all domains that share
  		 * the L3 cache id that was provided in the "domid" field of the

I added the FIXME because this should not do a direct return here while holding
CPU hotplug lock as well as rdtgroup mutex.

> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index 255d4bad24cb..4c984dc6784e 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -501,6 +501,9 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>  	 * all domains fail for any reason.
>  	 */
>  	ret = -EINVAL;
> +	if (WARN_ON_ONCE(rr->r->rid != RDT_RESOURCE_L3))
> +		return ret;
> +

This looks redundant to me when considering the WARN added to rdtgroup_mondata_show()
... and it is removed in patch #18 anyway.

>  	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
>  		if (d->ci_id != rr->ci->id)
>  			continue;

...

> @@ -3028,7 +3030,8 @@ static void rmdir_all_sub(void)
>   * @rid:    The resource id for the event file being created.
>   * @domid:  The domain id for the event file being created.
>   * @mevt:   The type of event file being created.
> - * @do_sum: Whether SNC summing monitors are being created.
> + * @do_sum: Whether SNC summing monitors are being created. Only set
> + *	    when @rid == RDT_RESOURCE_L3.
>   */
>  static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid,
>  					struct mon_evt *mevt,

above can move to new patch

Reinette