[PATCH v8 22/32] x86/resctrl: Read telemetry events

Tony Luck posted 32 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH v8 22/32] x86/resctrl: Read telemetry events
Posted by Tony Luck 1 month, 3 weeks ago
Telemetry events are enumerated by the INTEL_PMT_TELEMETRY subsystem.
resctrl enables events with resctrl_enable_mon_event() passing a pointer
to the pmt_event structure for the event within the struct event_group.
The file system stores it in mon_evt::arch_priv.

Add a check to resctrl_arch_rmid_read() for resource id
RDT_RESOURCE_PERF_PKG and directly call intel_aet_read_event()
passing the enum resctrl_event_id for the event and the arch_priv
pointer that was supplied when the event was enabled.

There may be multiple aggregators tracking each package, so scan all of
them and add up all counters.

Resctrl now uses readq() so depends on X86_64. Update Kconfig.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/internal.h  |  7 ++++
 arch/x86/kernel/cpu/resctrl/intel_aet.c | 44 +++++++++++++++++++++++++
 arch/x86/kernel/cpu/resctrl/monitor.c   |  3 ++
 arch/x86/Kconfig                        |  2 +-
 4 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 710493ec6548..b2f0769f63f6 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -172,9 +172,16 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
 #ifdef CONFIG_X86_CPU_RESCTRL_INTEL_AET
 bool intel_aet_get_events(void);
 void __exit intel_aet_exit(void);
+int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,
+			 void *arch_priv, u64 *val);
 #else
 static inline bool intel_aet_get_events(void) { return false; }
 static inline void __exit intel_aet_exit(void) { }
+static inline int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,
+				       void *arch_priv, u64 *val)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index dbd317e99ee0..5d49ed446b94 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -14,6 +14,7 @@
 #include <linux/cleanup.h>
 #include <linux/cpu.h>
 #include <linux/intel_vsec.h>
+#include <linux/io.h>
 #include <linux/resctrl.h>
 #include <linux/slab.h>
 
@@ -211,6 +212,9 @@ static int discover_events(struct event_group *e, struct pmt_feature_group *p)
 
 	list_add(&e->list, &active_event_groups);
 
+	for (int i = 0; i < e->num_events; i++)
+		resctrl_enable_mon_event(e->evts[i].id, true, e->evts[i].bin_bits, &e->evts[i]);
+
 	return 0;
 }
 
@@ -278,3 +282,43 @@ void __exit intel_aet_exit(void)
 		list_del(&evg->list);
 	}
 }
+
+#define DATA_VALID	BIT_ULL(63)
+#define DATA_BITS	GENMASK_ULL(62, 0)
+
+/*
+ * Read counter for an event on a domain (summing all aggregators
+ * on the domain).
+ */
+int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id eventid,
+			 void *arch_priv, u64 *val)
+{
+	struct pmt_event *pevt = arch_priv;
+	struct pkg_mmio_info *mmi;
+	struct event_group *e;
+	bool valid = false;
+	u64 evtcount;
+	void *pevt0;
+	int idx;
+
+	pevt0 = pevt - pevt->idx;
+	e = container_of(pevt0, struct event_group, evts);
+	idx = rmid * e->num_events;
+	idx += pevt->idx;
+	mmi = e->pkginfo[domid];
+
+	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
+		pr_warn_once("MMIO index %d out of range\n", idx);
+		return -EIO;
+	}
+
+	for (int i = 0; i < mmi->num_regions; i++) {
+		evtcount = readq(mmi->addrs[i] + idx * sizeof(u64));
+		if (!(evtcount & DATA_VALID))
+			continue;
+		*val += evtcount & DATA_BITS;
+		valid = true;
+	}
+
+	return valid ? 0 : -EINVAL;
+}
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 185b203f6321..51d7d99336c6 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -232,6 +232,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
 
 	resctrl_arch_rmid_read_context_check();
 
+	if (r->rid == RDT_RESOURCE_PERF_PKG)
+		return intel_aet_read_event(hdr->id, rmid, eventid, arch_priv, val);
+
 	if (r->rid != RDT_RESOURCE_L3)
 		return -EINVAL;
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 56f0ff94c430..32beb66f0a92 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -527,7 +527,7 @@ config X86_CPU_RESCTRL
 
 config X86_CPU_RESCTRL_INTEL_AET
 	bool "Intel Application Energy Telemetry" if INTEL_PMT_TELEMETRY=y && INTEL_TPMI=y
-	depends on X86_CPU_RESCTRL && CPU_SUP_INTEL
+	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL
 	help
 	  Enable per-RMID telemetry events in resctrl
 
-- 
2.50.1
Re: [PATCH v8 22/32] x86/resctrl: Read telemetry events
Posted by Reinette Chatre 1 month, 3 weeks ago
Hi Tony,

Subject: "x86/resctrl: Enable and read telemetry events"?

On 8/11/25 11:16 AM, Tony Luck wrote:
> Telemetry events are enumerated by the INTEL_PMT_TELEMETRY subsystem.

Above is the context but does not actually describe what this patch builds on.
Below is something to start working from:

	The active event groups are known after matching the known event groups
	with the system's telemetry events enumerated by the INTEL_PMT_TELEMETRY
	subsystem.

	Enable the active events in resctrl filesystem to make them available to
	user space. Pass a pointer to the pmt_event structure of the event within
	the struct event_group that resctrl stores in mon_evt::arch_priv. resctrl
	passes this pointer back when asking to read the event data which enables
	the data to be found in MMIO.

	...

	
> resctrl enables events with resctrl_enable_mon_event() passing a pointer
> to the pmt_event structure for the event within the struct event_group.
> The file system stores it in mon_evt::arch_priv.
> 
> Add a check to resctrl_arch_rmid_read() for resource id
> RDT_RESOURCE_PERF_PKG and directly call intel_aet_read_event()
> passing the enum resctrl_event_id for the event and the arch_priv
> pointer that was supplied when the event was enabled.
> 
> There may be multiple aggregators tracking each package, so scan all of
> them and add up all counters.

As mentioned below, it is possible for some aggregators to not return valid data,
and this is still treated as a success. The user will not be aware when this happens.
What is the likelihood of this happening? Should the user be made aware when it
happens?

> 
> Resctrl now uses readq() so depends on X86_64. Update Kconfig.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---

...

>  
> @@ -211,6 +212,9 @@ static int discover_events(struct event_group *e, struct pmt_feature_group *p)

Recurring feedback to this series is that in the beginning of the series
discover_events() gets function comments that describe the "steps" of discovery ... but
later in the series (like in this patch), when discover_events() is updated, these comments/steps
are no longer updated.
Looking at the final work, discover_events() thus has function comments with only two steps
that document only part of what it does.

I actually find comments within the function that describe what an associated snippet does more
helpful than lumping everything into a list at the top of the function. For example, the change
below could get a comment like:
	/*
	 * Enable all events of active event group. Pass pointer to event's struct pmt_event
	 * as private data that resctrl fs includes when it requests to read the counter.
	 */

>  
>  	list_add(&e->list, &active_event_groups);
>  
> +	for (int i = 0; i < e->num_events; i++)
> +		resctrl_enable_mon_event(e->evts[i].id, true, e->evts[i].bin_bits, &e->evts[i]);
> +
>  	return 0;
>  }
>  
> @@ -278,3 +282,43 @@ void __exit intel_aet_exit(void)
>  		list_del(&evg->list);
>  	}
>  }
> +
> +#define DATA_VALID	BIT_ULL(63)
> +#define DATA_BITS	GENMASK_ULL(62, 0)
> +
> +/*
> + * Read counter for an event on a domain (summing all aggregators
> + * on the domain).

The function comment can highlight that this is intentional: as long as
at least one aggregator returns valid data, the read is considered a success,
which means partial data may be returned to user space without the
user being aware.

> + */
> +int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id eventid,
> +			 void *arch_priv, u64 *val)
> +{
> +	struct pmt_event *pevt = arch_priv;
> +	struct pkg_mmio_info *mmi;
> +	struct event_group *e;
> +	bool valid = false;
> +	u64 evtcount;
> +	void *pevt0;
> +	int idx;
> +
> +	pevt0 = pevt - pevt->idx;
> +	e = container_of(pevt0, struct event_group, evts);
> +	idx = rmid * e->num_events;
> +	idx += pevt->idx;
> +	mmi = e->pkginfo[domid];
> +
> +	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
> +		pr_warn_once("MMIO index %d out of range\n", idx);
> +		return -EIO;
> +	}
> +
> +	for (int i = 0; i < mmi->num_regions; i++) {
> +		evtcount = readq(mmi->addrs[i] + idx * sizeof(u64));
> +		if (!(evtcount & DATA_VALID))
> +			continue;
> +		*val += evtcount & DATA_BITS;
> +		valid = true;
> +	}
> +
> +	return valid ? 0 : -EINVAL;
> +}

Reinette