[PATCH v9 21/31] x86/resctrl: Read telemetry events

Tony Luck posted 31 patches 1 month ago
There is a newer version of this series
[PATCH v9 21/31] x86/resctrl: Read telemetry events
Posted by Tony Luck 1 month ago
Telemetry events are enumerated by the INTEL_PMT_TELEMETRY subsystem.
resctrl enables events with resctrl_enable_mon_event() passing a pointer
to the pmt_event structure for the event within the struct event_group.
The file system stores it in mon_evt::arch_priv.

Clear the address field of regions that did not pass the checks in
skip_this_region() so they will not be used by intel_aet_read_event().
This is safe to do because intel_pmt_get_regions_by_feature() allocates
a new pmt_feature_group structure to return to each caller and only
makes use of the pmt_feature_group::kref field when
intel_pmt_put_feature_group() returns the structure.

Add a check to resctrl_arch_rmid_read() for resource id
RDT_RESOURCE_PERF_PKG and directly call intel_aet_read_event()
passing the enum resctrl_event_id for the event and the arch_priv
pointer that was supplied when the event was enabled.

There may be multiple aggregators tracking each package, so scan all of
them and add up all counters. Aggregators may return an invalid data
indication if they have received no records for a given RMID. Return
success to the user if one or more aggregators provide valid data.

Resctrl now uses readq() so depends on X86_64. Update Kconfig.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/internal.h  |  7 ++++
 arch/x86/kernel/cpu/resctrl/intel_aet.c | 53 ++++++++++++++++++++++++-
 arch/x86/kernel/cpu/resctrl/monitor.c   |  3 ++
 arch/x86/Kconfig                        |  2 +-
 4 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 9ddfbbe5c3cf..8986071dd72a 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -172,9 +172,16 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
 #ifdef CONFIG_X86_CPU_RESCTRL_INTEL_AET
 bool intel_aet_get_events(void);
 void __exit intel_aet_exit(void);
+int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,
+			 void *arch_priv, u64 *val);
 #else
 static inline bool intel_aet_get_events(void) { return false; }
 static inline void __exit intel_aet_exit(void) { }
+static inline int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,
+				       void *arch_priv, u64 *val)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index 5c5466dc3189..9239740e9647 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -14,6 +14,7 @@
 #include <linux/cleanup.h>
 #include <linux/cpu.h>
 #include <linux/intel_vsec.h>
+#include <linux/io.h>
 #include <linux/resctrl.h>
 
 #include "internal.h"
@@ -125,8 +126,14 @@ static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
 	bool usable_events = false;
 
 	for (int i = 0; i < p->count; i++) {
-		if (skip_this_region(&p->regions[i], e))
+		if (skip_this_region(&p->regions[i], e)) {
+			/*
+			 * Clear addr so that intel_aet_read_event() will
+			 * skip this region.
+			 */
+			p->regions[i].addr = NULL;
 			continue;
+		}
 		usable_events = true;
 	}
 
@@ -204,3 +211,47 @@ void __exit intel_aet_exit(void)
 		(*peg)->pfg = NULL;
 	}
 }
+
+#define DATA_VALID	BIT_ULL(63)
+#define DATA_BITS	GENMASK_ULL(62, 0)
+
+/*
+ * Read counter for an event on a domain (summing all aggregators
+ * on the domain). If an aggregator hasn't received any data for a
+ * specific RMID, the MMIO read indicates that data is not valid.
+ * Return success if at least one aggregator has valid data.
+ */
+int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id eventid,
+			 void *arch_priv, u64 *val)
+{
+	struct pmt_event *pevt = arch_priv;
+	struct event_group *e;
+	bool valid = false;
+	u64 evtcount;
+	void *pevt0;
+	int idx;
+
+	pevt0 = pevt - pevt->idx;
+	e = container_of(pevt0, struct event_group, evts);
+	idx = rmid * e->num_events;
+	idx += pevt->idx;
+
+	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
+		pr_warn_once("MMIO index %d out of range\n", idx);
+		return -EIO;
+	}
+
+	for (int i = 0; i < e->pfg->count; i++) {
+		if (!e->pfg->regions[i].addr)
+			continue;
+		if (e->pfg->regions[i].plat_info.package_id != domid)
+			continue;
+		evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64));
+		if (!(evtcount & DATA_VALID))
+			continue;
+		*val += evtcount & DATA_BITS;
+		valid = true;
+	}
+
+	return valid ? 0 : -EINVAL;
+}
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 249569327e4a..0333dd85450b 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -232,6 +232,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
 
 	resctrl_arch_rmid_read_context_check();
 
+	if (r->rid == RDT_RESOURCE_PERF_PKG)
+		return intel_aet_read_event(hdr->id, rmid, eventid, arch_priv, val);
+
 	if (r->rid != RDT_RESOURCE_L3)
 		return -EINVAL;
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50051fdf4659..a42f749f31cb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -527,7 +527,7 @@ config X86_CPU_RESCTRL
 
 config X86_CPU_RESCTRL_INTEL_AET
 	bool "Intel Application Energy Telemetry" if INTEL_PMT_TELEMETRY=y && INTEL_TPMI=y
-	depends on X86_CPU_RESCTRL && CPU_SUP_INTEL
+	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL
 	help
 	  Enable per-RMID telemetry events in resctrl.
 
-- 
2.50.1
Re: [PATCH v9 21/31] x86/resctrl: Read telemetry events
Posted by Ilpo Järvinen 1 month ago
On Fri, 29 Aug 2025, Tony Luck wrote:

> Telemetry events are enumerated by the INTEL_PMT_TELEMETRY subsystem.
> resctrl enables events with resctrl_enable_mon_event() passing a pointer
> to the pmt_event structure for the event within the struct event_group.
> The file system stores it in mon_evt::arch_priv.
> 
> Clear the address field of regions that did not pass the checks in
> skip_this_region() so they will not be used by intel_aet_read_event().
> This is safe to do because intel_pmt_get_regions_by_feature() allocates
> a new pmt_feature_group structure to return to each caller and only
> makes use of the pmt_feature_group::kref field when
> intel_pmt_put_feature_group() returns the structure.
> 
> Add a check to resctrl_arch_rmid_read() for resource id
> RDT_RESOURCE_PERF_PKG and directly call intel_aet_read_event()
> passing the enum resctrl_event_id for the event and the arch_priv
> pointer that was supplied when the event was enabled.
> 
> There may be multiple aggregators tracking each package, so scan all of
> them and add up all counters. Aggregators may return an invalid data
> indication if they have received no records for a given RMID. Return
> success to the user if one or more aggregators provide valid data.
> 
> Resctrl now uses readq() so depends on X86_64. Update Kconfig.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  arch/x86/kernel/cpu/resctrl/internal.h  |  7 ++++
>  arch/x86/kernel/cpu/resctrl/intel_aet.c | 53 ++++++++++++++++++++++++-
>  arch/x86/kernel/cpu/resctrl/monitor.c   |  3 ++
>  arch/x86/Kconfig                        |  2 +-
>  4 files changed, 63 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 9ddfbbe5c3cf..8986071dd72a 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -172,9 +172,16 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
>  #ifdef CONFIG_X86_CPU_RESCTRL_INTEL_AET
>  bool intel_aet_get_events(void);
>  void __exit intel_aet_exit(void);
> +int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,
> +			 void *arch_priv, u64 *val);
>  #else
>  static inline bool intel_aet_get_events(void) { return false; }
>  static inline void __exit intel_aet_exit(void) { }
> +static inline int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,
> +				       void *arch_priv, u64 *val)
> +{
> +	return -EINVAL;
> +}
>  #endif
>  
>  #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index 5c5466dc3189..9239740e9647 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -14,6 +14,7 @@
>  #include <linux/cleanup.h>
>  #include <linux/cpu.h>
>  #include <linux/intel_vsec.h>
> +#include <linux/io.h>
>  #include <linux/resctrl.h>
>  
>  #include "internal.h"
> @@ -125,8 +126,14 @@ static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
>  	bool usable_events = false;
>  
>  	for (int i = 0; i < p->count; i++) {
> -		if (skip_this_region(&p->regions[i], e))
> +		if (skip_this_region(&p->regions[i], e)) {
> +			/*
> +			 * Clear addr so that intel_aet_read_event() will
> +			 * skip this region.
> +			 */
> +			p->regions[i].addr = NULL;

As this is at least semi-hacky, I suggest you move it into its own function
and add a somewhat longer comment to the function (along the lines of what
the changelog states about why it works).

>  			continue;
> +		}
>  		usable_events = true;
>  	}
>  
> @@ -204,3 +211,47 @@ void __exit intel_aet_exit(void)
>  		(*peg)->pfg = NULL;
>  	}
>  }
> +
> +#define DATA_VALID	BIT_ULL(63)
> +#define DATA_BITS	GENMASK_ULL(62, 0)

+ linux/bits.h

> +
> +/*
> + * Read counter for an event on a domain (summing all aggregators
> + * on the domain). If an aggregator hasn't received any data for a
> + * specific RMID, the MMIO read indicates that data is not valid.
> + * Return success if at least one aggregator has valid data.
> + */
> +int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id eventid,
> +			 void *arch_priv, u64 *val)
> +{
> +	struct pmt_event *pevt = arch_priv;
> +	struct event_group *e;
> +	bool valid = false;
> +	u64 evtcount;
> +	void *pevt0;
> +	int idx;
> +
> +	pevt0 = pevt - pevt->idx;
> +	e = container_of(pevt0, struct event_group, evts);

+ linux/container_of.h

> +	idx = rmid * e->num_events;
> +	idx += pevt->idx;
> +
> +	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
> +		pr_warn_once("MMIO index %d out of range\n", idx);
> +		return -EIO;
> +	}
> +
> +	for (int i = 0; i < e->pfg->count; i++) {
> +		if (!e->pfg->regions[i].addr)
> +			continue;
> +		if (e->pfg->regions[i].plat_info.package_id != domid)
> +			continue;
> +		evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64));
> +		if (!(evtcount & DATA_VALID))
> +			continue;
> +		*val += evtcount & DATA_BITS;
> +		valid = true;
> +	}
> +
> +	return valid ? 0 : -EINVAL;
> +}
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index 249569327e4a..0333dd85450b 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -232,6 +232,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
>  
>  	resctrl_arch_rmid_read_context_check();
>  
> +	if (r->rid == RDT_RESOURCE_PERF_PKG)
> +		return intel_aet_read_event(hdr->id, rmid, eventid, arch_priv, val);
> +
>  	if (r->rid != RDT_RESOURCE_L3)
>  		return -EINVAL;
>  
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 50051fdf4659..a42f749f31cb 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -527,7 +527,7 @@ config X86_CPU_RESCTRL
>  
>  config X86_CPU_RESCTRL_INTEL_AET
>  	bool "Intel Application Energy Telemetry" if INTEL_PMT_TELEMETRY=y && INTEL_TPMI=y
> -	depends on X86_CPU_RESCTRL && CPU_SUP_INTEL
> +	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL
>  	help
>  	  Enable per-RMID telemetry events in resctrl.
>  
> 

-- 
 i.
Re: [PATCH v9 21/31] x86/resctrl: Read telemetry events
Posted by Luck, Tony 4 weeks, 1 day ago
On Mon, Sep 01, 2025 at 12:15:49PM +0300, Ilpo Järvinen wrote:
> > @@ -125,8 +126,14 @@ static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
> >  	bool usable_events = false;
> >  
> >  	for (int i = 0; i < p->count; i++) {
> > -		if (skip_this_region(&p->regions[i], e))
> > +		if (skip_this_region(&p->regions[i], e)) {
> > +			/*
> > +			 * Clear addr so that intel_aet_read_event() will
> > +			 * skip this region.
> > +			 */
> > +			p->regions[i].addr = NULL;
> 
> As this is at least semi-hacky, I suggest you move it into its own function
> and add a somewhat longer comment to the function (along the lines of what
> the changelog states about why it works).

Agreed. See new mark_telem_region_unusable() in updated patch below.
Also pushed to
git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux.git rdt-aet-v10-wip

-Tony

P.S. In addition to all the extra #include files you suggested, I found
a few others missing. So those are added as needed in the series.

From 163824a2e10de9f63e20f0bc9f86b8c14f58bfcb Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Mon, 25 Aug 2025 10:47:06 -0700
Subject: [PATCH 21/31] x86/resctrl: Read telemetry events

Telemetry events are enumerated by the INTEL_PMT_TELEMETRY subsystem.
resctrl enables events with resctrl_enable_mon_event() passing a pointer
to the pmt_event structure for the event within the struct event_group.
The file system stores it in mon_evt::arch_priv.

Mark regions that did not pass the checks in skip_telemetry_region()
so they will not be used by intel_aet_read_event().

Add a check to resctrl_arch_rmid_read() for resource id
RDT_RESOURCE_PERF_PKG and directly call intel_aet_read_event()
passing the enum resctrl_event_id for the event and the arch_priv
pointer that was supplied when the event was enabled.

There may be multiple aggregators tracking each package, so scan all of
them and add up all counters. Aggregators may return an invalid data
indication if they have received no records for a given RMID. Return
success to the user if one or more aggregators provide valid data.

Resctrl now uses readq() so depends on X86_64. Update Kconfig.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/internal.h  |  7 +++
 arch/x86/kernel/cpu/resctrl/intel_aet.c | 65 ++++++++++++++++++++++++-
 arch/x86/kernel/cpu/resctrl/monitor.c   |  3 ++
 arch/x86/Kconfig                        |  2 +-
 4 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 9ddfbbe5c3cf..8986071dd72a 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -172,9 +172,16 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
 #ifdef CONFIG_X86_CPU_RESCTRL_INTEL_AET
 bool intel_aet_get_events(void);
 void __exit intel_aet_exit(void);
+int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,
+			 void *arch_priv, u64 *val);
 #else
 static inline bool intel_aet_get_events(void) { return false; }
 static inline void __exit intel_aet_exit(void) { }
+static inline int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,
+				       void *arch_priv, u64 *val)
+{
+	return -EINVAL;
+}
 #endif
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index e36b3790733b..170158d40e27 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -12,13 +12,17 @@
 #define pr_fmt(fmt)   "resctrl: " fmt
 
 #include <linux/array_size.h>
+#include <linux/bits.h>
 #include <linux/cleanup.h>
 #include <linux/compiler_types.h>
+#include <linux/container_of.h>
 #include <linux/cpu.h>
 #include <linux/err.h>
+#include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/intel_pmt_features.h>
 #include <linux/intel_vsec.h>
+#include <linux/io.h>
 #include <linux/overflow.h>
 #include <linux/printk.h>
 #include <linux/resctrl.h>
@@ -131,13 +135,28 @@ static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e
 	return false;
 }
 
+/*
+ * Clear the address field of regions that did not pass the checks in
+ * skip_telem_region() so they will not be used by intel_aet_read_event().
+ * This is safe to do because intel_pmt_get_regions_by_feature() allocates
+ * a new pmt_feature_group structure to return to each caller and only makes
+ * use of the pmt_feature_group::kref field when intel_pmt_put_feature_group()
+ * returns the structure.
+ */
+static void mark_telem_region_unusable(struct telemetry_region *tr)
+{
+	tr->addr = NULL;
+}
+
 static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
 {
 	bool usable_events = false;
 
 	for (int i = 0; i < p->count; i++) {
-		if (skip_telem_region(&p->regions[i], e))
+		if (skip_telem_region(&p->regions[i], e)) {
+			mark_telem_region_unusable(&p->regions[i]);
 			continue;
+		}
 		usable_events = true;
 	}
 
@@ -215,3 +234,47 @@ void __exit intel_aet_exit(void)
 		(*peg)->pfg = NULL;
 	}
 }
+
+#define DATA_VALID	BIT_ULL(63)
+#define DATA_BITS	GENMASK_ULL(62, 0)
+
+/*
+ * Read counter for an event on a domain (summing all aggregators
+ * on the domain). If an aggregator hasn't received any data for a
+ * specific RMID, the MMIO read indicates that data is not valid.
+ * Return success if at least one aggregator has valid data.
+ */
+int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id eventid,
+			 void *arch_priv, u64 *val)
+{
+	struct pmt_event *pevt = arch_priv;
+	struct event_group *e;
+	bool valid = false;
+	u64 evtcount;
+	void *pevt0;
+	int idx;
+
+	pevt0 = pevt - pevt->idx;
+	e = container_of(pevt0, struct event_group, evts);
+	idx = rmid * e->num_events;
+	idx += pevt->idx;
+
+	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
+		pr_warn_once("MMIO index %d out of range\n", idx);
+		return -EIO;
+	}
+
+	for (int i = 0; i < e->pfg->count; i++) {
+		if (!e->pfg->regions[i].addr)
+			continue;
+		if (e->pfg->regions[i].plat_info.package_id != domid)
+			continue;
+		evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64));
+		if (!(evtcount & DATA_VALID))
+			continue;
+		*val += evtcount & DATA_BITS;
+		valid = true;
+	}
+
+	return valid ? 0 : -EINVAL;
+}
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 249569327e4a..0333dd85450b 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -232,6 +232,9 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain_hdr *hdr,
 
 	resctrl_arch_rmid_read_context_check();
 
+	if (r->rid == RDT_RESOURCE_PERF_PKG)
+		return intel_aet_read_event(hdr->id, rmid, eventid, arch_priv, val);
+
 	if (r->rid != RDT_RESOURCE_L3)
 		return -EINVAL;
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50051fdf4659..a42f749f31cb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -527,7 +527,7 @@ config X86_CPU_RESCTRL
 
 config X86_CPU_RESCTRL_INTEL_AET
 	bool "Intel Application Energy Telemetry" if INTEL_PMT_TELEMETRY=y && INTEL_TPMI=y
-	depends on X86_CPU_RESCTRL && CPU_SUP_INTEL
+	depends on X86_64 && X86_CPU_RESCTRL && CPU_SUP_INTEL
 	help
 	  Enable per-RMID telemetry events in resctrl.
 
-- 
2.51.0
Re: [PATCH v9 21/31] x86/resctrl: Read telemetry events
Posted by Reinette Chatre 3 weeks, 2 days ago
Hi Tony,

On 9/3/25 11:24 AM, Luck, Tony wrote:

> Agreed. See new mark_telem_region_unusable() in updated patch below.
> Also pushed to
> git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux.git rdt-aet-v10-wip
> 
> -Tony
> 
> P.S. In addition to all the extra #include files you suggested, I found
> a few others missing. So those are added as needed in the series.
> 
> From 163824a2e10de9f63e20f0bc9f86b8c14f58bfcb Mon Sep 17 00:00:00 2001
> From: Tony Luck <tony.luck@intel.com>
> Date: Mon, 25 Aug 2025 10:47:06 -0700
> Subject: [PATCH 21/31] x86/resctrl: Read telemetry events
> 
> Telemetry events are enumerated by the INTEL_PMT_TELEMETRY subsystem.
> resctrl enables events with resctrl_enable_mon_event() passing a pointer
> to the pmt_event structure for the event within the struct event_group.
> The file system stores it in mon_evt::arch_priv.

This is getting to be a very detailed description of the code as opposed to
what the code does ... hmmm ... looks like I already made an attempt
with a proposal in v8 that was just ignored without any reason. <sigh>

> 
> Mark regions that did not pass the checks in skip_telemetry_region()
> so they will not be used by intel_aet_read_event().

Apart from skip_telemetry_region() -> skip_telem_region() I think this
should lighten up on the code details. Compare with, for example:

	Mark telemetry regions that did not pass the sanity checks by
	clearing their MMIO address fields so that they will not be
	used when reading events.  

(i.e., more about what the code does instead of the actual code)

> 
> Add a check to resctrl_arch_rmid_read() for resource id
> RDT_RESOURCE_PERF_PKG and directly call intel_aet_read_event()
> passing the enum resctrl_event_id for the event and the arch_priv
> pointer that was supplied when the event was enabled.

Also heavy on code details, while written as though the reader is familiar
with the function (intel_aet_read_event()) introduced in this patch. Compare
with, for example:
	Introduce intel_aet_read_event() to read telemetry events for
	resource RDT_RESOURCE_PERF_PKG. There may be multiple aggregators ...
	
> 
> There may be multiple aggregators tracking each package, so scan all of
> them and add up all counters. Aggregators may return an invalid data
> indication if they have received no records for a given RMID. Return
> success to the user if one or more aggregators provide valid data.
> 
> Resctrl now uses readq() so depends on X86_64. Update Kconfig.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  arch/x86/kernel/cpu/resctrl/internal.h  |  7 +++
>  arch/x86/kernel/cpu/resctrl/intel_aet.c | 65 ++++++++++++++++++++++++-
>  arch/x86/kernel/cpu/resctrl/monitor.c   |  3 ++
>  arch/x86/Kconfig                        |  2 +-
>  4 files changed, 75 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index 9ddfbbe5c3cf..8986071dd72a 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -172,9 +172,16 @@ void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
>  #ifdef CONFIG_X86_CPU_RESCTRL_INTEL_AET
>  bool intel_aet_get_events(void);
>  void __exit intel_aet_exit(void);
> +int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,

Patch 24 goes through the effort of making RMID unsigned while intel_aet_read_event()
casts an unsigned RMID to signed?

> +			 void *arch_priv, u64 *val);
>  #else
>  static inline bool intel_aet_get_events(void) { return false; }
>  static inline void __exit intel_aet_exit(void) { }
> +static inline int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id evtid,
> +				       void *arch_priv, u64 *val)
> +{
> +	return -EINVAL;
> +}
>  #endif
>  
>  #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index e36b3790733b..170158d40e27 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -12,13 +12,17 @@
>  #define pr_fmt(fmt)   "resctrl: " fmt
>  
>  #include <linux/array_size.h>
> +#include <linux/bits.h>
>  #include <linux/cleanup.h>
>  #include <linux/compiler_types.h>
> +#include <linux/container_of.h>
>  #include <linux/cpu.h>
>  #include <linux/err.h>
> +#include <linux/errno.h>
>  #include <linux/init.h>
>  #include <linux/intel_pmt_features.h>
>  #include <linux/intel_vsec.h>
> +#include <linux/io.h>
>  #include <linux/overflow.h>
>  #include <linux/printk.h>
>  #include <linux/resctrl.h>
> @@ -131,13 +135,28 @@ static bool skip_telem_region(struct telemetry_region *tr, struct event_group *e
>  	return false;
>  }
>  
> +/*
> + * Clear the address field of regions that did not pass the checks in
> + * skip_telem_region() so they will not be used by intel_aet_read_event().
> + * This is safe to do because intel_pmt_get_regions_by_feature() allocates
> + * a new pmt_feature_group structure to return to each caller and only makes
> + * use of the pmt_feature_group::kref field when intel_pmt_put_feature_group()
> + * returns the structure.
> + */
> +static void mark_telem_region_unusable(struct telemetry_region *tr)
> +{
> +	tr->addr = NULL;
> +}
> +
>  static bool enable_events(struct event_group *e, struct pmt_feature_group *p)
>  {
>  	bool usable_events = false;
>  
>  	for (int i = 0; i < p->count; i++) {
> -		if (skip_telem_region(&p->regions[i], e))
> +		if (skip_telem_region(&p->regions[i], e)) {
> +			mark_telem_region_unusable(&p->regions[i]);
>  			continue;
> +		}
>  		usable_events = true;
>  	}
>  
> @@ -215,3 +234,47 @@ void __exit intel_aet_exit(void)
>  		(*peg)->pfg = NULL;
>  	}
>  }
> +
> +#define DATA_VALID	BIT_ULL(63)
> +#define DATA_BITS	GENMASK_ULL(62, 0)
> +
> +/*
> + * Read counter for an event on a domain (summing all aggregators
> + * on the domain). If an aggregator hasn't received any data for a
> + * specific RMID, the MMIO read indicates that data is not valid.
> + * Return success if at least one aggregator has valid data.
> + */
> +int intel_aet_read_event(int domid, int rmid, enum resctrl_event_id eventid,
> +			 void *arch_priv, u64 *val)
> +{
> +	struct pmt_event *pevt = arch_priv;
> +	struct event_group *e;
> +	bool valid = false;
> +	u64 evtcount;
> +	void *pevt0;
> +	int idx;
> +
> +	pevt0 = pevt - pevt->idx;
> +	e = container_of(pevt0, struct event_group, evts);
> +	idx = rmid * e->num_events;

Looks like idx can also be unsigned (since rmid and num_events are unsigned) and
should be printed with %u?

> +	idx += pevt->idx;
> +
> +	if (idx * sizeof(u64) + sizeof(u64) > e->mmio_size) {
> +		pr_warn_once("MMIO index %d out of range\n", idx);
> +		return -EIO;
> +	}
> +
> +	for (int i = 0; i < e->pfg->count; i++) {
> +		if (!e->pfg->regions[i].addr)
> +			continue;
> +		if (e->pfg->regions[i].plat_info.package_id != domid)
> +			continue;
> +		evtcount = readq(e->pfg->regions[i].addr + idx * sizeof(u64));
> +		if (!(evtcount & DATA_VALID))
> +			continue;
> +		*val += evtcount & DATA_BITS;
> +		valid = true;
> +	}
> +
> +	return valid ? 0 : -EINVAL;
> +}

Reinette