[PATCH v6 18/30] x86/resctrl: Count valid telemetry aggregators per package

Tony Luck posted 30 patches 3 months, 1 week ago
There is a newer version of this series
[PATCH v6 18/30] x86/resctrl: Count valid telemetry aggregators per package
Posted by Tony Luck 3 months, 1 week ago
There may be multiple telemetry aggregators per package, each enumerated
by a telemetry region structure in the feature group.

Scan the array of telemetry region structures and count how many are
in each package in preparation to allocate structures to save the MMIO
addresses for each in a convenient format for use when reading event
counters.

Sanity check that the telemetry region structures have a valid
package_id and that the size they report for the MMIO space is as
large as expected from the XML description of the registers in
the region.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/resctrl/intel_aet.c | 55 ++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
index b09044b093dd..8d67ed709a74 100644
--- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
+++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
@@ -15,6 +15,7 @@
 #include <linux/cpu.h>
 #include <linux/intel_vsec.h>
 #include <linux/resctrl.h>
+#include <linux/slab.h>
 
 #include "internal.h"
 
@@ -24,6 +25,7 @@
  *			within the OOBMSM driver that contains data for all
  *			telemetry regions.
  * @guid:		Unique number per XML description file.
+ * @mmio_size:		Number of bytes of MMIO registers for this group.
  */
 struct event_group {
 	/* Data fields for additional structures to manage this group. */
@@ -31,14 +33,19 @@ struct event_group {
 
 	/* Remaining fields initialized from XML file. */
 	u32				guid;
+	size_t				mmio_size;
 };
 
+#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status)	\
+	(((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64))
+
 /*
  * Link: https://github.com/intel/Intel-PMT
  * File: xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml
  */
 static struct event_group energy_0x26696143 = {
 	.guid		= 0x26696143,
+	.mmio_size	= XML_MMIO_SIZE(576, 2, 3),
 };
 
 /*
@@ -47,6 +54,7 @@ static struct event_group energy_0x26696143 = {
  */
 static struct event_group perf_0x26557651 = {
 	.guid		= 0x26557651,
+	.mmio_size	= XML_MMIO_SIZE(576, 7, 3),
 };
 
 static struct event_group *known_event_groups[] = {
@@ -56,10 +64,53 @@ static struct event_group *known_event_groups[] = {
 
 #define NUM_KNOWN_GROUPS ARRAY_SIZE(known_event_groups)
 
-/* Stub for now */
+static bool skip_this_region(struct telemetry_region *tr, struct event_group *e)
+{
+	if (tr->guid != e->guid)
+		return true;
+	if (tr->plat_info.package_id >= topology_max_packages()) {
+		pr_warn_once("Bad package %d in guid 0x%x\n", tr->plat_info.package_id,
+			     tr->guid);
+		return true;
+	}
+	if (tr->size < e->mmio_size) {
+		pr_warn_once("MMIO space %zu too small for guid 0x%x\n", tr->size, e->guid);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Configure events from one pmt_feature_group.
+ * 1) Count how many per package.
+ * 2...) To be continued.
+ */
 static int configure_events(struct event_group *e, struct pmt_feature_group *p)
 {
-	return -EINVAL;
+	int *pkgcounts __free(kfree) = NULL;
+	struct telemetry_region *tr;
+	int num_pkgs;
+
+	num_pkgs = topology_max_packages();
+
+	/* Get per-package counts of telemetry_regions for this event group */
+	for (int i = 0; i < p->count; i++) {
+		tr = &p->regions[i];
+		if (skip_this_region(tr, e))
+			continue;
+		if (!pkgcounts) {
+			pkgcounts = kcalloc(num_pkgs, sizeof(*pkgcounts), GFP_KERNEL);
+			if (!pkgcounts)
+				return -ENOMEM;
+		}
+		pkgcounts[tr->plat_info.package_id]++;
+	}
+
+	if (!pkgcounts)
+		return -ENODEV;
+
+	return 0;
 }
 
 DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *,
-- 
2.49.0
Re: [PATCH v6 18/30] x86/resctrl: Count valid telemetry aggregators per package
Posted by Reinette Chatre 3 months ago
Hi Tony,

On 6/26/25 9:49 AM, Tony Luck wrote:
> There may be multiple telemetry aggregators per package, each enumerated
> by a telemetry region structure in the feature group.

This is the valuable connection missing from earlier patch changelog.

> 
> Scan the array of telemetry region structures and count how many are
> in each package in preparation to allocate structures to save the MMIO
> addresses for each in a convenient format for use when reading event
> counters.
> 
> Sanity check that the telemetry region structures have a valid
> package_id and that the size they report for the MMIO space is as
> large as expected from the XML description of the registers in
> the region.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  arch/x86/kernel/cpu/resctrl/intel_aet.c | 55 ++++++++++++++++++++++++-
>  1 file changed, 53 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/intel_aet.c b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> index b09044b093dd..8d67ed709a74 100644
> --- a/arch/x86/kernel/cpu/resctrl/intel_aet.c
> +++ b/arch/x86/kernel/cpu/resctrl/intel_aet.c
> @@ -15,6 +15,7 @@
>  #include <linux/cpu.h>
>  #include <linux/intel_vsec.h>
>  #include <linux/resctrl.h>
> +#include <linux/slab.h>
>  
>  #include "internal.h"
>  
> @@ -24,6 +25,7 @@
>   *			within the OOBMSM driver that contains data for all
>   *			telemetry regions.
>   * @guid:		Unique number per XML description file.
> + * @mmio_size:		Number of bytes of MMIO registers for this group.
>   */
>  struct event_group {
>  	/* Data fields for additional structures to manage this group. */
> @@ -31,14 +33,19 @@ struct event_group {
>  
>  	/* Remaining fields initialized from XML file. */
>  	u32				guid;
> +	size_t				mmio_size;
>  };
>  
> +#define XML_MMIO_SIZE(num_rmids, num_events, num_extra_status)	\
> +	(((num_rmids) * (num_events) + (num_extra_status)) * sizeof(u64))
> +
>  /*
>   * Link: https://github.com/intel/Intel-PMT
>   * File: xml/CWF/OOBMSM/RMID-ENERGY/cwf_aggregator.xml
>   */
>  static struct event_group energy_0x26696143 = {
>  	.guid		= 0x26696143,
> +	.mmio_size	= XML_MMIO_SIZE(576, 2, 3),
>  };
>  
>  /*
> @@ -47,6 +54,7 @@ static struct event_group energy_0x26696143 = {
>   */
>  static struct event_group perf_0x26557651 = {
>  	.guid		= 0x26557651,
> +	.mmio_size	= XML_MMIO_SIZE(576, 7, 3),
>  };
>  
>  static struct event_group *known_event_groups[] = {
> @@ -56,10 +64,53 @@ static struct event_group *known_event_groups[] = {
>  
>  #define NUM_KNOWN_GROUPS ARRAY_SIZE(known_event_groups)
>  
> -/* Stub for now */
> +static bool skip_this_region(struct telemetry_region *tr, struct event_group *e)
> +{
> +	if (tr->guid != e->guid)
> +		return true;
> +	if (tr->plat_info.package_id >= topology_max_packages()) {
> +		pr_warn_once("Bad package %d in guid 0x%x\n", tr->plat_info.package_id,

If struct event_group includes the RMID telemetry feature ID (see below) then it
would be helpful to print that here.

> +			     tr->guid);
> +		return true;
> +	}
> +	if (tr->size < e->mmio_size) {

Why not "tr->size != e->mmio_size"?

Patch #25 explains how tr->num_rmids may be smaller than the number of RMIDs in XML file
but from that description I got the impression that telemetry regions should always
support all registers documented in the XML file. Similarly, in the earlier "fake OOBMSM"
code the MMIO size of the "energy" MMIO size could still accommodate the 576 RMIDs while
the regions were configured to only support 64 RMIDs. 

> +		pr_warn_once("MMIO space %zu too small for guid 0x%x\n", tr->size, e->guid);
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +/*
> + * Configure events from one pmt_feature_group.

"Configure events" -> "Discover events"?

> + * 1) Count how many per package.

It is not clear what is counted here ... first sentence is "Configure events" followed
by this unspecific "Count how many per package" that can be interpreted that it
counts events here ... but it is actually counting telemetry regions?

> + * 2...) To be continued.

This comment implies that as capabilities are added to this function the comments
will be amended to document these new capabilities ... but at the end of this series
this function comment still reads as "2...) To be continued".

> + */
>  static int configure_events(struct event_group *e, struct pmt_feature_group *p)
>  {
> -	return -EINVAL;
> +	int *pkgcounts __free(kfree) = NULL;
> +	struct telemetry_region *tr;
> +	int num_pkgs;
> +
> +	num_pkgs = topology_max_packages();
> +
> +	/* Get per-package counts of telemetry_regions for this event group */

"telemetry_regions" -> "telemetry regions"? Or is it referring to the actual struct
here?

> +	for (int i = 0; i < p->count; i++) {
> +		tr = &p->regions[i];
> +		if (skip_this_region(tr, e))
> +			continue;

The function calling configure_event() does:

	struct event_group **peg;

	for (peg = &known_event_groups[0]; peg < &known_event_groups[NUM_KNOWN_GROUPS]; peg++)  {
		ret = configure_events(*peg, p);
		...
	}

As I understand there is 1:1 relationship between struct event_group and struct pmt_feature_group.
It thus seems unnecessary to loop through all the telemetry regions of a struct pmt_feature_group
if it is known to not be associated with the "event group"?
Could it be helpful to add a new (hardcoded) event_group::id that is of type enum pmt_feature_id
that can be used to ensure that only relevant struct pmt_feature_group is used to discover events
for a particular struct event_group?

Another consideration is that this implementation seems to require that guids are unique across
all telemetry regions of all RMID telemetry features, is this guaranteed?

> +		if (!pkgcounts) {
> +			pkgcounts = kcalloc(num_pkgs, sizeof(*pkgcounts), GFP_KERNEL);
> +			if (!pkgcounts)
> +				return -ENOMEM;
> +		}
> +		pkgcounts[tr->plat_info.package_id]++;
> +	}
> +
> +	if (!pkgcounts)
> +		return -ENODEV;
> +
> +	return 0;
>  }
>  
>  DEFINE_FREE(intel_pmt_put_feature_group, struct pmt_feature_group *,

Reinette
Re: [PATCH v6 18/30] x86/resctrl: Count valid telemetry aggregators per package
Posted by Luck, Tony 3 months ago
On Tue, Jul 08, 2025 at 07:20:35PM -0700, Reinette Chatre wrote:
> As I understand there is 1:1 relationship between struct event_group and struct pmt_feature_group.
> It thus seems unnecessary to loop through all the telemetry regions of a struct pmt_feature_group
> if it is known to not be associated with the "event group"?
> Could it be helpful to add a new (hardcoded) event_group::id that is of type enum pmt_feature_id
> that can be used to ensure that only relevant struct pmt_feature_group is used to discover events
> for a particular struct event_group?
> 
> Another consideration is that this implementation seems to require that guids are unique across
> all telemetry regions of all RMID telemetry features, is this guaranteed?

The guids are unique. The XML file tags them like this:

	<TELEM:uniqueid>26557651</TELEM:uniqueid>

the "guid" naming of the value comes from the Intel PMT_DISCOVERY driver.

An alternative to adding the new event_group::id field would be to
separate the arrays of known event groups. I.e. change from:

static struct event_group *known_event_groups[] = {
        &energy_0x26696143,
        &perf_0x26557651,
};

to

static struct event_group *known_energy_event_groups[] = {
        &energy_0x26696143,
};

static struct event_group *known_perf_event_groups[] = {
        &perf_0x26557651,
};

then only scan the appropriate array that matches the
enum pmt_feature_id passed to get_pmt_feature().


With only one option in each array today this looks
like extra infrasctruture. But I already have a patch
for the next generation system that adds another guid.

-Tony
Re: [PATCH v6 18/30] x86/resctrl: Count valid telemetry aggregators per package
Posted by Reinette Chatre 3 months ago
Hi Tony,

On 7/9/25 11:12 AM, Luck, Tony wrote:
> On Tue, Jul 08, 2025 at 07:20:35PM -0700, Reinette Chatre wrote:
>> As I understand there is 1:1 relationship between struct event_group and struct pmt_feature_group.
>> It thus seems unnecessary to loop through all the telemetry regions of a struct pmt_feature_group
>> if it is known to not be associated with the "event group"?
>> Could it be helpful to add a new (hardcoded) event_group::id that is of type enum pmt_feature_id
>> that can be used to ensure that only relevant struct pmt_feature_group is used to discover events
>> for a particular struct event_group?
>>
>> Another consideration is that this implementation seems to require that guids are unique across
>> all telemetry regions of all RMID telemetry features, is this guaranteed?
> 
> The guids are unique. The XML file tags them like this:
> 
> 	<TELEM:uniqueid>26557651</TELEM:uniqueid>

I interpret above that guid is expected to be unique for one
telemetry feature. It is not clear to me that it implies that the guid
is unique across all telemetry features. For example, what prevents
a platform from using the same guid for all the telemetry features it
supports?

> 
> the "guid" naming of the value comes from the Intel PMT_DISCOVERY driver.
> 
> An alternative to adding the new event_group::id field would be to
> separate the arrays of known event groups. I.e. change from:
> 
> static struct event_group *known_event_groups[] = {
>         &energy_0x26696143,
>         &perf_0x26557651,
> };
> 
> to
> 
> static struct event_group *known_energy_event_groups[] = {
>         &energy_0x26696143,
> };
> 
> static struct event_group *known_perf_event_groups[] = {
>         &perf_0x26557651,
> };
> 
> then only scan the appropriate array that matches the
> enum pmt_feature_id passed to get_pmt_feature().
> 
> 
> With only one option in each array today this looks
> like extra infrasctruture. But I already have a patch
> for the next generation system that adds another guid.

This also sounds good. Thank you.

Reinette
Re: [PATCH v6 18/30] x86/resctrl: Count valid telemetry aggregators per package
Posted by Luck, Tony 3 months ago
On Wed, Jul 09, 2025 at 03:13:20PM -0700, Reinette Chatre wrote:
> Hi Tony,
> 
> On 7/9/25 11:12 AM, Luck, Tony wrote:
> > On Tue, Jul 08, 2025 at 07:20:35PM -0700, Reinette Chatre wrote:
> >> As I understand there is 1:1 relationship between struct event_group and struct pmt_feature_group.
> >> It thus seems unnecessary to loop through all the telemetry regions of a struct pmt_feature_group
> >> if it is known to not be associated with the "event group"?
> >> Could it be helpful to add a new (hardcoded) event_group::id that is of type enum pmt_feature_id
> >> that can be used to ensure that only relevant struct pmt_feature_group is used to discover events
> >> for a particular struct event_group?
> >>
> >> Another consideration is that this implementation seems to require that guids are unique across
> >> all telemetry regions of all RMID telemetry features, is this guaranteed?
> > 
> > The guids are unique. The XML file tags them like this:
> > 
> > 	<TELEM:uniqueid>26557651</TELEM:uniqueid>
> 
> I interpret above that guid is expected to be unique for one
> telemetry feature. It is not clear to me that it implies that the guid
> is unique across all telemetry features. For example, what prevents
> a platform from using the same guid for all the telemetry features it
> supports?

There are several non-RMID based telemetry MMIO regions in addition to
the two used by this patch series.

Think of the uniqueid as a signature for the format of the region.
Which event counters are present, in which order? How many total
counters? What is the binary format of each counter?

Or think of it as a key. Usermode telemetry tools that access these MMIO
regions use the uniqueid to choose which XML file to use to interpret
the data. I'm effectively doing this, but without including an XML
parser in the kernel. Just distilling each XML file to the basic
essence described in the event_group structure.

It would be a catastrophic failure if Intel assigned the same uniqueid
to regions that had different formats.

> 
> > 
> > the "guid" naming of the value comes from the Intel PMT_DISCOVERY driver.
> > 
> > An alternative to adding the new event_group::id field would be to
> > separate the arrays of known event groups. I.e. change from:
> > 
> > static struct event_group *known_event_groups[] = {
> >         &energy_0x26696143,
> >         &perf_0x26557651,
> > };
> > 
> > to
> > 
> > static struct event_group *known_energy_event_groups[] = {
> >         &energy_0x26696143,
> > };
> > 
> > static struct event_group *known_perf_event_groups[] = {
> >         &perf_0x26557651,
> > };
> > 
> > then only scan the appropriate array that matches the
> > enum pmt_feature_id passed to get_pmt_feature().
> > 
> > 
> > With only one option in each array today this looks
> > like extra infrasctruture. But I already have a patch
> > for the next generation system that adds another guid.
> 
> This also sounds good. Thank you.

Ok. Thanks. I'll put this idea into code in the next version.

> 
> Reinette
> 

-Tony
Re: [PATCH v6 18/30] x86/resctrl: Count valid telemetry aggregators per package
Posted by Reinette Chatre 3 months ago
Hi Tony,

On 7/9/25 3:48 PM, Luck, Tony wrote:
> On Wed, Jul 09, 2025 at 03:13:20PM -0700, Reinette Chatre wrote:
>> Hi Tony,
>>
>> On 7/9/25 11:12 AM, Luck, Tony wrote:
>>> On Tue, Jul 08, 2025 at 07:20:35PM -0700, Reinette Chatre wrote:
>>>> As I understand there is 1:1 relationship between struct event_group and struct pmt_feature_group.
>>>> It thus seems unnecessary to loop through all the telemetry regions of a struct pmt_feature_group
>>>> if it is known to not be associated with the "event group"?
>>>> Could it be helpful to add a new (hardcoded) event_group::id that is of type enum pmt_feature_id
>>>> that can be used to ensure that only relevant struct pmt_feature_group is used to discover events
>>>> for a particular struct event_group?
>>>>
>>>> Another consideration is that this implementation seems to require that guids are unique across
>>>> all telemetry regions of all RMID telemetry features, is this guaranteed?
>>>
>>> The guids are unique. The XML file tags them like this:
>>>
>>> 	<TELEM:uniqueid>26557651</TELEM:uniqueid>
>>
>> I interpret above that guid is expected to be unique for one
>> telemetry feature. It is not clear to me that it implies that the guid
>> is unique across all telemetry features. For example, what prevents
>> a platform from using the same guid for all the telemetry features it
>> supports?
> 
> There are several non-RMID based telemetry MMIO regions in addition to
> the two used by this patch series.
> 
> Think of the uniqueid as a signature for the format of the region.
> Which event counters are present, in which order? How many total
> counters? What is the binary format of each counter?

I understand this.

> 
> Or think of it as a key. Usermode telemetry tools that access these MMIO
> regions use the uniqueid to choose which XML file to use to interpret
> the data. I'm effectively doing this, but without including an XML
> parser in the kernel. Just distilling each XML file to the basic
> essence described in the event_group structure.

Right. If the analogy it is about ID used to pick the right XML to use then the
question is whether the ID is unique per directory, i.e all XML files in
RMID-ENERGY directory can be expected to have unique ID, or across all directories,
i.e XML files across RMID-ENERGY and RMID-PERF can be expected to have unique ID.

> It would be a catastrophic failure if Intel assigned the same uniqueid
> to regions that had different formats.

I think we may be speaking past each other. New code will address concern
anyway though.

Reinette