[PATCH v13 17/27] x86/resctrl: Add the support for reading ABMC counters

Posted by Babu Moger 7 months ago
Software can read the assignable counters using the QM_EVTSEL and QM_CTR
register pair.

QM_EVTSEL Register definition:
=======================================================
Bits	Mnemonic	Description
=======================================================
63:44	--		Reserved
43:32   RMID		Resource Monitoring Identifier
31	ExtEvtID	Extended Event Identifier
30:8	--		Reserved
7:0	EvtID		Event Identifier
=======================================================

The contents of a specific counter can be read by setting the following
fields in QM_EVTSEL: ExtendedEvtID = 1, EvtID = L3CacheABMC (=1), and
RMID = the desired counter ID. Reading QM_CTR will then return the
contents of the specified counter. The E bit will be set if the counter
configuration was invalid, or if an invalid counter ID was set in the
QM_EVTSEL[RMID] field.
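
For illustration, the QM_EVTSEL value for such a read can be composed as
below (a plain C sketch; the names are illustrative, not the ones added
by this patch):

	/* ExtEvtID (bit 31) | EvtID = L3CacheABMC, counter ID in RMID (43:32) */
	u64 evtsel = BIT_ULL(31) | 1ULL | ((u64)cntr_id << 32);

	wrmsrl(MSR_IA32_QM_EVTSEL, evtsel);
	rdmsrl(MSR_IA32_QM_CTR, msr_val);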

Introduce __cntr_id_read_phys() to read the counter ID event data.

Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/40332.pdf
Signed-off-by: Babu Moger <babu.moger@amd.com>
---
v13: Split the patch into two. The first handles passing the rdtgroup structure to a few
     functions (__mon_event_count() and mbm_update()); the second handles ABMC counter reading.
     Added new function __cntr_id_read_phys() to handle ABMC event reading.
     Updated kernel doc for resctrl_arch_reset_rmid() and resctrl_arch_rmid_read().
     Resolved conflicts caused by the recent FS/ARCH code restructure.
     The monitor.c file has now been split between the FS and ARCH directories.

v12: New patch to support extended event mode when ABMC is enabled.
---
 arch/x86/kernel/cpu/resctrl/internal.h |  6 +++
 arch/x86/kernel/cpu/resctrl/monitor.c  | 66 ++++++++++++++++++++++----
 fs/resctrl/monitor.c                   | 14 ++++--
 include/linux/resctrl.h                |  9 ++--
 4 files changed, 80 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index db6b0c28ee6b..3b0cdb5520c7 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -40,6 +40,12 @@ struct arch_mbm_state {
 /* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
 #define ABMC_ENABLE_BIT			0
 
+/*
+ * ABMC Qos Event Identifiers.
+ */
+#define ABMC_EXTENDED_EVT_ID		BIT(31)
+#define ABMC_EVT_ID			1
+
 /**
  * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
  *			       a resource for a control function
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index e31084f7babd..36a03dae6d8e 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -161,6 +161,41 @@ static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
 	return 0;
 }
 
+static int __cntr_id_read_phys(u32 cntr_id, u64 *val)
+{
+	u64 msr_val;
+
+	/*
+	 * QM_EVTSEL Register definition:
+	 * =======================================================
+	 * Bits    Mnemonic        Description
+	 * =======================================================
+	 * 63:44   --              Reserved
+	 * 43:32   RMID            Resource Monitoring Identifier
+	 * 31      ExtEvtID        Extended Event Identifier
+	 * 30:8    --              Reserved
+	 * 7:0     EvtID           Event Identifier
+	 * =======================================================
+	 * The contents of a specific counter can be read by setting the
+	 * following fields in QM_EVTSEL.ExtendedEvtID(=1) and
+	 * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting [RMID] to the
+	 * desired counter ID. Reading QM_CTR will then return the
+	 * contents of the specified counter. The E bit will be set if the
+	 * counter configuration was invalid, or if an invalid counter ID
+	 * was set in the QM_EVTSEL[RMID] field.
+	 */
+	wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id);
+	rdmsrl(MSR_IA32_QM_CTR, msr_val);
+
+	if (msr_val & RMID_VAL_ERROR)
+		return -EIO;
+	if (msr_val & RMID_VAL_UNAVAIL)
+		return -EINVAL;
+
+	*val = msr_val;
+	return 0;
+}
+
 static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
 						 u32 rmid,
 						 enum resctrl_event_id eventid)
@@ -180,7 +215,7 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
 }
 
 void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
-			     u32 unused, u32 rmid,
+			     u32 unused, u32 rmid, int cntr_id,
 			     enum resctrl_event_id eventid)
 {
 	struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
@@ -192,9 +227,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
 	if (am) {
 		memset(am, 0, sizeof(*am));
 
-		prmid = logical_rmid_to_physical_rmid(cpu, rmid);
-		/* Record any initial, non-zero count value. */
-		__rmid_read_phys(prmid, eventid, &am->prev_msr);
+		if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
+		    resctrl_is_mbm_event(eventid)) {
+			if (cntr_id < 0)
+				return;
+			__cntr_id_read_phys(cntr_id, &am->prev_msr);
+		} else {
+			prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+			/* Record any initial, non-zero count value. */
+			__rmid_read_phys(prmid, eventid, &am->prev_msr);
+		}
 	}
 }
 
@@ -224,8 +266,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
 }
 
 int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
-			   u32 unused, u32 rmid, enum resctrl_event_id eventid,
-			   u64 *val, void *ignored)
+			   u32 unused, u32 rmid, int cntr_id,
+			   enum resctrl_event_id eventid, u64 *val, void *ignored)
 {
 	struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
@@ -237,8 +279,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
 
 	resctrl_arch_rmid_read_context_check();
 
-	prmid = logical_rmid_to_physical_rmid(cpu, rmid);
-	ret = __rmid_read_phys(prmid, eventid, &msr_val);
+	if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
+	    resctrl_is_mbm_event(eventid)) {
+		if (cntr_id < 0)
+			return cntr_id;
+		ret = __cntr_id_read_phys(cntr_id, &msr_val);
+	} else {
+		prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+		ret = __rmid_read_phys(prmid, eventid, &msr_val);
+	}
+
 	if (ret)
 		return ret;
 
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index a477be9cdb66..72f3dfb5b903 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -159,7 +159,11 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free)
 			break;
 
 		entry = __rmid_entry(idx);
-		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
+		/*
+		 * cntr_id is not relevant for QOS_L3_OCCUP_EVENT_ID.
+		 * Pass dummy value -1.
+		 */
+		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, -1,
 					   QOS_L3_OCCUP_EVENT_ID, &val,
 					   arch_mon_ctx)) {
 			rmid_dirty = true;
@@ -359,6 +363,7 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
 
 static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
 {
+	int cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid);
 	int cpu = smp_processor_id();
 	u32 closid = rdtgrp->closid;
 	u32 rmid = rdtgrp->mon.rmid;
@@ -368,7 +373,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
 	u64 tval = 0;
 
 	if (rr->first) {
-		resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
+		resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid);
 		m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
 		if (m)
 			memset(m, 0, sizeof(struct mbm_state));
@@ -379,7 +384,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
 		/* Reading a single domain, must be on a CPU in that domain. */
 		if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
 			return -EINVAL;
-		rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
+		rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, cntr_id,
 						 rr->evtid, &tval, rr->arch_mon_ctx);
 		if (rr->err)
 			return rr->err;
@@ -404,7 +409,8 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
 	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
 		if (d->ci->id != rr->ci->id)
 			continue;
-		err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
+		cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evtid);
+		err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, cntr_id,
 					     rr->evtid, &tval, rr->arch_mon_ctx);
 		if (!err) {
 			rr->val += tval;
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index f78b6064230c..cd24d1577e0a 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -473,6 +473,7 @@ void resctrl_offline_cpu(unsigned int cpu);
  *			counter may match traffic of both @closid and @rmid, or @rmid
  *			only.
  * @rmid:		rmid of the counter to read.
+ * @cntr_id:		cntr_id to read MBM events with mbm_cntr_assign mode.
  * @eventid:		eventid to read, e.g. L3 occupancy.
  * @val:		result of the counter read in bytes.
  * @arch_mon_ctx:	An architecture specific value from
@@ -490,8 +491,9 @@ void resctrl_offline_cpu(unsigned int cpu);
  * 0 on success, or -EIO, -EINVAL etc on error.
  */
 int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
-			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
-			   u64 *val, void *arch_mon_ctx);
+			   u32 closid, u32 rmid, int cntr_id,
+			   enum resctrl_event_id eventid, u64 *val,
+			   void *arch_mon_ctx);
 
 /**
  * resctrl_arch_rmid_read_context_check()  - warn about invalid contexts
@@ -532,12 +534,13 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
  * @closid:	closid that matches the rmid. Depending on the architecture, the
  *		counter may match traffic of both @closid and @rmid, or @rmid only.
  * @rmid:	The rmid whose counter values should be reset.
+ * @cntr_id:	The cntr_id to read MBM events with mbm_cntr_assign mode.
  * @eventid:	The eventid whose counter values should be reset.
  *
  * This can be called from any CPU.
  */
 void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
-			     u32 closid, u32 rmid,
+			     u32 closid, u32 rmid, int cntr_id,
 			     enum resctrl_event_id eventid);
 
 /**
-- 
2.34.1
Re: [PATCH v13 17/27] x86/resctrl: Add the support for reading ABMC counters
Posted by Reinette Chatre 6 months, 3 weeks ago
Hi Babu,

On 5/15/25 3:52 PM, Babu Moger wrote:
> Software can read the assignable counters using the QM_EVTSEL and QM_CTR
> register pair.

Please append more context on how the register pair is used to support the
changes in this patch.

> 
> QM_EVTSEL Register definition:
> =======================================================
> Bits	Mnemonic	Description
> =======================================================
> 63:44	--		Reserved
> 43:32   RMID		Resource Monitoring Identifier
> 31	ExtEvtID	Extended Event Identifier
> 30:8	--		Reserved
> 7:0	EvtID		Event Identifier
> =======================================================
> 
> The contents of a specific counter can be read by setting the following
> fields in QM_EVTSEL: ExtendedEvtID = 1, EvtID = L3CacheABMC (=1), and
> RMID = the desired counter ID. Reading QM_CTR will then return the
> contents of the specified counter. The E bit will be set if the counter
> configuration was invalid, or if an invalid counter ID was set in the
> QM_EVTSEL[RMID] field.

Please rewrite above in imperative tone.

> 
> Introduce __cntr_id_read_phys() to read the counter ID event data.
> 
> Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/40332.pdf
> Signed-off-by: Babu Moger <babu.moger@amd.com>
> ---
> v13: Split the patch into two. The first handles passing the rdtgroup structure to a few
>      functions (__mon_event_count() and mbm_update()); the second handles ABMC counter reading.
>      Added new function __cntr_id_read_phys() to handle ABMC event reading.
>      Updated kernel doc for resctrl_arch_reset_rmid() and resctrl_arch_rmid_read().
>      Resolved conflicts caused by the recent FS/ARCH code restructure.
>      The monitor.c file has now been split between the FS and ARCH directories.
> 
> v12: New patch to support extended event mode when ABMC is enabled.
> ---
>  arch/x86/kernel/cpu/resctrl/internal.h |  6 +++
>  arch/x86/kernel/cpu/resctrl/monitor.c  | 66 ++++++++++++++++++++++----
>  fs/resctrl/monitor.c                   | 14 ++++--
>  include/linux/resctrl.h                |  9 ++--
>  4 files changed, 80 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index db6b0c28ee6b..3b0cdb5520c7 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -40,6 +40,12 @@ struct arch_mbm_state {
>  /* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
>  #define ABMC_ENABLE_BIT			0
>  
> +/*
> + * ABMC Qos Event Identifiers.

QoS?

> + */
> +#define ABMC_EXTENDED_EVT_ID		BIT(31)
> +#define ABMC_EVT_ID			1

Please use BIT(0) to be consistent.

> +
>  /**
>   * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
>   *			       a resource for a control function
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index e31084f7babd..36a03dae6d8e 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -161,6 +161,41 @@ static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
>  	return 0;
>  }
>  
> +static int __cntr_id_read_phys(u32 cntr_id, u64 *val)
> +{
> +	u64 msr_val;
> +
> +	/*
> +	 * QM_EVTSEL Register definition:
> +	 * =======================================================
> +	 * Bits    Mnemonic        Description
> +	 * =======================================================
> +	 * 63:44   --              Reserved
> +	 * 43:32   RMID            Resource Monitoring Identifier
> +	 * 31      ExtEvtID        Extended Event Identifier
> +	 * 30:8    --              Reserved
> +	 * 7:0     EvtID           Event Identifier
> +	 * =======================================================
> +	 * The contents of a specific counter can be read by setting the
> +	 * following fields in QM_EVTSEL.ExtendedEvtID(=1) and
> +	 * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting [RMID] to the
> +	 * desired counter ID. Reading QM_CTR will then return the
> +	 * contents of the specified counter. The E bit will be set if the
> +	 * counter configuration was invalid, or if an invalid counter ID
> +	 * was set in the QM_EVTSEL[RMID] field.
> +	 */
> +	wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id);
> +	rdmsrl(MSR_IA32_QM_CTR, msr_val);
> +
> +	if (msr_val & RMID_VAL_ERROR)
> +		return -EIO;
> +	if (msr_val & RMID_VAL_UNAVAIL)
> +		return -EINVAL;
> +
> +	*val = msr_val;
> +	return 0;
> +}
> +
>  static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
>  						 u32 rmid,
>  						 enum resctrl_event_id eventid)
> @@ -180,7 +215,7 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
>  }
>  
>  void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
> -			     u32 unused, u32 rmid,
> +			     u32 unused, u32 rmid, int cntr_id,
>  			     enum resctrl_event_id eventid)
>  {
>  	struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
> @@ -192,9 +227,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>  	if (am) {
>  		memset(am, 0, sizeof(*am));
>  
> -		prmid = logical_rmid_to_physical_rmid(cpu, rmid);
> -		/* Record any initial, non-zero count value. */
> -		__rmid_read_phys(prmid, eventid, &am->prev_msr);
> +		if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
> +		    resctrl_is_mbm_event(eventid)) {
> +			if (cntr_id < 0)

This would be a bug, no? How about WARN_ON_ONCE()?
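
Something like this (untested) sketch:

		if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
		    resctrl_is_mbm_event(eventid)) {
			/* A negative cntr_id here indicates a bug in the caller. */
			if (WARN_ON_ONCE(cntr_id < 0))
				return;
			__cntr_id_read_phys(cntr_id, &am->prev_msr);
		}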

> +				return;
> +			__cntr_id_read_phys(cntr_id, &am->prev_msr);
> +		} else {
> +			prmid = logical_rmid_to_physical_rmid(cpu, rmid);
> +			/* Record any initial, non-zero count value. */
> +			__rmid_read_phys(prmid, eventid, &am->prev_msr);
> +		}
>  	}
>  }
>  
> @@ -224,8 +266,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
>  }
>  
>  int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
> -			   u32 unused, u32 rmid, enum resctrl_event_id eventid,
> -			   u64 *val, void *ignored)
> +			   u32 unused, u32 rmid, int cntr_id,
> +			   enum resctrl_event_id eventid, u64 *val, void *ignored)
>  {
>  	struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
>  	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> @@ -237,8 +279,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>  
>  	resctrl_arch_rmid_read_context_check();
>  
> -	prmid = logical_rmid_to_physical_rmid(cpu, rmid);
> -	ret = __rmid_read_phys(prmid, eventid, &msr_val);
> +	if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
> +	    resctrl_is_mbm_event(eventid)) {
> +		if (cntr_id < 0)

WARN_ON_ONCE()?
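
Same here, e.g. (sketch only):

	if (WARN_ON_ONCE(cntr_id < 0))
		return cntr_id;

WARN_ON_ONCE() evaluates to its condition, so the negative cntr_id is
still returned to the caller.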

> +			return cntr_id;
> +		ret = __cntr_id_read_phys(cntr_id, &msr_val);
> +	} else {
> +		prmid = logical_rmid_to_physical_rmid(cpu, rmid);
> +		ret = __rmid_read_phys(prmid, eventid, &msr_val);
> +	}
> +
>  	if (ret)
>  		return ret;
>  
> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index a477be9cdb66..72f3dfb5b903 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -159,7 +159,11 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free)
>  			break;
>  
>  		entry = __rmid_entry(idx);
> -		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
> +		/*
> +		 * cntr_id is not relevant for QOS_L3_OCCUP_EVENT_ID.
> +		 * Pass dummy value -1.
> +		 */
> +		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, -1,
>  					   QOS_L3_OCCUP_EVENT_ID, &val,
>  					   arch_mon_ctx)) {
>  			rmid_dirty = true;
> @@ -359,6 +363,7 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
>  
>  static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>  {
> +	int cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid);

So mbm_cntr_get() is called on *all* events (even non-MBM) whether assignable counters
are supported or not. I assume it relies on num_mbm_cntrs being zero on non-ABMC systems,
but I think it needs to be made explicit that mbm_cntr_get() returns -ENOENT in these cases.
Any developer attempting to modify mbm_cntr_get() needs to be aware of this usage.

It is quite subtle that resctrl_arch_reset_rmid() and resctrl_arch_rmid_read()
can be called with a negative counter ID. To help with code health this needs to
be highlighted (more later).
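
For example, the Return description of mbm_cntr_get() could spell this
out, along these lines (wording just a sketch):

 * Return: Assigned counter ID on success, or -ENOENT when no counter is
 * assigned to the event of @rdtgrp in domain @d. This includes systems
 * where num_mbm_cntrs is zero, so it is safe to call for any event on
 * any system.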

>  	int cpu = smp_processor_id();
>  	u32 closid = rdtgrp->closid;
>  	u32 rmid = rdtgrp->mon.rmid;
> @@ -368,7 +373,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>  	u64 tval = 0;
>  
>  	if (rr->first) {
> -		resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
> +		resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid);
>  		m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
>  		if (m)
>  			memset(m, 0, sizeof(struct mbm_state));
> @@ -379,7 +384,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>  		/* Reading a single domain, must be on a CPU in that domain. */
>  		if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
>  			return -EINVAL;
> -		rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
> +		rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, cntr_id,
>  						 rr->evtid, &tval, rr->arch_mon_ctx);
>  		if (rr->err)
>  			return rr->err;
> @@ -404,7 +409,8 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>  	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
>  		if (d->ci->id != rr->ci->id)
>  			continue;
> -		err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
> +		cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evtid);
> +		err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, cntr_id,
>  					     rr->evtid, &tval, rr->arch_mon_ctx);
>  		if (!err) {
>  			rr->val += tval;
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index f78b6064230c..cd24d1577e0a 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -473,6 +473,7 @@ void resctrl_offline_cpu(unsigned int cpu);
>   *			counter may match traffic of both @closid and @rmid, or @rmid
>   *			only.
>   * @rmid:		rmid of the counter to read.
> + * @cntr_id:		cntr_id to read MBM events with mbm_cntr_assign mode.

"Counter ID used to read MBM events in mbm_cntr_evt_assign mode. Only valid when
 mbm_cntr_evt_assign mode is enabled and @eventid is an MBM event. Can be negative
 when invalid." (Please feel free to improve)

>   * @eventid:		eventid to read, e.g. L3 occupancy.
>   * @val:		result of the counter read in bytes.
>   * @arch_mon_ctx:	An architecture specific value from
> @@ -490,8 +491,9 @@ void resctrl_offline_cpu(unsigned int cpu);
>   * 0 on success, or -EIO, -EINVAL etc on error.
>   */
>  int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
> -			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
> -			   u64 *val, void *arch_mon_ctx);
> +			   u32 closid, u32 rmid, int cntr_id,
> +			   enum resctrl_event_id eventid, u64 *val,
> +			   void *arch_mon_ctx);
>  
>  /**
>   * resctrl_arch_rmid_read_context_check()  - warn about invalid contexts
> @@ -532,12 +534,13 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
>   * @closid:	closid that matches the rmid. Depending on the architecture, the
>   *		counter may match traffic of both @closid and @rmid, or @rmid only.
>   * @rmid:	The rmid whose counter values should be reset.
> + * @cntr_id:	The cntr_id to read MBM events with mbm_cntr_assign mode.

Same as above.

>   * @eventid:	The eventid whose counter values should be reset.
>   *
>   * This can be called from any CPU.
>   */
>  void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
> -			     u32 closid, u32 rmid,
> +			     u32 closid, u32 rmid, int cntr_id,
>  			     enum resctrl_event_id eventid);
>  
>  /**

Reinette
Re: [PATCH v13 17/27] x86/resctrl: Add the support for reading ABMC counters
Posted by Moger, Babu 6 months, 2 weeks ago
Hi Reinette,

On 5/22/25 18:31, Reinette Chatre wrote:
> Hi Babu,
> 
> On 5/15/25 3:52 PM, Babu Moger wrote:
>> Software can read the assignable counters using the QM_EVTSEL and QM_CTR
>> register pair.
> 
> Please append more context on how the register pair is used to support the
> changes in this patch.

Sure.

> 
>>
>> QM_EVTSEL Register definition:
>> =======================================================
>> Bits	Mnemonic	Description
>> =======================================================
>> 63:44	--		Reserved
>> 43:32   RMID		Resource Monitoring Identifier
>> 31	ExtEvtID	Extended Event Identifier
>> 30:8	--		Reserved
>> 7:0	EvtID		Event Identifier
>> =======================================================
>>
>> The contents of a specific counter can be read by setting the following
>> fields in QM_EVTSEL: ExtendedEvtID = 1, EvtID = L3CacheABMC (=1), and
>> RMID = the desired counter ID. Reading QM_CTR will then return the
>> contents of the specified counter. The E bit will be set if the counter
>> configuration was invalid, or if an invalid counter ID was set in the
>> QM_EVTSEL[RMID] field.
> 
> Please rewrite above in imperative tone.

Sure.

> 
>>
>> Introduce __cntr_id_read_phys() to read the counter ID event data.
>>
>> Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/40332.pdf
>> Signed-off-by: Babu Moger <babu.moger@amd.com>
>> ---
>> v13: Split the patch into two. The first handles passing the rdtgroup structure to a few
>>      functions (__mon_event_count() and mbm_update()); the second handles ABMC counter reading.
>>      Added new function __cntr_id_read_phys() to handle ABMC event reading.
>>      Updated kernel doc for resctrl_arch_reset_rmid() and resctrl_arch_rmid_read().
>>      Resolved conflicts caused by the recent FS/ARCH code restructure.
>>      The monitor.c file has now been split between the FS and ARCH directories.
>>
>> v12: New patch to support extended event mode when ABMC is enabled.
>> ---
>>  arch/x86/kernel/cpu/resctrl/internal.h |  6 +++
>>  arch/x86/kernel/cpu/resctrl/monitor.c  | 66 ++++++++++++++++++++++----
>>  fs/resctrl/monitor.c                   | 14 ++++--
>>  include/linux/resctrl.h                |  9 ++--
>>  4 files changed, 80 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index db6b0c28ee6b..3b0cdb5520c7 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -40,6 +40,12 @@ struct arch_mbm_state {
>>  /* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
>>  #define ABMC_ENABLE_BIT			0
>>  
>> +/*
>> + * ABMC Qos Event Identifiers.
> 
> QoS?

Sure.

> 
>> + */
>> +#define ABMC_EXTENDED_EVT_ID		BIT(31)
>> +#define ABMC_EVT_ID			1
> 
> Please use BIT(0) to be consistent.
> 
Sure.

>> +
>>  /**
>>   * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
>>   *			       a resource for a control function
>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index e31084f7babd..36a03dae6d8e 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -161,6 +161,41 @@ static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
>>  	return 0;
>>  }
>>  
>> +static int __cntr_id_read_phys(u32 cntr_id, u64 *val)
>> +{
>> +	u64 msr_val;
>> +
>> +	/*
>> +	 * QM_EVTSEL Register definition:
>> +	 * =======================================================
>> +	 * Bits    Mnemonic        Description
>> +	 * =======================================================
>> +	 * 63:44   --              Reserved
>> +	 * 43:32   RMID            Resource Monitoring Identifier
>> +	 * 31      ExtEvtID        Extended Event Identifier
>> +	 * 30:8    --              Reserved
>> +	 * 7:0     EvtID           Event Identifier
>> +	 * =======================================================
>> +	 * The contents of a specific counter can be read by setting the
>> +	 * following fields in QM_EVTSEL.ExtendedEvtID(=1) and
>> +	 * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting [RMID] to the
>> +	 * desired counter ID. Reading QM_CTR will then return the
>> +	 * contents of the specified counter. The E bit will be set if the
>> +	 * counter configuration was invalid, or if an invalid counter ID
>> +	 * was set in the QM_EVTSEL[RMID] field.
>> +	 */
>> +	wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id);
>> +	rdmsrl(MSR_IA32_QM_CTR, msr_val);
>> +
>> +	if (msr_val & RMID_VAL_ERROR)
>> +		return -EIO;
>> +	if (msr_val & RMID_VAL_UNAVAIL)
>> +		return -EINVAL;
>> +
>> +	*val = msr_val;
>> +	return 0;
>> +}
>> +
>>  static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
>>  						 u32 rmid,
>>  						 enum resctrl_event_id eventid)
>> @@ -180,7 +215,7 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
>>  }
>>  
>>  void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>> -			     u32 unused, u32 rmid,
>> +			     u32 unused, u32 rmid, int cntr_id,
>>  			     enum resctrl_event_id eventid)
>>  {
>>  	struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
>> @@ -192,9 +227,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>>  	if (am) {
>>  		memset(am, 0, sizeof(*am));
>>  
>> -		prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> -		/* Record any initial, non-zero count value. */
>> -		__rmid_read_phys(prmid, eventid, &am->prev_msr);
>> +		if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
>> +		    resctrl_is_mbm_event(eventid)) {
>> +			if (cntr_id < 0)
> 
> This would be a bug, no? How about WARN_ON_ONCE()?

Yes. Will do that.

> 
>> +				return;
>> +			__cntr_id_read_phys(cntr_id, &am->prev_msr);
>> +		} else {
>> +			prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> +			/* Record any initial, non-zero count value. */
>> +			__rmid_read_phys(prmid, eventid, &am->prev_msr);
>> +		}
>>  	}
>>  }
>>  
>> @@ -224,8 +266,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
>>  }
>>  
>>  int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>> -			   u32 unused, u32 rmid, enum resctrl_event_id eventid,
>> -			   u64 *val, void *ignored)
>> +			   u32 unused, u32 rmid, int cntr_id,
>> +			   enum resctrl_event_id eventid, u64 *val, void *ignored)
>>  {
>>  	struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
>>  	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>> @@ -237,8 +279,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>>  
>>  	resctrl_arch_rmid_read_context_check();
>>  
>> -	prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> -	ret = __rmid_read_phys(prmid, eventid, &msr_val);
>> +	if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
>> +	    resctrl_is_mbm_event(eventid)) {
>> +		if (cntr_id < 0)
> 
> WARN_ON_ONCE()?
> 

Yes.

>> +			return cntr_id;
>> +		ret = __cntr_id_read_phys(cntr_id, &msr_val);
>> +	} else {
>> +		prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> +		ret = __rmid_read_phys(prmid, eventid, &msr_val);
>> +	}
>> +
>>  	if (ret)
>>  		return ret;
>>  
>> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
>> index a477be9cdb66..72f3dfb5b903 100644
>> --- a/fs/resctrl/monitor.c
>> +++ b/fs/resctrl/monitor.c
>> @@ -159,7 +159,11 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free)
>>  			break;
>>  
>>  		entry = __rmid_entry(idx);
>> -		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
>> +		/*
>> +		 * cntr_id is not relevant for QOS_L3_OCCUP_EVENT_ID.
>> +		 * Pass dummy value -1.
>> +		 */
>> +		if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, -1,
>>  					   QOS_L3_OCCUP_EVENT_ID, &val,
>>  					   arch_mon_ctx)) {
>>  			rmid_dirty = true;
>> @@ -359,6 +363,7 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
>>  
>>  static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>>  {
>> +	int cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid);
> 
> So mbm_cntr_get() is called on *all* events (even non-MBM) whether assignable counters
> are supported or not. I assume it relies on num_mbm_cntrs being zero on non-ABMC systems,
> but I think it needs to be made explicit that mbm_cntr_get() returns -ENOENT in these cases.
> Any developer attempting to modify mbm_cntr_get() needs to be aware of this usage.
> 

Yes. Good point.

> It is quite subtle that resctrl_arch_reset_rmid() and resctrl_arch_rmid_read()
> can be called with a negative counter ID. To help with code health this needs to
> be highlighted (more later).

Sure.

> 
>>  	int cpu = smp_processor_id();
>>  	u32 closid = rdtgrp->closid;
>>  	u32 rmid = rdtgrp->mon.rmid;
>> @@ -368,7 +373,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>>  	u64 tval = 0;
>>  
>>  	if (rr->first) {
>> -		resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
>> +		resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid);
>>  		m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
>>  		if (m)
>>  			memset(m, 0, sizeof(struct mbm_state));
>> @@ -379,7 +384,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>>  		/* Reading a single domain, must be on a CPU in that domain. */
>>  		if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
>>  			return -EINVAL;
>> -		rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
>> +		rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, cntr_id,
>>  						 rr->evtid, &tval, rr->arch_mon_ctx);
>>  		if (rr->err)
>>  			return rr->err;
>> @@ -404,7 +409,8 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>>  	list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
>>  		if (d->ci->id != rr->ci->id)
>>  			continue;
>> -		err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
>> +		cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evtid);
>> +		err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, cntr_id,
>>  					     rr->evtid, &tval, rr->arch_mon_ctx);
>>  		if (!err) {
>>  			rr->val += tval;
>> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
>> index f78b6064230c..cd24d1577e0a 100644
>> --- a/include/linux/resctrl.h
>> +++ b/include/linux/resctrl.h
>> @@ -473,6 +473,7 @@ void resctrl_offline_cpu(unsigned int cpu);
>>   *			counter may match traffic of both @closid and @rmid, or @rmid
>>   *			only.
>>   * @rmid:		rmid of the counter to read.
>> + * @cntr_id:		cntr_id to read MBM events with mbm_cntr_assign mode.
> 
> "Counter ID used to read MBM events in mbm_cntr_evt_assign mode. Only valid when
>  mbm_cntr_evt_assign mode is enabled and @eventid is an MBM event. Can be negative
>  when invalid." (Please feel free to improve)

Looks good.

> 
>>   * @eventid:		eventid to read, e.g. L3 occupancy.
>>   * @val:		result of the counter read in bytes.
>>   * @arch_mon_ctx:	An architecture specific value from
>> @@ -490,8 +491,9 @@ void resctrl_offline_cpu(unsigned int cpu);
>>   * 0 on success, or -EIO, -EINVAL etc on error.
>>   */
>>  int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>> -			   u32 closid, u32 rmid, enum resctrl_event_id eventid,
>> -			   u64 *val, void *arch_mon_ctx);
>> +			   u32 closid, u32 rmid, int cntr_id,
>> +			   enum resctrl_event_id eventid, u64 *val,
>> +			   void *arch_mon_ctx);
>>  
>>  /**
>>   * resctrl_arch_rmid_read_context_check()  - warn about invalid contexts
>> @@ -532,12 +534,13 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
>>   * @closid:	closid that matches the rmid. Depending on the architecture, the
>>   *		counter may match traffic of both @closid and @rmid, or @rmid only.
>>   * @rmid:	The rmid whose counter values should be reset.
>> + * @cntr_id:	The cntr_id to read MBM events with mbm_cntr_assign mode.
> 
> Same as above.
> 

Sure.

>>   * @eventid:	The eventid whose counter values should be reset.
>>   *
>>   * This can be called from any CPU.
>>   */
>>  void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>> -			     u32 closid, u32 rmid,
>> +			     u32 closid, u32 rmid, int cntr_id,
>>  			     enum resctrl_event_id eventid);
>>  
>>  /**
> 
> Reinette
> 

-- 
Thanks
Babu Moger