Software can read the assignable counters using the QM_EVTSEL and QM_CTR
register pair.
QM_EVTSEL Register definition:
=======================================================
Bits Mnemonic Description
=======================================================
63:44 -- Reserved
43:32 RMID Resource Monitoring Identifier
31 ExtEvtID Extended Event Identifier
30:8 -- Reserved
7:0 EvtID Event Identifier
=======================================================
The contents of a specific counter can be read by setting the following
fields in QM_EVTSEL: ExtendedEvtID = 1, EvtID = L3CacheABMC (=1),
and [RMID] = the desired counter ID. Reading QM_CTR will then
return the contents of the specified counter. The E bit will be set if the
counter configuration was invalid, or if an invalid counter ID was set
in the QM_EVTSEL[RMID] field.
Introduce __cntr_id_read_phys() to read the counter ID event data.
Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/40332.pdf
Signed-off-by: Babu Moger <babu.moger@amd.com>
---
v13: Split the patch into 2. First one to handle the passing of the rdtgroup structure to a few
functions (__mon_event_count() and mbm_update()). Second one to handle ABMC counter reading.
Added new function __cntr_id_read_phys() to handle ABMC event reading.
Updated kernel doc for resctrl_arch_reset_rmid() and resctrl_arch_rmid_read().
Resolved conflicts caused by the recent FS/ARCH code restructure.
The monitor.c file has now been split between the FS and ARCH directories.
v12: New patch to support extended event mode when ABMC is enabled.
---
arch/x86/kernel/cpu/resctrl/internal.h | 6 +++
arch/x86/kernel/cpu/resctrl/monitor.c | 66 ++++++++++++++++++++++----
fs/resctrl/monitor.c | 14 ++++--
include/linux/resctrl.h | 9 ++--
4 files changed, 80 insertions(+), 15 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index db6b0c28ee6b..3b0cdb5520c7 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -40,6 +40,12 @@ struct arch_mbm_state {
/* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
#define ABMC_ENABLE_BIT 0
+/*
+ * ABMC Qos Event Identifiers.
+ */
+#define ABMC_EXTENDED_EVT_ID BIT(31)
+#define ABMC_EVT_ID 1
+
/**
* struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
* a resource for a control function
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index e31084f7babd..36a03dae6d8e 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -161,6 +161,41 @@ static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
return 0;
}
+static int __cntr_id_read_phys(u32 cntr_id, u64 *val)
+{
+ u64 msr_val;
+
+ /*
+ * QM_EVTSEL Register definition:
+ * =======================================================
+ * Bits Mnemonic Description
+ * =======================================================
+ * 63:44 -- Reserved
+ * 43:32 RMID Resource Monitoring Identifier
+ * 31 ExtEvtID Extended Event Identifier
+ * 30:8 -- Reserved
+ * 7:0 EvtID Event Identifier
+ * =======================================================
+ * The contents of a specific counter can be read by setting the
+ * following fields in QM_EVTSEL.ExtendedEvtID(=1) and
+ * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting [RMID] to the
+ * desired counter ID. Reading QM_CTR will then return the
+ * contents of the specified counter. The E bit will be set if the
+ * counter configuration was invalid, or if an invalid counter ID
+ * was set in the QM_EVTSEL[RMID] field.
+ */
+ wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id);
+ rdmsrl(MSR_IA32_QM_CTR, msr_val);
+
+ if (msr_val & RMID_VAL_ERROR)
+ return -EIO;
+ if (msr_val & RMID_VAL_UNAVAIL)
+ return -EINVAL;
+
+ *val = msr_val;
+ return 0;
+}
+
static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
u32 rmid,
enum resctrl_event_id eventid)
@@ -180,7 +215,7 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
}
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 unused, u32 rmid,
+ u32 unused, u32 rmid, int cntr_id,
enum resctrl_event_id eventid)
{
struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
@@ -192,9 +227,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
if (am) {
memset(am, 0, sizeof(*am));
- prmid = logical_rmid_to_physical_rmid(cpu, rmid);
- /* Record any initial, non-zero count value. */
- __rmid_read_phys(prmid, eventid, &am->prev_msr);
+ if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
+ resctrl_is_mbm_event(eventid)) {
+ if (cntr_id < 0)
+ return;
+ __cntr_id_read_phys(cntr_id, &am->prev_msr);
+ } else {
+ prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+ /* Record any initial, non-zero count value. */
+ __rmid_read_phys(prmid, eventid, &am->prev_msr);
+ }
}
}
@@ -224,8 +266,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
}
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 unused, u32 rmid, enum resctrl_event_id eventid,
- u64 *val, void *ignored)
+ u32 unused, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid, u64 *val, void *ignored)
{
struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
@@ -237,8 +279,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
resctrl_arch_rmid_read_context_check();
- prmid = logical_rmid_to_physical_rmid(cpu, rmid);
- ret = __rmid_read_phys(prmid, eventid, &msr_val);
+ if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
+ resctrl_is_mbm_event(eventid)) {
+ if (cntr_id < 0)
+ return cntr_id;
+ ret = __cntr_id_read_phys(cntr_id, &msr_val);
+ } else {
+ prmid = logical_rmid_to_physical_rmid(cpu, rmid);
+ ret = __rmid_read_phys(prmid, eventid, &msr_val);
+ }
+
if (ret)
return ret;
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index a477be9cdb66..72f3dfb5b903 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -159,7 +159,11 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free)
break;
entry = __rmid_entry(idx);
- if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
+ /*
+ * cntr_id is not relevant for QOS_L3_OCCUP_EVENT_ID.
+ * Pass dummy value -1.
+ */
+ if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, -1,
QOS_L3_OCCUP_EVENT_ID, &val,
arch_mon_ctx)) {
rmid_dirty = true;
@@ -359,6 +363,7 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
{
+ int cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid);
int cpu = smp_processor_id();
u32 closid = rdtgrp->closid;
u32 rmid = rdtgrp->mon.rmid;
@@ -368,7 +373,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
u64 tval = 0;
if (rr->first) {
- resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
+ resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid);
m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
if (m)
memset(m, 0, sizeof(struct mbm_state));
@@ -379,7 +384,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
/* Reading a single domain, must be on a CPU in that domain. */
if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
return -EINVAL;
- rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
+ rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, cntr_id,
rr->evtid, &tval, rr->arch_mon_ctx);
if (rr->err)
return rr->err;
@@ -404,7 +409,8 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
if (d->ci->id != rr->ci->id)
continue;
- err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
+ cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evtid);
+ err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, cntr_id,
rr->evtid, &tval, rr->arch_mon_ctx);
if (!err) {
rr->val += tval;
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index f78b6064230c..cd24d1577e0a 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -473,6 +473,7 @@ void resctrl_offline_cpu(unsigned int cpu);
* counter may match traffic of both @closid and @rmid, or @rmid
* only.
* @rmid: rmid of the counter to read.
+ * @cntr_id: cntr_id to read MBM events with mbm_cntr_assign mode.
* @eventid: eventid to read, e.g. L3 occupancy.
* @val: result of the counter read in bytes.
* @arch_mon_ctx: An architecture specific value from
@@ -490,8 +491,9 @@ void resctrl_offline_cpu(unsigned int cpu);
* 0 on success, or -EIO, -EINVAL etc on error.
*/
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 closid, u32 rmid, enum resctrl_event_id eventid,
- u64 *val, void *arch_mon_ctx);
+ u32 closid, u32 rmid, int cntr_id,
+ enum resctrl_event_id eventid, u64 *val,
+ void *arch_mon_ctx);
/**
* resctrl_arch_rmid_read_context_check() - warn about invalid contexts
@@ -532,12 +534,13 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
* @closid: closid that matches the rmid. Depending on the architecture, the
* counter may match traffic of both @closid and @rmid, or @rmid only.
* @rmid: The rmid whose counter values should be reset.
+ * @cntr_id: The cntr_id to read MBM events with mbm_cntr_assign mode.
* @eventid: The eventid whose counter values should be reset.
*
* This can be called from any CPU.
*/
void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
- u32 closid, u32 rmid,
+ u32 closid, u32 rmid, int cntr_id,
enum resctrl_event_id eventid);
/**
--
2.34.1
Hi Babu,
On 5/15/25 3:52 PM, Babu Moger wrote:
> Software can read the assignable counters using the QM_EVTSEL and QM_CTR
> register pair.
Please append more context on how the register pair is used to support the
changes in this patch.
>
> QM_EVTSEL Register definition:
> =======================================================
> Bits Mnemonic Description
> =======================================================
> 63:44 -- Reserved
> 43:32 RMID Resource Monitoring Identifier
> 31 ExtEvtID Extended Event Identifier
> 30:8 -- Reserved
> 7:0 EvtID Event Identifier
> =======================================================
>
> The contents of a specific counter can be read by setting the following
> fields in QM_EVTSEL.ExtendedEvtID = 1, QM_EVTSEL.EvtID = L3CacheABMC (=1)
> and setting [RMID] to the desired counter ID. Reading QM_CTR will then
> return the contents of the specified counter. The E bit will be set if the
> counter configuration was invalid, or if an invalid counter ID was set
> in the QM_EVTSEL[RMID] field.
Please rewrite above in imperative tone.
>
> Introduce __cntr_id_read_phys() to read the counter ID event data.
>
> Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/40332.pdf
> Signed-off-by: Babu Moger <babu.moger@amd.com>
> ---
> v13: Split the patch into 2. First one to handle the passing of rdtgroup structure to few
> functions( __mon_event_count and mbm_update(). Second one to handle ABMC counter reading.
> Added new function __cntr_id_read_phys() to handle ABMC event reading.
> Updated kernel doc for resctrl_arch_reset_rmid() and resctrl_arch_rmid_read().
> Resolved conflicts caused by the recent FS/ARCH code restructure.
> The monitor.c file has now been split between the FS and ARCH directories.
>
> v12: New patch to support extended event mode when ABMC is enabled.
> ---
> arch/x86/kernel/cpu/resctrl/internal.h | 6 +++
> arch/x86/kernel/cpu/resctrl/monitor.c | 66 ++++++++++++++++++++++----
> fs/resctrl/monitor.c | 14 ++++--
> include/linux/resctrl.h | 9 ++--
> 4 files changed, 80 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
> index db6b0c28ee6b..3b0cdb5520c7 100644
> --- a/arch/x86/kernel/cpu/resctrl/internal.h
> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
> @@ -40,6 +40,12 @@ struct arch_mbm_state {
> /* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
> #define ABMC_ENABLE_BIT 0
>
> +/*
> + * ABMC Qos Event Identifiers.
QoS?
> + */
> +#define ABMC_EXTENDED_EVT_ID BIT(31)
> +#define ABMC_EVT_ID 1
Please use BIT(0) to be consistent.
> +
> /**
> * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
> * a resource for a control function
> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
> index e31084f7babd..36a03dae6d8e 100644
> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
> @@ -161,6 +161,41 @@ static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
> return 0;
> }
>
> +static int __cntr_id_read_phys(u32 cntr_id, u64 *val)
> +{
> + u64 msr_val;
> +
> + /*
> + * QM_EVTSEL Register definition:
> + * =======================================================
> + * Bits Mnemonic Description
> + * =======================================================
> + * 63:44 -- Reserved
> + * 43:32 RMID Resource Monitoring Identifier
> + * 31 ExtEvtID Extended Event Identifier
> + * 30:8 -- Reserved
> + * 7:0 EvtID Event Identifier
> + * =======================================================
> + * The contents of a specific counter can be read by setting the
> + * following fields in QM_EVTSEL.ExtendedEvtID(=1) and
> + * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting [RMID] to the
> + * desired counter ID. Reading QM_CTR will then return the
> + * contents of the specified counter. The E bit will be set if the
> + * counter configuration was invalid, or if an invalid counter ID
> + * was set in the QM_EVTSEL[RMID] field.
> + */
> + wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id);
> + rdmsrl(MSR_IA32_QM_CTR, msr_val);
> +
> + if (msr_val & RMID_VAL_ERROR)
> + return -EIO;
> + if (msr_val & RMID_VAL_UNAVAIL)
> + return -EINVAL;
> +
> + *val = msr_val;
> + return 0;
> +}
> +
> static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
> u32 rmid,
> enum resctrl_event_id eventid)
> @@ -180,7 +215,7 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
> }
>
> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
> - u32 unused, u32 rmid,
> + u32 unused, u32 rmid, int cntr_id,
> enum resctrl_event_id eventid)
> {
> struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
> @@ -192,9 +227,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
> if (am) {
> memset(am, 0, sizeof(*am));
>
> - prmid = logical_rmid_to_physical_rmid(cpu, rmid);
> - /* Record any initial, non-zero count value. */
> - __rmid_read_phys(prmid, eventid, &am->prev_msr);
> + if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
> + resctrl_is_mbm_event(eventid)) {
> + if (cntr_id < 0)
This would be a bug, no? how about WARN_ON_ONCE()?
> + return;
> + __cntr_id_read_phys(cntr_id, &am->prev_msr);
> + } else {
> + prmid = logical_rmid_to_physical_rmid(cpu, rmid);
> + /* Record any initial, non-zero count value. */
> + __rmid_read_phys(prmid, eventid, &am->prev_msr);
> + }
> }
> }
>
> @@ -224,8 +266,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
> }
>
> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
> - u32 unused, u32 rmid, enum resctrl_event_id eventid,
> - u64 *val, void *ignored)
> + u32 unused, u32 rmid, int cntr_id,
> + enum resctrl_event_id eventid, u64 *val, void *ignored)
> {
> struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
> @@ -237,8 +279,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>
> resctrl_arch_rmid_read_context_check();
>
> - prmid = logical_rmid_to_physical_rmid(cpu, rmid);
> - ret = __rmid_read_phys(prmid, eventid, &msr_val);
> + if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
> + resctrl_is_mbm_event(eventid)) {
> + if (cntr_id < 0)
WARN_ON_ONCE()?
> + return cntr_id;
> + ret = __cntr_id_read_phys(cntr_id, &msr_val);
> + } else {
> + prmid = logical_rmid_to_physical_rmid(cpu, rmid);
> + ret = __rmid_read_phys(prmid, eventid, &msr_val);
> + }
> +
> if (ret)
> return ret;
>
> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index a477be9cdb66..72f3dfb5b903 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -159,7 +159,11 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free)
> break;
>
> entry = __rmid_entry(idx);
> - if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
> + /*
> + * cntr_id is not relevant for QOS_L3_OCCUP_EVENT_ID.
> + * Pass dummy value -1.
> + */
> + if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, -1,
> QOS_L3_OCCUP_EVENT_ID, &val,
> arch_mon_ctx)) {
> rmid_dirty = true;
> @@ -359,6 +363,7 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
>
> static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
> {
> + int cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid);
So mbm_cntr_get() is called on *all* events (even non MBM) whether assignable counters
are supported or not. I assume it relies on num_mbm_cntrs to be zero on non-ABMC systems
but I think this needs to be explicit that mbm_cntr_get() returns -ENOENT in these cases.
Any developer attempting to modify mbm_cntr_get() needs to be aware of this usage.
This is quite subtle that resctrl_arch_reset_rmid() and resctrl_arch_rmid_read()
can be called with a negative counter ID. To help with code health this needs to
be highlighted (more later).
> int cpu = smp_processor_id();
> u32 closid = rdtgrp->closid;
> u32 rmid = rdtgrp->mon.rmid;
> @@ -368,7 +373,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
> u64 tval = 0;
>
> if (rr->first) {
> - resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
> + resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid);
> m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
> if (m)
> memset(m, 0, sizeof(struct mbm_state));
> @@ -379,7 +384,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
> /* Reading a single domain, must be on a CPU in that domain. */
> if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
> return -EINVAL;
> - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
> + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, cntr_id,
> rr->evtid, &tval, rr->arch_mon_ctx);
> if (rr->err)
> return rr->err;
> @@ -404,7 +409,8 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
> list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
> if (d->ci->id != rr->ci->id)
> continue;
> - err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
> + cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evtid);
> + err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, cntr_id,
> rr->evtid, &tval, rr->arch_mon_ctx);
> if (!err) {
> rr->val += tval;
> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
> index f78b6064230c..cd24d1577e0a 100644
> --- a/include/linux/resctrl.h
> +++ b/include/linux/resctrl.h
> @@ -473,6 +473,7 @@ void resctrl_offline_cpu(unsigned int cpu);
> * counter may match traffic of both @closid and @rmid, or @rmid
> * only.
> * @rmid: rmid of the counter to read.
> + * @cntr_id: cntr_id to read MBM events with mbm_cntr_assign mode.
"Counter ID used to read MBM events in mbm_cntr_evt_assign mode. Only valid when
mbm_cntr_evt_assign mode is enabled and @eventid is an MBM event. Can be negative
when invalid." (Please feel free to improve)
> * @eventid: eventid to read, e.g. L3 occupancy.
> * @val: result of the counter read in bytes.
> * @arch_mon_ctx: An architecture specific value from
> @@ -490,8 +491,9 @@ void resctrl_offline_cpu(unsigned int cpu);
> * 0 on success, or -EIO, -EINVAL etc on error.
> */
> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
> - u32 closid, u32 rmid, enum resctrl_event_id eventid,
> - u64 *val, void *arch_mon_ctx);
> + u32 closid, u32 rmid, int cntr_id,
> + enum resctrl_event_id eventid, u64 *val,
> + void *arch_mon_ctx);
>
> /**
> * resctrl_arch_rmid_read_context_check() - warn about invalid contexts
> @@ -532,12 +534,13 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
> * @closid: closid that matches the rmid. Depending on the architecture, the
> * counter may match traffic of both @closid and @rmid, or @rmid only.
> * @rmid: The rmid whose counter values should be reset.
> + * @cntr_id: The cntr_id to read MBM events with mbm_cntr_assign mode.
Same as above.
> * @eventid: The eventid whose counter values should be reset.
> *
> * This can be called from any CPU.
> */
> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
> - u32 closid, u32 rmid,
> + u32 closid, u32 rmid, int cntr_id,
> enum resctrl_event_id eventid);
>
> /**
Reinette
Hi Reinette,
On 5/22/25 18:31, Reinette Chatre wrote:
> Hi Babu,
>
> On 5/15/25 3:52 PM, Babu Moger wrote:
>> Software can read the assignable counters using the QM_EVTSEL and QM_CTR
>> register pair.
>
> Please append with more context on how register pair is used to support the
> changes in this patch.
Sure.
>
>>
>> QM_EVTSEL Register definition:
>> =======================================================
>> Bits Mnemonic Description
>> =======================================================
>> 63:44 -- Reserved
>> 43:32 RMID Resource Monitoring Identifier
>> 31 ExtEvtID Extended Event Identifier
>> 30:8 -- Reserved
>> 7:0 EvtID Event Identifier
>> =======================================================
>>
>> The contents of a specific counter can be read by setting the following
>> fields in QM_EVTSEL.ExtendedEvtID = 1, QM_EVTSEL.EvtID = L3CacheABMC (=1)
>> and setting [RMID] to the desired counter ID. Reading QM_CTR will then
>> return the contents of the specified counter. The E bit will be set if the
>> counter configuration was invalid, or if an invalid counter ID was set
>> in the QM_EVTSEL[RMID] field.
>
> Please rewrite above in imperative tone.
Sure.
>
>>
>> Introduce __cntr_id_read_phys() to read the counter ID event data.
>>
>> Link: https://www.amd.com/content/dam/amd/en/documents/processor-tech-docs/programmer-references/40332.pdf
>> Signed-off-by: Babu Moger <babu.moger@amd.com>
>> ---
>> v13: Split the patch into 2. First one to handle the passing of rdtgroup structure to few
>> functions( __mon_event_count and mbm_update(). Second one to handle ABMC counter reading.
>> Added new function __cntr_id_read_phys() to handle ABMC event reading.
>> Updated kernel doc for resctrl_arch_reset_rmid() and resctrl_arch_rmid_read().
>> Resolved conflicts caused by the recent FS/ARCH code restructure.
>> The monitor.c file has now been split between the FS and ARCH directories.
>>
>> v12: New patch to support extended event mode when ABMC is enabled.
>> ---
>> arch/x86/kernel/cpu/resctrl/internal.h | 6 +++
>> arch/x86/kernel/cpu/resctrl/monitor.c | 66 ++++++++++++++++++++++----
>> fs/resctrl/monitor.c | 14 ++++--
>> include/linux/resctrl.h | 9 ++--
>> 4 files changed, 80 insertions(+), 15 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
>> index db6b0c28ee6b..3b0cdb5520c7 100644
>> --- a/arch/x86/kernel/cpu/resctrl/internal.h
>> +++ b/arch/x86/kernel/cpu/resctrl/internal.h
>> @@ -40,6 +40,12 @@ struct arch_mbm_state {
>> /* Setting bit 0 in L3_QOS_EXT_CFG enables the ABMC feature. */
>> #define ABMC_ENABLE_BIT 0
>>
>> +/*
>> + * ABMC Qos Event Identifiers.
>
> QoS?
Sure.
>
>> + */
>> +#define ABMC_EXTENDED_EVT_ID BIT(31)
>> +#define ABMC_EVT_ID 1
>
> Please use BIT(0) to be consistent.
>
Sure.
>> +
>> /**
>> * struct rdt_hw_ctrl_domain - Arch private attributes of a set of CPUs that share
>> * a resource for a control function
>> diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
>> index e31084f7babd..36a03dae6d8e 100644
>> --- a/arch/x86/kernel/cpu/resctrl/monitor.c
>> +++ b/arch/x86/kernel/cpu/resctrl/monitor.c
>> @@ -161,6 +161,41 @@ static int __rmid_read_phys(u32 prmid, enum resctrl_event_id eventid, u64 *val)
>> return 0;
>> }
>>
>> +static int __cntr_id_read_phys(u32 cntr_id, u64 *val)
>> +{
>> + u64 msr_val;
>> +
>> + /*
>> + * QM_EVTSEL Register definition:
>> + * =======================================================
>> + * Bits Mnemonic Description
>> + * =======================================================
>> + * 63:44 -- Reserved
>> + * 43:32 RMID Resource Monitoring Identifier
>> + * 31 ExtEvtID Extended Event Identifier
>> + * 30:8 -- Reserved
>> + * 7:0 EvtID Event Identifier
>> + * =======================================================
>> + * The contents of a specific counter can be read by setting the
>> + * following fields in QM_EVTSEL.ExtendedEvtID(=1) and
>> + * QM_EVTSEL.EvtID = L3CacheABMC (=1) and setting [RMID] to the
>> + * desired counter ID. Reading QM_CTR will then return the
>> + * contents of the specified counter. The E bit will be set if the
>> + * counter configuration was invalid, or if an invalid counter ID
>> + * was set in the QM_EVTSEL[RMID] field.
>> + */
>> + wrmsr(MSR_IA32_QM_EVTSEL, ABMC_EXTENDED_EVT_ID | ABMC_EVT_ID, cntr_id);
>> + rdmsrl(MSR_IA32_QM_CTR, msr_val);
>> +
>> + if (msr_val & RMID_VAL_ERROR)
>> + return -EIO;
>> + if (msr_val & RMID_VAL_UNAVAIL)
>> + return -EINVAL;
>> +
>> + *val = msr_val;
>> + return 0;
>> +}
>> +
>> static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_dom,
>> u32 rmid,
>> enum resctrl_event_id eventid)
>> @@ -180,7 +215,7 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do
>> }
>>
>> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 unused, u32 rmid,
>> + u32 unused, u32 rmid, int cntr_id,
>> enum resctrl_event_id eventid)
>> {
>> struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
>> @@ -192,9 +227,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>> if (am) {
>> memset(am, 0, sizeof(*am));
>>
>> - prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> - /* Record any initial, non-zero count value. */
>> - __rmid_read_phys(prmid, eventid, &am->prev_msr);
>> + if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
>> + resctrl_is_mbm_event(eventid)) {
>> + if (cntr_id < 0)
>
> This would be a bug, no? how about WARN_ON_ONCE()?
Yes. Will do that.
>
>> + return;
>> + __cntr_id_read_phys(cntr_id, &am->prev_msr);
>> + } else {
>> + prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> + /* Record any initial, non-zero count value. */
>> + __rmid_read_phys(prmid, eventid, &am->prev_msr);
>> + }
>> }
>> }
>>
>> @@ -224,8 +266,8 @@ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
>> }
>>
>> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 unused, u32 rmid, enum resctrl_event_id eventid,
>> - u64 *val, void *ignored)
>> + u32 unused, u32 rmid, int cntr_id,
>> + enum resctrl_event_id eventid, u64 *val, void *ignored)
>> {
>> struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d);
>> struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
>> @@ -237,8 +279,16 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>>
>> resctrl_arch_rmid_read_context_check();
>>
>> - prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> - ret = __rmid_read_phys(prmid, eventid, &msr_val);
>> + if (resctrl_arch_mbm_cntr_assign_enabled(r) &&
>> + resctrl_is_mbm_event(eventid)) {
>> + if (cntr_id < 0)
>
> WARN_ON_ONCE()?
>
Yes.
>> + return cntr_id;
>> + ret = __cntr_id_read_phys(cntr_id, &msr_val);
>> + } else {
>> + prmid = logical_rmid_to_physical_rmid(cpu, rmid);
>> + ret = __rmid_read_phys(prmid, eventid, &msr_val);
>> + }
>> +
>> if (ret)
>> return ret;
>>
>> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
>> index a477be9cdb66..72f3dfb5b903 100644
>> --- a/fs/resctrl/monitor.c
>> +++ b/fs/resctrl/monitor.c
>> @@ -159,7 +159,11 @@ void __check_limbo(struct rdt_mon_domain *d, bool force_free)
>> break;
>>
>> entry = __rmid_entry(idx);
>> - if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
>> + /*
>> + * cntr_id is not relevant for QOS_L3_OCCUP_EVENT_ID.
>> + * Pass dummy value -1.
>> + */
>> + if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid, -1,
>> QOS_L3_OCCUP_EVENT_ID, &val,
>> arch_mon_ctx)) {
>> rmid_dirty = true;
>> @@ -359,6 +363,7 @@ static struct mbm_state *get_mbm_state(struct rdt_mon_domain *d, u32 closid,
>>
>> static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>> {
>> + int cntr_id = mbm_cntr_get(rr->r, rr->d, rdtgrp, rr->evtid);
>
> So mbm_cntr_get() is called on *all* events (even non MBM) whether assignable counters
> are supported or not. I assume it relies on num_mbm_cntrs to be zero on non-ABMC systems
> but I think this needs to be explicit that mbm_cntr_get() returns -ENOENT in these cases.
> Any developer attempting to modify mbm_cntr_get() needs to be aware of this usage.
>
Yes. Good point.
> This is quite subtle that resctrl_arch_reset_rmid() and resctrl_arch_rmid_read()
> can be called with a negative counter ID. To help with code health this needs to
> be highlighted (more later).
Sure.
>
>> int cpu = smp_processor_id();
>> u32 closid = rdtgrp->closid;
>> u32 rmid = rdtgrp->mon.rmid;
>> @@ -368,7 +373,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>> u64 tval = 0;
>>
>> if (rr->first) {
>> - resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);
>> + resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, cntr_id, rr->evtid);
>> m = get_mbm_state(rr->d, closid, rmid, rr->evtid);
>> if (m)
>> memset(m, 0, sizeof(struct mbm_state));
>> @@ -379,7 +384,7 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>> /* Reading a single domain, must be on a CPU in that domain. */
>> if (!cpumask_test_cpu(cpu, &rr->d->hdr.cpu_mask))
>> return -EINVAL;
>> - rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid,
>> + rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, cntr_id,
>> rr->evtid, &tval, rr->arch_mon_ctx);
>> if (rr->err)
>> return rr->err;
>> @@ -404,7 +409,8 @@ static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
>> list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
>> if (d->ci->id != rr->ci->id)
>> continue;
>> - err = resctrl_arch_rmid_read(rr->r, d, closid, rmid,
>> + cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evtid);
>> + err = resctrl_arch_rmid_read(rr->r, d, closid, rmid, cntr_id,
>> rr->evtid, &tval, rr->arch_mon_ctx);
>> if (!err) {
>> rr->val += tval;
>> diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
>> index f78b6064230c..cd24d1577e0a 100644
>> --- a/include/linux/resctrl.h
>> +++ b/include/linux/resctrl.h
>> @@ -473,6 +473,7 @@ void resctrl_offline_cpu(unsigned int cpu);
>> * counter may match traffic of both @closid and @rmid, or @rmid
>> * only.
>> * @rmid: rmid of the counter to read.
>> + * @cntr_id: cntr_id to read MBM events with mbm_cntr_assign mode.
>
> "Counter ID used to read MBM events in mbm_cntr_evt_assign mode. Only valid when
> mbm_cntr_evt_assign mode is enabled and @eventid is an MBM event. Can be negative
> when invalid." (Please feel free to improve)
Looks good.
>
>> * @eventid: eventid to read, e.g. L3 occupancy.
>> * @val: result of the counter read in bytes.
>> * @arch_mon_ctx: An architecture specific value from
>> @@ -490,8 +491,9 @@ void resctrl_offline_cpu(unsigned int cpu);
>> * 0 on success, or -EIO, -EINVAL etc on error.
>> */
>> int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 closid, u32 rmid, enum resctrl_event_id eventid,
>> - u64 *val, void *arch_mon_ctx);
>> + u32 closid, u32 rmid, int cntr_id,
>> + enum resctrl_event_id eventid, u64 *val,
>> + void *arch_mon_ctx);
>>
>> /**
>> * resctrl_arch_rmid_read_context_check() - warn about invalid contexts
>> @@ -532,12 +534,13 @@ struct rdt_domain_hdr *resctrl_find_domain(struct list_head *h, int id,
>> * @closid: closid that matches the rmid. Depending on the architecture, the
>> * counter may match traffic of both @closid and @rmid, or @rmid only.
>> * @rmid: The rmid whose counter values should be reset.
>> + * @cntr_id: The cntr_id to read MBM events with mbm_cntr_assign mode.
>
> Same as above.
>
Sure.
>> * @eventid: The eventid whose counter values should be reset.
>> *
>> * This can be called from any CPU.
>> */
>> void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d,
>> - u32 closid, u32 rmid,
>> + u32 closid, u32 rmid, int cntr_id,
>> enum resctrl_event_id eventid);
>>
>> /**
>
> Reinette
>
--
Thanks
Babu Moger
© 2016 - 2025 Red Hat, Inc.