[PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported

James Morse posted 29 patches 3 months, 3 weeks ago
There is a newer version of this series
[PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported
Posted by James Morse 3 months, 3 weeks ago
From: Rohit Mathew <rohit.mathew@arm.com>

Now that the larger counter sizes are probed, make use of them.

Callers of mpam_msmon_read() may not know (or care!) about the different
counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the
driver pick the counter to use.

Only 32bit accesses to the MSC are required to be supported by the
spec, but these registers are 64bits. The lower half may overflow
into the higher half between two 32bit reads. To avoid this, use
a helper that reads the top half multiple times to check for overflow.

Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
[morse: merged multiple patches from Rohit, added explicit counter selection ]
Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Ben Horgan <ben.horgan@arm.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
Tested-by: Fenghua Yu <fenghuay@nvidia.com>
---
Changes since v2:
 * Removed mpam_feat_msmon_mbwu as a top-level bit for explicit 31bit counter
   selection.
 * Allow callers of mpam_msmon_read() to specify mpam_feat_msmon_mbwu and have
   the driver pick a supported counter size.
 * Rephrased commit message.

Changes since v1:
 * Only clear OFLOW_STATUS_L on MBWU counters.

Changes since RFC:
 * Commit message wrangling.
 * Refer to 31 bit counters as opposed to 32 bit (registers).
---
 drivers/resctrl/mpam_devices.c | 134 ++++++++++++++++++++++++++++-----
 1 file changed, 116 insertions(+), 18 deletions(-)

diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
index f4d07234ce10..c207a6d2832c 100644
--- a/drivers/resctrl/mpam_devices.c
+++ b/drivers/resctrl/mpam_devices.c
@@ -897,6 +897,48 @@ struct mon_read {
 	int				err;
 };
 
+static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris)
+{
+	return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) ||
+		mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props));
+}
+
+static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
+{
+	int retry = 3;
+	u32 mbwu_l_low;
+	u64 mbwu_l_high1, mbwu_l_high2;
+
+	mpam_mon_sel_lock_held(msc);
+
+	WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
+	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
+
+	mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
+	do {
+		mbwu_l_high1 = mbwu_l_high2;
+		mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L);
+		mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
+
+		retry--;
+	} while (mbwu_l_high1 != mbwu_l_high2 && retry > 0);
+
+	if (mbwu_l_high1 == mbwu_l_high2)
+		return (mbwu_l_high1 << 32) | mbwu_l_low;
+	return MSMON___NRDY_L;
+}
+
+static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
+{
+	mpam_mon_sel_lock_held(msc);
+
+	WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
+	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
+
+	__mpam_write_reg(msc, MSMON_MBWU_L, 0);
+	__mpam_write_reg(msc, MSMON_MBWU_L + 4, 0);
+}
+
 static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
 				   u32 *flt_val)
 {
@@ -924,7 +966,9 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
 					       ctx->csu_exclude_clean);
 
 		break;
-	case mpam_feat_msmon_mbwu:
+	case mpam_feat_msmon_mbwu_31counter:
+	case mpam_feat_msmon_mbwu_44counter:
+	case mpam_feat_msmon_mbwu_63counter:
 		*ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU;
 
 		if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props))
@@ -946,7 +990,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
 		*ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL);
 		*flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT);
 		return;
-	case mpam_feat_msmon_mbwu:
+	case mpam_feat_msmon_mbwu_31counter:
+	case mpam_feat_msmon_mbwu_44counter:
+	case mpam_feat_msmon_mbwu_63counter:
 		*ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
 		*flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
 		return;
@@ -959,6 +1005,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
 static void clean_msmon_ctl_val(u32 *cur_ctl)
 {
 	*cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS;
+
+	if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU)
+		*cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L;
 }
 
 static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
@@ -978,10 +1027,15 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
 		mpam_write_monsel_reg(msc, CSU, 0);
 		mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
 		break;
-	case mpam_feat_msmon_mbwu:
+	case mpam_feat_msmon_mbwu_44counter:
+	case mpam_feat_msmon_mbwu_63counter:
+		mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
+		fallthrough;
+	case mpam_feat_msmon_mbwu_31counter:
 		mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
 		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
 		mpam_write_monsel_reg(msc, MBWU, 0);
+
 		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
 
 		mbwu_state = &m->ris->mbwu_state[m->ctx->mon];
@@ -993,10 +1047,19 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
 	}
 }
 
-static u64 mpam_msmon_overflow_val(struct mpam_msc_ris *ris)
+static u64 mpam_msmon_overflow_val(enum mpam_device_features type)
 {
-	/* TODO: scaling, and long counters */
-	return GENMASK_ULL(30, 0);
+	/* TODO: implement scaling counters */
+	switch (type) {
+	case mpam_feat_msmon_mbwu_63counter:
+		return GENMASK_ULL(62, 0);
+	case mpam_feat_msmon_mbwu_44counter:
+		return GENMASK_ULL(43, 0);
+	case mpam_feat_msmon_mbwu_31counter:
+		return GENMASK_ULL(30, 0);
+	default:
+		return 0;
+	}
 }
 
 /* Call with MSC lock held */
@@ -1037,11 +1100,24 @@ static void __ris_msmon_read(void *arg)
 			nrdy = now & MSMON___NRDY;
 		now = FIELD_GET(MSMON___VALUE, now);
 		break;
-	case mpam_feat_msmon_mbwu:
-		now = mpam_read_monsel_reg(msc, MBWU);
-		if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
-			nrdy = now & MSMON___NRDY;
-		now = FIELD_GET(MSMON___VALUE, now);
+	case mpam_feat_msmon_mbwu_31counter:
+	case mpam_feat_msmon_mbwu_44counter:
+	case mpam_feat_msmon_mbwu_63counter:
+		if (m->type != mpam_feat_msmon_mbwu_31counter) {
+			now = mpam_msc_read_mbwu_l(msc);
+			if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
+				nrdy = now & MSMON___NRDY_L;
+
+			if (m->type == mpam_feat_msmon_mbwu_63counter)
+				now = FIELD_GET(MSMON___LWD_VALUE, now);
+			else
+				now = FIELD_GET(MSMON___L_VALUE, now);
+		} else {
+			now = mpam_read_monsel_reg(msc, MBWU);
+			if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
+				nrdy = now & MSMON___NRDY;
+			now = FIELD_GET(MSMON___VALUE, now);
+		}
 
 		if (nrdy)
 			break;
@@ -1050,7 +1126,7 @@ static void __ris_msmon_read(void *arg)
 
 		/* Add any pre-overflow value to the mbwu_state->val */
 		if (mbwu_state->prev_val > now)
-			overflow_val = mpam_msmon_overflow_val(ris) - mbwu_state->prev_val;
+			overflow_val = mpam_msmon_overflow_val(m->type) - mbwu_state->prev_val;
 
 		mbwu_state->prev_val = now;
 		mbwu_state->correction += overflow_val;
@@ -1106,13 +1182,26 @@ static int _msmon_read(struct mpam_component *comp, struct mon_read *arg)
 	return any_err;
 }
 
+static enum mpam_device_features mpam_msmon_choose_counter(struct mpam_class *class)
+{
+	struct mpam_props *cprops = &class->props;
+
+	if (mpam_has_feature(mpam_feat_msmon_mbwu_44counter, cprops))
+		return mpam_feat_msmon_mbwu_44counter;
+	if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, cprops))
+		return mpam_feat_msmon_mbwu_63counter;
+
+	return mpam_feat_msmon_mbwu_31counter;
+}
+
 int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx,
 		    enum mpam_device_features type, u64 *val)
 {
 	int err;
 	struct mon_read arg;
 	u64 wait_jiffies = 0;
-	struct mpam_props *cprops = &comp->class->props;
+	struct mpam_class *class = comp->class;
+	struct mpam_props *cprops = &class->props;
 
 	might_sleep();
 
@@ -1129,9 +1218,12 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx,
 	};
 	*val = 0;
 
+	if (type == mpam_feat_msmon_mbwu)
+		type = mpam_msmon_choose_counter(class);
+
 	err = _msmon_read(comp, &arg);
-	if (err == -EBUSY && comp->class->nrdy_usec)
-		wait_jiffies = usecs_to_jiffies(comp->class->nrdy_usec);
+	if (err == -EBUSY && class->nrdy_usec)
+		wait_jiffies = usecs_to_jiffies(class->nrdy_usec);
 
 	while (wait_jiffies)
 		wait_jiffies = schedule_timeout_uninterruptible(wait_jiffies);
@@ -1293,12 +1385,13 @@ static int mpam_restore_mbwu_state(void *_ris)
 	int i;
 	struct mon_read mwbu_arg;
 	struct mpam_msc_ris *ris = _ris;
+	struct mpam_class *class = ris->vmsc->comp->class;
 
 	for (i = 0; i < ris->props.num_mbwu_mon; i++) {
 		if (ris->mbwu_state[i].enabled) {
 			mwbu_arg.ris = ris;
 			mwbu_arg.ctx = &ris->mbwu_state[i].cfg;
-			mwbu_arg.type = mpam_feat_msmon_mbwu;
+			mwbu_arg.type = mpam_msmon_choose_counter(class);
 
 			__ris_msmon_read(&mwbu_arg);
 		}
@@ -1333,8 +1426,13 @@ static int mpam_save_mbwu_state(void *arg)
 		cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
 		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0);
 
-		val = mpam_read_monsel_reg(msc, MBWU);
-		mpam_write_monsel_reg(msc, MBWU, 0);
+		if (mpam_ris_has_mbwu_long_counter(ris)) {
+			val = mpam_msc_read_mbwu_l(msc);
+			mpam_msc_zero_mbwu_l(msc);
+		} else {
+			val = mpam_read_monsel_reg(msc, MBWU);
+			mpam_write_monsel_reg(msc, MBWU, 0);
+		}
 
 		cfg->mon = i;
 		cfg->pmg = FIELD_GET(MSMON_CFG_x_FLT_PMG, cur_flt);
-- 
2.39.5
Re: [PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported
Posted by Peter Newman 3 months ago
Hi Ben (and James),

On Fri, Oct 17, 2025 at 8:59 PM James Morse <james.morse@arm.com> wrote:
>
> From: Rohit Mathew <rohit.mathew@arm.com>
>
> Now that the larger counter sizes are probed, make use of them.
>
> Callers of mpam_msmon_read() may not know (or care!) about the different
> counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the
> driver pick the counter to use.
>
> Only 32bit accesses to the MSC are required to be supported by the
> spec, but these registers are 64bits. The lower half may overflow
> into the higher half between two 32bit reads. To avoid this, use
> a helper that reads the top half multiple times to check for overflow.
>
> Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
> [morse: merged multiple patches from Rohit, added explicit counter selection ]
> Signed-off-by: James Morse <james.morse@arm.com>
> Reviewed-by: Ben Horgan <ben.horgan@arm.com>
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
> Tested-by: Fenghua Yu <fenghuay@nvidia.com>
> ---
> Changes since v2:
>  * Removed mpam_feat_msmon_mbwu as a top-level bit for explicit 31bit counter
>    selection.
>  * Allow callers of mpam_msmon_read() to specify mpam_feat_msmon_mbwu and have
>    the driver pick a supported counter size.
>  * Rephrased commit message.
>
> Changes since v1:
>  * Only clear OFLOW_STATUS_L on MBWU counters.
>
> Changes since RFC:
>  * Commit message wrangling.
>  * Refer to 31 bit counters as opposed to 32 bit (registers).
> ---
>  drivers/resctrl/mpam_devices.c | 134 ++++++++++++++++++++++++++++-----
>  1 file changed, 116 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> index f4d07234ce10..c207a6d2832c 100644
> --- a/drivers/resctrl/mpam_devices.c
> +++ b/drivers/resctrl/mpam_devices.c
> @@ -897,6 +897,48 @@ struct mon_read {
>         int                             err;
>  };
>
> +static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris)
> +{
> +       return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) ||
> +               mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props));
> +}
> +
> +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
> +{
> +       int retry = 3;
> +       u32 mbwu_l_low;
> +       u64 mbwu_l_high1, mbwu_l_high2;
> +
> +       mpam_mon_sel_lock_held(msc);
> +
> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
> +
> +       mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
> +       do {
> +               mbwu_l_high1 = mbwu_l_high2;
> +               mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L);
> +               mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
> +
> +               retry--;
> +       } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0);
> +
> +       if (mbwu_l_high1 == mbwu_l_high2)
> +               return (mbwu_l_high1 << 32) | mbwu_l_low;
> +       return MSMON___NRDY_L;
> +}
> +
> +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
> +{
> +       mpam_mon_sel_lock_held(msc);
> +
> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
> +
> +       __mpam_write_reg(msc, MSMON_MBWU_L, 0);
> +       __mpam_write_reg(msc, MSMON_MBWU_L + 4, 0);
> +}
> +
>  static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>                                    u32 *flt_val)
>  {
> @@ -924,7 +966,9 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>                                                ctx->csu_exclude_clean);
>
>                 break;
> -       case mpam_feat_msmon_mbwu:
> +       case mpam_feat_msmon_mbwu_31counter:
> +       case mpam_feat_msmon_mbwu_44counter:
> +       case mpam_feat_msmon_mbwu_63counter:
>                 *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU;
>
>                 if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props))
> @@ -946,7 +990,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL);
>                 *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT);
>                 return;
> -       case mpam_feat_msmon_mbwu:
> +       case mpam_feat_msmon_mbwu_31counter:
> +       case mpam_feat_msmon_mbwu_44counter:
> +       case mpam_feat_msmon_mbwu_63counter:
>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
>                 *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
>                 return;
> @@ -959,6 +1005,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>  static void clean_msmon_ctl_val(u32 *cur_ctl)
>  {
>         *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS;
> +
> +       if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU)
> +               *cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L;
>  }
>
>  static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
> @@ -978,10 +1027,15 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>                 mpam_write_monsel_reg(msc, CSU, 0);
>                 mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>                 break;
> -       case mpam_feat_msmon_mbwu:
> +       case mpam_feat_msmon_mbwu_44counter:
> +       case mpam_feat_msmon_mbwu_63counter:
> +               mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
> +               fallthrough;
> +       case mpam_feat_msmon_mbwu_31counter:
>                 mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>                 mpam_write_monsel_reg(msc, MBWU, 0);

The fallthrough above seems to be problematic, assuming the MBWU=0
write being last for the 31-bit counter was intentional. For long counters, this is
zeroing the counter before updating the filter/control registers, but
then clearing the 32-bit version of the counter. This fails to clear
the NRDY bit on the long counter, which isn't cleared by software
anywhere else.

From section 10.3.2 of the MPAM spec that was shared:

 "On a counting monitor, the NRDY bit remains set until it is reset by
software writing it as 0 in the monitor register, or automatically
after the monitor is captured in the capture register by a capture
event"

If I update the 63-bit case to call
mpam_msc_zero_mbwu_l(m->ris->vmsc->msc) after updating the
control/filter registers (in addition to the other items I pointed out in
my last reply), I'm able to read MBWU counts from my hardware through
mbm_total_bytes.

Thanks,
-Peter
Re: [PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported
Posted by Ben Horgan 3 months ago
Hi Peter,

On 11/6/25 16:15, Peter Newman wrote:
> Hi Ben (and James),
> 
> On Fri, Oct 17, 2025 at 8:59 PM James Morse <james.morse@arm.com> wrote:
>>
>> From: Rohit Mathew <rohit.mathew@arm.com>
>>
>> Now that the larger counter sizes are probed, make use of them.
>>
>> Callers of mpam_msmon_read() may not know (or care!) about the different
>> counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the
>> driver pick the counter to use.
>>
>> Only 32bit accesses to the MSC are required to be supported by the
>> spec, but these registers are 64bits. The lower half may overflow
>> into the higher half between two 32bit reads. To avoid this, use
>> a helper that reads the top half multiple times to check for overflow.
>>
>> Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
>> [morse: merged multiple patches from Rohit, added explicit counter selection ]
>> Signed-off-by: James Morse <james.morse@arm.com>
>> Reviewed-by: Ben Horgan <ben.horgan@arm.com>
>> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
>> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
>> Tested-by: Fenghua Yu <fenghuay@nvidia.com>
>> ---
>> Changes since v2:
>>  * Removed mpam_feat_msmon_mbwu as a top-level bit for explicit 31bit counter
>>    selection.
>>  * Allow callers of mpam_msmon_read() to specify mpam_feat_msmon_mbwu and have
>>    the driver pick a supported counter size.
>>  * Rephrased commit message.
>>
>> Changes since v1:
>>  * Only clear OFLOW_STATUS_L on MBWU counters.
>>
>> Changes since RFC:
>>  * Commit message wrangling.
>>  * Refer to 31 bit counters as opposed to 32 bit (registers).
>> ---
>>  drivers/resctrl/mpam_devices.c | 134 ++++++++++++++++++++++++++++-----
>>  1 file changed, 116 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
>> index f4d07234ce10..c207a6d2832c 100644
>> --- a/drivers/resctrl/mpam_devices.c
>> +++ b/drivers/resctrl/mpam_devices.c
>> @@ -897,6 +897,48 @@ struct mon_read {
>>         int                             err;
>>  };
>>
>> +static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris)
>> +{
>> +       return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) ||
>> +               mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props));
>> +}
>> +
>> +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
>> +{
>> +       int retry = 3;
>> +       u32 mbwu_l_low;
>> +       u64 mbwu_l_high1, mbwu_l_high2;
>> +
>> +       mpam_mon_sel_lock_held(msc);
>> +
>> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
>> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
>> +
>> +       mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
>> +       do {
>> +               mbwu_l_high1 = mbwu_l_high2;
>> +               mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L);
>> +               mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
>> +
>> +               retry--;
>> +       } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0);
>> +
>> +       if (mbwu_l_high1 == mbwu_l_high2)
>> +               return (mbwu_l_high1 << 32) | mbwu_l_low;
>> +       return MSMON___NRDY_L;
>> +}
>> +
>> +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
>> +{
>> +       mpam_mon_sel_lock_held(msc);
>> +
>> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
>> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
>> +
>> +       __mpam_write_reg(msc, MSMON_MBWU_L, 0);
>> +       __mpam_write_reg(msc, MSMON_MBWU_L + 4, 0);
>> +}
>> +
>>  static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>                                    u32 *flt_val)
>>  {
>> @@ -924,7 +966,9 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>                                                ctx->csu_exclude_clean);
>>
>>                 break;
>> -       case mpam_feat_msmon_mbwu:
>> +       case mpam_feat_msmon_mbwu_31counter:
>> +       case mpam_feat_msmon_mbwu_44counter:
>> +       case mpam_feat_msmon_mbwu_63counter:
>>                 *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU;
>>
>>                 if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props))
>> @@ -946,7 +990,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL);
>>                 *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT);
>>                 return;
>> -       case mpam_feat_msmon_mbwu:
>> +       case mpam_feat_msmon_mbwu_31counter:
>> +       case mpam_feat_msmon_mbwu_44counter:
>> +       case mpam_feat_msmon_mbwu_63counter:
>>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
>>                 *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
>>                 return;
>> @@ -959,6 +1005,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>  static void clean_msmon_ctl_val(u32 *cur_ctl)
>>  {
>>         *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS;
>> +
>> +       if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU)
>> +               *cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L;
>>  }
>>
>>  static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>> @@ -978,10 +1027,15 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>>                 mpam_write_monsel_reg(msc, CSU, 0);
>>                 mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>>                 break;
>> -       case mpam_feat_msmon_mbwu:
>> +       case mpam_feat_msmon_mbwu_44counter:
>> +       case mpam_feat_msmon_mbwu_63counter:
>> +               mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
>> +               fallthrough;
>> +       case mpam_feat_msmon_mbwu_31counter:
>>                 mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
>>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>>                 mpam_write_monsel_reg(msc, MBWU, 0);
> 
> The fallthrough above seems to be problematic, assuming the MBWU=0
> being last for 31-bit was intentional. For long counters, this is
> zeroing the counter before updating the filter/control registers, but
> then clearing the 32-bit version of the counter. This fails to clear
> the NRDY bit on the long counter, which isn't cleared by software
> anywhere else.
> 
> From section 10.3.2 from the MPAM spec shared:
> 
>  "On a counting monitor, the NRDY bit remains set until it is reset by
> software writing it as 0 in the monitor register, or automatically
> after the monitor is captured in the capture register by a capture
> event"
> 
> If I update the 63-bit case to call
> mpam_msc_zero_mbwu_l(m->ris->vmsc->msc) after updating the
> control/filter registers (in addition to the other items I pointed in
> my last reply), I'm able to read MBWU counts from my hardware through
> mbm_total_bytes.
> 
> Thanks,
> -Peter

Thanks for the testing and flagging the problem. We should do the
configuration in the same order for all the monitors.

I'll change the case to:

	case mpam_feat_msmon_mbwu_31counter:
	case mpam_feat_msmon_mbwu_44counter:
	case mpam_feat_msmon_mbwu_63counter:
		mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);

		if (m->type == mpam_feat_msmon_mbwu_31counter)
			mpam_write_monsel_reg(msc, MBWU, 0);
		else
			mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);

		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
		break;

Thanks,

Ben

Re: [PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported
Posted by Peter Newman 3 months ago
Hi Ben

On Thu, Nov 6, 2025 at 5:41 PM Ben Horgan <ben.horgan@arm.com> wrote:
>
> Hi Peter,
>
> On 11/6/25 16:15, Peter Newman wrote:
> > Hi Ben (and James),
> >
> > On Fri, Oct 17, 2025 at 8:59 PM James Morse <james.morse@arm.com> wrote:
> >>
> >> From: Rohit Mathew <rohit.mathew@arm.com>
> >>
> >> Now that the larger counter sizes are probed, make use of them.
> >>
> >> Callers of mpam_msmon_read() may not know (or care!) about the different
> >> counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the
> >> driver pick the counter to use.
> >>
> >> Only 32bit accesses to the MSC are required to be supported by the
> >> spec, but these registers are 64bits. The lower half may overflow
> >> into the higher half between two 32bit reads. To avoid this, use
> >> a helper that reads the top half multiple times to check for overflow.
> >>
> >> Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
> >> [morse: merged multiple patches from Rohit, added explicit counter selection ]
> >> Signed-off-by: James Morse <james.morse@arm.com>
> >> Reviewed-by: Ben Horgan <ben.horgan@arm.com>
> >> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> >> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
> >> Tested-by: Fenghua Yu <fenghuay@nvidia.com>
> >> ---
> >> Changes since v2:
> >>  * Removed mpam_feat_msmon_mbwu as a top-level bit for explicit 31bit counter
> >>    selection.
> >>  * Allow callers of mpam_msmon_read() to specify mpam_feat_msmon_mbwu and have
> >>    the driver pick a supported counter size.
> >>  * Rephrased commit message.
> >>
> >> Changes since v1:
> >>  * Only clear OFLOW_STATUS_L on MBWU counters.
> >>
> >> Changes since RFC:
> >>  * Commit message wrangling.
> >>  * Refer to 31 bit counters as opposed to 32 bit (registers).
> >> ---
> >>  drivers/resctrl/mpam_devices.c | 134 ++++++++++++++++++++++++++++-----
> >>  1 file changed, 116 insertions(+), 18 deletions(-)
> >>
> >> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> >> index f4d07234ce10..c207a6d2832c 100644
> >> --- a/drivers/resctrl/mpam_devices.c
> >> +++ b/drivers/resctrl/mpam_devices.c
> >> @@ -897,6 +897,48 @@ struct mon_read {
> >>         int                             err;
> >>  };
> >>
> >> +static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris)
> >> +{
> >> +       return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) ||
> >> +               mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props));
> >> +}
> >> +
> >> +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
> >> +{
> >> +       int retry = 3;
> >> +       u32 mbwu_l_low;
> >> +       u64 mbwu_l_high1, mbwu_l_high2;
> >> +
> >> +       mpam_mon_sel_lock_held(msc);
> >> +
> >> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
> >> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
> >> +
> >> +       mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
> >> +       do {
> >> +               mbwu_l_high1 = mbwu_l_high2;
> >> +               mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L);
> >> +               mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
> >> +
> >> +               retry--;
> >> +       } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0);
> >> +
> >> +       if (mbwu_l_high1 == mbwu_l_high2)
> >> +               return (mbwu_l_high1 << 32) | mbwu_l_low;
> >> +       return MSMON___NRDY_L;
> >> +}
> >> +
> >> +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
> >> +{
> >> +       mpam_mon_sel_lock_held(msc);
> >> +
> >> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
> >> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
> >> +
> >> +       __mpam_write_reg(msc, MSMON_MBWU_L, 0);
> >> +       __mpam_write_reg(msc, MSMON_MBWU_L + 4, 0);
> >> +}
> >> +
> >>  static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
> >>                                    u32 *flt_val)
> >>  {
> >> @@ -924,7 +966,9 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
> >>                                                ctx->csu_exclude_clean);
> >>
> >>                 break;
> >> -       case mpam_feat_msmon_mbwu:
> >> +       case mpam_feat_msmon_mbwu_31counter:
> >> +       case mpam_feat_msmon_mbwu_44counter:
> >> +       case mpam_feat_msmon_mbwu_63counter:
> >>                 *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU;
> >>
> >>                 if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props))
> >> @@ -946,7 +990,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
> >>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL);
> >>                 *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT);
> >>                 return;
> >> -       case mpam_feat_msmon_mbwu:
> >> +       case mpam_feat_msmon_mbwu_31counter:
> >> +       case mpam_feat_msmon_mbwu_44counter:
> >> +       case mpam_feat_msmon_mbwu_63counter:
> >>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
> >>                 *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
> >>                 return;
> >> @@ -959,6 +1005,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
> >>  static void clean_msmon_ctl_val(u32 *cur_ctl)
> >>  {
> >>         *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS;
> >> +
> >> +       if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU)
> >> +               *cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L;
> >>  }
> >>
> >>  static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
> >> @@ -978,10 +1027,15 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
> >>                 mpam_write_monsel_reg(msc, CSU, 0);
> >>                 mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
> >>                 break;
> >> -       case mpam_feat_msmon_mbwu:
> >> +       case mpam_feat_msmon_mbwu_44counter:
> >> +       case mpam_feat_msmon_mbwu_63counter:
> >> +               mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
> >> +               fallthrough;
> >> +       case mpam_feat_msmon_mbwu_31counter:
> >>                 mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
> >>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
> >>                 mpam_write_monsel_reg(msc, MBWU, 0);
> >
> > The fallthrough above seems to be problematic, assuming the MBWU=0
> > being last for 31-bit was intentional. For long counters, this is
> > zeroing the counter before updating the filter/control registers, but
> > then clearing the 32-bit version of the counter. This fails to clear
> > the NRDY bit on the long counter, which isn't cleared by software
> > anywhere else.
> >
> > From section 10.3.2 from the MPAM spec shared:
> >
> >  "On a counting monitor, the NRDY bit remains set until it is reset by
> > software writing it as 0 in the monitor register, or automatically
> > after the monitor is captured in the capture register by a capture
> > event"
> >
> > If I update the 63-bit case to call
> > mpam_msc_zero_mbwu_l(m->ris->vmsc->msc) after updating the
> > control/filter registers (in addition to the other items I pointed in
> > my last reply), I'm able to read MBWU counts from my hardware through
> > mbm_total_bytes.
> >
> > Thanks,
> > -Peter
>
> Thanks for the testing and flagging the problem. We should do the
> configuration in the same order for all the monitors.
>
> I'll change the case to:
>
>         case mpam_feat_msmon_mbwu_31counter:
>         case mpam_feat_msmon_mbwu_44counter:
>         case mpam_feat_msmon_mbwu_63counter:
>                 mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>
>                 if (m->type == mpam_feat_msmon_mbwu_31counter)
>                         mpam_write_monsel_reg(msc, MBWU, 0);
>                 else
>                         mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
>
>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>                 break;

I tried this out but wasn't able to read the counters. I needed to
move the MBWU[_L] write to the end. Writing the registers directly on
the hardware I'm testing with, I confirmed that just flipping
MBWU_CTL.EN sets NRDY:

MBWU_L=0x880
MBWU_CTL=0x828

 / # mmio_read32 $((msc + MBWU_CTL))
0x80030042
 / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
0x03ecb2c0
0x00000000
 / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
0x03f70580
0x00000000

Clear MBWU_CTL.EN:

 / # mmio_write32 $((msc + MBWU_CTL)) 0x00030042
 / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
0x05004680
0x80000000
 / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
0x05004680
0x80000000

Clear NRDY and reenable MBWU_CTL.EN:

 / # mmio_write32 $((msc + MBWU_L)) 0; mmio_write32 $((msc + MBWU_L + 4)) 0
 / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
0x00000000
0x00000000
 / # mmio_write32 $((msc + MBWU_CTL)) 0x80030042
 / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
0x001dee80
0x80000000

In fact, re-writing the same value back into MBWU_CTL.EN also sets NRDY:

 / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
0x00253e00
0x00000000
 / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
0x00b1a6c0
0x00000000
 / # mmio_write32 $((msc + MBWU_CTL)) 0x80030042
 / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
0x018d1d40
0x80000000

Thanks,
-Peter
Re: [PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported
Posted by Ben Horgan 3 months ago
Hi Peter,

On 11/7/25 10:30, Peter Newman wrote:
> Hi Ben
> 
> On Thu, Nov 6, 2025 at 5:41 PM Ben Horgan <ben.horgan@arm.com> wrote:
>>
>> Hi Peter,
>>
>> On 11/6/25 16:15, Peter Newman wrote:
>>> Hi Ben (and James),
>>>
>>> On Fri, Oct 17, 2025 at 8:59 PM James Morse <james.morse@arm.com> wrote:
>>>>
>>>> From: Rohit Mathew <rohit.mathew@arm.com>
>>>>
>>>> Now that the larger counter sizes are probed, make use of them.
>>>>
>>>> Callers of mpam_msmon_read() may not know (or care!) about the different
>>>> counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the
>>>> driver pick the counter to use.
>>>>
>>>> Only 32bit accesses to the MSC are required to be supported by the
>>>> spec, but these registers are 64bits. The lower half may overflow
>>>> into the higher half between two 32bit reads. To avoid this, use
>>>> a helper that reads the top half multiple times to check for overflow.
>>>>
>>>> Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
>>>> [morse: merged multiple patches from Rohit, added explicit counter selection ]
>>>> Signed-off-by: James Morse <james.morse@arm.com>
>>>> Reviewed-by: Ben Horgan <ben.horgan@arm.com>
>>>> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
>>>> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
>>>> Tested-by: Fenghua Yu <fenghuay@nvidia.com>
>>>> ---
>>>> Changes since v2:
>>>>  * Removed mpam_feat_msmon_mbwu as a top-level bit for explicit 31bit counter
>>>>    selection.
>>>>  * Allow callers of mpam_msmon_read() to specify mpam_feat_msmon_mbwu and have
>>>>    the driver pick a supported counter size.
>>>>  * Rephrased commit message.
>>>>
>>>> Changes since v1:
>>>>  * Only clear OFLOW_STATUS_L on MBWU counters.
>>>>
>>>> Changes since RFC:
>>>>  * Commit message wrangling.
>>>>  * Refer to 31 bit counters as opposed to 32 bit (registers).
>>>> ---
>>>>  drivers/resctrl/mpam_devices.c | 134 ++++++++++++++++++++++++++++-----
>>>>  1 file changed, 116 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
>>>> index f4d07234ce10..c207a6d2832c 100644
>>>> --- a/drivers/resctrl/mpam_devices.c
>>>> +++ b/drivers/resctrl/mpam_devices.c
>>>> @@ -897,6 +897,48 @@ struct mon_read {
>>>>         int                             err;
>>>>  };
>>>>
>>>> +static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris)
>>>> +{
>>>> +       return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) ||
>>>> +               mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props));
>>>> +}
>>>> +
>>>> +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
>>>> +{
>>>> +       int retry = 3;
>>>> +       u32 mbwu_l_low;
>>>> +       u64 mbwu_l_high1, mbwu_l_high2;
>>>> +
>>>> +       mpam_mon_sel_lock_held(msc);
>>>> +
>>>> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
>>>> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
>>>> +
>>>> +       mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
>>>> +       do {
>>>> +               mbwu_l_high1 = mbwu_l_high2;
>>>> +               mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L);
>>>> +               mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
>>>> +
>>>> +               retry--;
>>>> +       } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0);
>>>> +
>>>> +       if (mbwu_l_high1 == mbwu_l_high2)
>>>> +               return (mbwu_l_high1 << 32) | mbwu_l_low;
>>>> +       return MSMON___NRDY_L;
>>>> +}
>>>> +
>>>> +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
>>>> +{
>>>> +       mpam_mon_sel_lock_held(msc);
>>>> +
>>>> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
>>>> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
>>>> +
>>>> +       __mpam_write_reg(msc, MSMON_MBWU_L, 0);
>>>> +       __mpam_write_reg(msc, MSMON_MBWU_L + 4, 0);
>>>> +}
>>>> +
>>>>  static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>>>                                    u32 *flt_val)
>>>>  {
>>>> @@ -924,7 +966,9 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>>>                                                ctx->csu_exclude_clean);
>>>>
>>>>                 break;
>>>> -       case mpam_feat_msmon_mbwu:
>>>> +       case mpam_feat_msmon_mbwu_31counter:
>>>> +       case mpam_feat_msmon_mbwu_44counter:
>>>> +       case mpam_feat_msmon_mbwu_63counter:
>>>>                 *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU;
>>>>
>>>>                 if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props))
>>>> @@ -946,7 +990,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>>>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL);
>>>>                 *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT);
>>>>                 return;
>>>> -       case mpam_feat_msmon_mbwu:
>>>> +       case mpam_feat_msmon_mbwu_31counter:
>>>> +       case mpam_feat_msmon_mbwu_44counter:
>>>> +       case mpam_feat_msmon_mbwu_63counter:
>>>>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
>>>>                 *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
>>>>                 return;
>>>> @@ -959,6 +1005,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>>>  static void clean_msmon_ctl_val(u32 *cur_ctl)
>>>>  {
>>>>         *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS;
>>>> +
>>>> +       if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU)
>>>> +               *cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L;
>>>>  }
>>>>
>>>>  static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>>>> @@ -978,10 +1027,15 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>>>>                 mpam_write_monsel_reg(msc, CSU, 0);
>>>>                 mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>>>>                 break;
>>>> -       case mpam_feat_msmon_mbwu:
>>>> +       case mpam_feat_msmon_mbwu_44counter:
>>>> +       case mpam_feat_msmon_mbwu_63counter:
>>>> +               mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
>>>> +               fallthrough;
>>>> +       case mpam_feat_msmon_mbwu_31counter:
>>>>                 mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
>>>>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>>>>                 mpam_write_monsel_reg(msc, MBWU, 0);
>>>
>>> The fallthrough above seems to be problematic, assuming the MBWU=0
>>> being last for 31-bit was intentional. For long counters, this is
>>> zeroing the counter before updating the filter/control registers, but
>>> then clearing the 32-bit version of the counter. This fails to clear
>>> the NRDY bit on the long counter, which isn't cleared by software
>>> anywhere else.
>>>
>>> From section 10.3.2 from the MPAM spec shared:
>>>
>>>  "On a counting monitor, the NRDY bit remains set until it is reset by
>>> software writing it as 0 in the monitor register, or automatically
>>> after the monitor is captured in the capture register by a capture
>>> event"
>>>
>>> If I update the 63-bit case to call
>>> mpam_msc_zero_mbwu_l(m->ris->vmsc->msc) after updating the
>>> control/filter registers (in addition to the other items I pointed in
>>> my last reply), I'm able to read MBWU counts from my hardware through
>>> mbm_total_bytes.
>>>
>>> Thanks,
>>> -Peter
>>
>> Thanks for the testing and flagging the problem. We should do the
>> configuration in the same order for all the monitors.
>>
>> I'll change the case to:
>>
>>         case mpam_feat_msmon_mbwu_31counter:
>>         case mpam_feat_msmon_mbwu_44counter:
>>         case mpam_feat_msmon_mbwu_63counter:
>>                 mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
>>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>>
>>                 if (m->type == mpam_feat_msmon_mbwu_31counter)
>>                         mpam_write_monsel_reg(msc, MBWU, 0);
>>                 else
>>                         mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
>>
>>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>>                 break;
> 
> I tried this out but wasn't able to read the counters. I needed to
> move the MBWU[_L] write to the end. Writing the registers directly on
> the hardware I'm testing with, I confirmed that just flipping
> MBWU_CTL.EN sets NRDY:
> 
> MBWU_L=0x880
> MBWU_CTL=0x828
> 
>  / # mmio_read32 $((msc + MBWU_CTL))
> 0x80030042
>  / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
> 0x03ecb2c0
> 0x00000000
>  / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
> 0x03f70580
> 0x00000000
> 
> Clear MBWU_CTL.EN:
> 
>  / # mmio_write32 $((msc + MBWU_CTL)) 0x00030042
>  / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
> 0x05004680
> 0x80000000
>  / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
> 0x05004680
> 0x80000000
> 
> Clear NRDY and reenable MBWU_CTL.EN:
> 
>  / # mmio_write32 $((msc + MBWU_L)) 0; mmio_write32 $((msc + MBWU_L + 4)) 0
>  / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
> 0x00000000
> 0x00000000
>  / # mmio_write32 $((msc + MBWU_CTL)) 0x80030042
>  / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
> 0x001dee80
> 0x80000000
> 
> In fact, re-writing the same value back into MBWU_CTL.EN also sets NRDY:
> 
>  / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
> 0x00253e00
> 0x00000000
>  / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
> 0x00b1a6c0
> 0x00000000
>  / # mmio_write32 $((msc + MBWU_CTL)) 0x80030042
>  / # mmio_read32 $((msc + MBWU_L)); mmio_read32 $((msc + MBWU_L + 4))
> 0x018d1d40
> 0x80000000
> 
> Thanks,
> -Peter


Thank you very much for the quick testing and diagnosis. It does seem
reasonable that the .EN flip would be considered a configuration change,
and so the write that clears NRDY (and zeroes the value) should indeed
happen after it for counting monitors (mbwu). I'll make this change now.

Thanks,

Ben

Re: [PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported
Posted by Peter Newman 3 months ago
Hi James,

On Fri, Oct 17, 2025 at 8:59 PM James Morse <james.morse@arm.com> wrote:
>
> From: Rohit Mathew <rohit.mathew@arm.com>
>
> Now that the larger counter sizes are probed, make use of them.
>
> Callers of mpam_msmon_read() may not know (or care!) about the different
> counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the
> driver pick the counter to use.
>
> Only 32bit accesses to the MSC are required to be supported by the
> spec, but these registers are 64bits. The lower half may overflow
> into the higher half between two 32bit reads. To avoid this, use
> a helper that reads the top half multiple times to check for overflow.
>
> Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
> [morse: merged multiple patches from Rohit, added explicit counter selection ]
> Signed-off-by: James Morse <james.morse@arm.com>
> Reviewed-by: Ben Horgan <ben.horgan@arm.com>
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
> Tested-by: Fenghua Yu <fenghuay@nvidia.com>
> ---
> Changes since v2:
>  * Removed mpam_feat_msmon_mbwu as a top-level bit for explicit 31bit counter
>    selection.
>  * Allow callers of mpam_msmon_read() to specify mpam_feat_msmon_mbwu and have
>    the driver pick a supported counter size.
>  * Rephrased commit message.
>
> Changes since v1:
>  * Only clear OFLOW_STATUS_L on MBWU counters.
>
> Changes since RFC:
>  * Commit message wrangling.
>  * Refer to 31 bit counters as opposed to 32 bit (registers).
> ---
>  drivers/resctrl/mpam_devices.c | 134 ++++++++++++++++++++++++++++-----
>  1 file changed, 116 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> index f4d07234ce10..c207a6d2832c 100644
> --- a/drivers/resctrl/mpam_devices.c
> +++ b/drivers/resctrl/mpam_devices.c
> @@ -897,6 +897,48 @@ struct mon_read {
>         int                             err;
>  };
>
> +static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris)
> +{
> +       return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) ||
> +               mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props));
> +}
> +
> +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
> +{
> +       int retry = 3;
> +       u32 mbwu_l_low;
> +       u64 mbwu_l_high1, mbwu_l_high2;
> +
> +       mpam_mon_sel_lock_held(msc);
> +
> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
> +
> +       mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
> +       do {
> +               mbwu_l_high1 = mbwu_l_high2;
> +               mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L);
> +               mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
> +
> +               retry--;
> +       } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0);
> +
> +       if (mbwu_l_high1 == mbwu_l_high2)
> +               return (mbwu_l_high1 << 32) | mbwu_l_low;
> +       return MSMON___NRDY_L;
> +}
> +
> +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
> +{
> +       mpam_mon_sel_lock_held(msc);
> +
> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
> +
> +       __mpam_write_reg(msc, MSMON_MBWU_L, 0);
> +       __mpam_write_reg(msc, MSMON_MBWU_L + 4, 0);
> +}
> +
>  static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>                                    u32 *flt_val)
>  {
> @@ -924,7 +966,9 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>                                                ctx->csu_exclude_clean);
>
>                 break;
> -       case mpam_feat_msmon_mbwu:
> +       case mpam_feat_msmon_mbwu_31counter:
> +       case mpam_feat_msmon_mbwu_44counter:
> +       case mpam_feat_msmon_mbwu_63counter:
>                 *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU;
>
>                 if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props))
> @@ -946,7 +990,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL);
>                 *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT);
>                 return;
> -       case mpam_feat_msmon_mbwu:
> +       case mpam_feat_msmon_mbwu_31counter:
> +       case mpam_feat_msmon_mbwu_44counter:
> +       case mpam_feat_msmon_mbwu_63counter:
>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
>                 *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
>                 return;
> @@ -959,6 +1005,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>  static void clean_msmon_ctl_val(u32 *cur_ctl)
>  {
>         *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS;
> +
> +       if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU)
> +               *cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L;
>  }
>
>  static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
> @@ -978,10 +1027,15 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>                 mpam_write_monsel_reg(msc, CSU, 0);
>                 mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>                 break;
> -       case mpam_feat_msmon_mbwu:
> +       case mpam_feat_msmon_mbwu_44counter:
> +       case mpam_feat_msmon_mbwu_63counter:
> +               mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
> +               fallthrough;
> +       case mpam_feat_msmon_mbwu_31counter:
>                 mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>                 mpam_write_monsel_reg(msc, MBWU, 0);
> +
>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>
>                 mbwu_state = &m->ris->mbwu_state[m->ctx->mon];
> @@ -993,10 +1047,19 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>         }
>  }
>
> -static u64 mpam_msmon_overflow_val(struct mpam_msc_ris *ris)
> +static u64 mpam_msmon_overflow_val(enum mpam_device_features type)
>  {
> -       /* TODO: scaling, and long counters */
> -       return GENMASK_ULL(30, 0);
> +       /* TODO: implement scaling counters */
> +       switch (type) {
> +       case mpam_feat_msmon_mbwu_63counter:
> +               return GENMASK_ULL(62, 0);
> +       case mpam_feat_msmon_mbwu_44counter:
> +               return GENMASK_ULL(43, 0);
> +       case mpam_feat_msmon_mbwu_31counter:
> +               return GENMASK_ULL(30, 0);
> +       default:
> +               return 0;
> +       }
>  }
>
>  /* Call with MSC lock held */
> @@ -1037,11 +1100,24 @@ static void __ris_msmon_read(void *arg)
>                         nrdy = now & MSMON___NRDY;
>                 now = FIELD_GET(MSMON___VALUE, now);
>                 break;
> -       case mpam_feat_msmon_mbwu:
> -               now = mpam_read_monsel_reg(msc, MBWU);
> -               if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
> -                       nrdy = now & MSMON___NRDY;
> -               now = FIELD_GET(MSMON___VALUE, now);
> +       case mpam_feat_msmon_mbwu_31counter:
> +       case mpam_feat_msmon_mbwu_44counter:
> +       case mpam_feat_msmon_mbwu_63counter:

Should you check for one of these three features instead of
mpam_feat_msmon_mbwu further up in this function when checking for
reset_on_next_read?

-       if (m->type == mpam_feat_msmon_mbwu) {
+       switch (m->type) {
+       case mpam_feat_msmon_mbwu_31counter:
+       case mpam_feat_msmon_mbwu_44counter:
+       case mpam_feat_msmon_mbwu_63counter:
                mbwu_state = &ris->mbwu_state[ctx->mon];
                if (mbwu_state) {
                        reset_on_next_read = mbwu_state->reset_on_next_read;
                        mbwu_state->reset_on_next_read = false;
                }
+               break;
+       default:
+               break;
        }

> +               if (m->type != mpam_feat_msmon_mbwu_31counter) {
> +                       now = mpam_msc_read_mbwu_l(msc);
> +                       if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
> +                               nrdy = now & MSMON___NRDY_L;
> +
> +                       if (m->type == mpam_feat_msmon_mbwu_63counter)
> +                               now = FIELD_GET(MSMON___LWD_VALUE, now);
> +                       else
> +                               now = FIELD_GET(MSMON___L_VALUE, now);
> +               } else {
> +                       now = mpam_read_monsel_reg(msc, MBWU);
> +                       if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
> +                               nrdy = now & MSMON___NRDY;
> +                       now = FIELD_GET(MSMON___VALUE, now);
> +               }
>
>                 if (nrdy)
>                         break;
> @@ -1050,7 +1126,7 @@ static void __ris_msmon_read(void *arg)
>
>                 /* Add any pre-overflow value to the mbwu_state->val */
>                 if (mbwu_state->prev_val > now)
> -                       overflow_val = mpam_msmon_overflow_val(ris) - mbwu_state->prev_val;
> +                       overflow_val = mpam_msmon_overflow_val(m->type) - mbwu_state->prev_val;
>
>                 mbwu_state->prev_val = now;
>                 mbwu_state->correction += overflow_val;
> @@ -1106,13 +1182,26 @@ static int _msmon_read(struct mpam_component *comp, struct mon_read *arg)
>         return any_err;
>  }
>
> +static enum mpam_device_features mpam_msmon_choose_counter(struct mpam_class *class)
> +{
> +       struct mpam_props *cprops = &class->props;
> +
> +       if (mpam_has_feature(mpam_feat_msmon_mbwu_44counter, cprops))
> +               return mpam_feat_msmon_mbwu_44counter;
> +       if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, cprops))
> +               return mpam_feat_msmon_mbwu_63counter;
> +
> +       return mpam_feat_msmon_mbwu_31counter;
> +}
> +
>  int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx,
>                     enum mpam_device_features type, u64 *val)
>  {
>         int err;
>         struct mon_read arg;
>         u64 wait_jiffies = 0;
> -       struct mpam_props *cprops = &comp->class->props;
> +       struct mpam_class *class = comp->class;
> +       struct mpam_props *cprops = &class->props;
>
>         might_sleep();
>
> @@ -1129,9 +1218,12 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx,
>         };
>         *val = 0;
>
> +       if (type == mpam_feat_msmon_mbwu)
> +               type = mpam_msmon_choose_counter(class);

`type` was already recorded in arg->type, so the result of this lookup
will be ignored on the first call to _msmon_read()

If mpam_feat_msmon_mbwu can somehow still result in -EBUSY, then the
repeat call may use the right type.

Thanks,
-Peter
Re: [PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported
Posted by Ben Horgan 3 months ago
Hi Peter,

On 11/6/25 15:18, Peter Newman wrote:
> Hi James,
> 
> On Fri, Oct 17, 2025 at 8:59 PM James Morse <james.morse@arm.com> wrote:
>>
>> From: Rohit Mathew <rohit.mathew@arm.com>
>>
>> Now that the larger counter sizes are probed, make use of them.
>>
>> Callers of mpam_msmon_read() may not know (or care!) about the different
>> counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the
>> driver pick the counter to use.
>>
>> Only 32bit accesses to the MSC are required to be supported by the
>> spec, but these registers are 64bits. The lower half may overflow
>> into the higher half between two 32bit reads. To avoid this, use
>> a helper that reads the top half multiple times to check for overflow.
>>
>> Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
>> [morse: merged multiple patches from Rohit, added explicit counter selection ]
>> Signed-off-by: James Morse <james.morse@arm.com>
>> Reviewed-by: Ben Horgan <ben.horgan@arm.com>
>> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
>> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
>> Tested-by: Fenghua Yu <fenghuay@nvidia.com>
>> ---
>> Changes since v2:
>>  * Removed mpam_feat_msmon_mbwu as a top-level bit for explicit 31bit counter
>>    selection.
>>  * Allow callers of mpam_msmon_read() to specify mpam_feat_msmon_mbwu and have
>>    the driver pick a supported counter size.
>>  * Rephrased commit message.
>>
>> Changes since v1:
>>  * Only clear OFLOW_STATUS_L on MBWU counters.
>>
>> Changes since RFC:
>>  * Commit message wrangling.
>>  * Refer to 31 bit counters as opposed to 32 bit (registers).
>> ---
>>  drivers/resctrl/mpam_devices.c | 134 ++++++++++++++++++++++++++++-----
>>  1 file changed, 116 insertions(+), 18 deletions(-)
>>
>> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
>> index f4d07234ce10..c207a6d2832c 100644
>> --- a/drivers/resctrl/mpam_devices.c
>> +++ b/drivers/resctrl/mpam_devices.c
>> @@ -897,6 +897,48 @@ struct mon_read {
>>         int                             err;
>>  };
>>
>> +static bool mpam_ris_has_mbwu_long_counter(struct mpam_msc_ris *ris)
>> +{
>> +       return (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, &ris->props) ||
>> +               mpam_has_feature(mpam_feat_msmon_mbwu_44counter, &ris->props));
>> +}
>> +
>> +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
>> +{
>> +       int retry = 3;
>> +       u32 mbwu_l_low;
>> +       u64 mbwu_l_high1, mbwu_l_high2;
>> +
>> +       mpam_mon_sel_lock_held(msc);
>> +
>> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
>> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
>> +
>> +       mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
>> +       do {
>> +               mbwu_l_high1 = mbwu_l_high2;
>> +               mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L);
>> +               mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
>> +
>> +               retry--;
>> +       } while (mbwu_l_high1 != mbwu_l_high2 && retry > 0);
>> +
>> +       if (mbwu_l_high1 == mbwu_l_high2)
>> +               return (mbwu_l_high1 << 32) | mbwu_l_low;
>> +       return MSMON___NRDY_L;
>> +}
>> +
>> +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
>> +{
>> +       mpam_mon_sel_lock_held(msc);
>> +
>> +       WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
>> +       WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
>> +
>> +       __mpam_write_reg(msc, MSMON_MBWU_L, 0);
>> +       __mpam_write_reg(msc, MSMON_MBWU_L + 4, 0);
>> +}
>> +
>>  static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>                                    u32 *flt_val)
>>  {
>> @@ -924,7 +966,9 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>                                                ctx->csu_exclude_clean);
>>
>>                 break;
>> -       case mpam_feat_msmon_mbwu:
>> +       case mpam_feat_msmon_mbwu_31counter:
>> +       case mpam_feat_msmon_mbwu_44counter:
>> +       case mpam_feat_msmon_mbwu_63counter:
>>                 *ctl_val |= MSMON_CFG_MBWU_CTL_TYPE_MBWU;
>>
>>                 if (mpam_has_feature(mpam_feat_msmon_mbwu_rwbw, &m->ris->props))
>> @@ -946,7 +990,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_CSU_CTL);
>>                 *flt_val = mpam_read_monsel_reg(msc, CFG_CSU_FLT);
>>                 return;
>> -       case mpam_feat_msmon_mbwu:
>> +       case mpam_feat_msmon_mbwu_31counter:
>> +       case mpam_feat_msmon_mbwu_44counter:
>> +       case mpam_feat_msmon_mbwu_63counter:
>>                 *ctl_val = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
>>                 *flt_val = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
>>                 return;
>> @@ -959,6 +1005,9 @@ static void read_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>>  static void clean_msmon_ctl_val(u32 *cur_ctl)
>>  {
>>         *cur_ctl &= ~MSMON_CFG_x_CTL_OFLOW_STATUS;
>> +
>> +       if (FIELD_GET(MSMON_CFG_x_CTL_TYPE, *cur_ctl) == MSMON_CFG_MBWU_CTL_TYPE_MBWU)
>> +               *cur_ctl &= ~MSMON_CFG_MBWU_CTL_OFLOW_STATUS_L;
>>  }
>>
>>  static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>> @@ -978,10 +1027,15 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>>                 mpam_write_monsel_reg(msc, CSU, 0);
>>                 mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>>                 break;
>> -       case mpam_feat_msmon_mbwu:
>> +       case mpam_feat_msmon_mbwu_44counter:
>> +       case mpam_feat_msmon_mbwu_63counter:
>> +               mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
>> +               fallthrough;
>> +       case mpam_feat_msmon_mbwu_31counter:
>>                 mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
>>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>>                 mpam_write_monsel_reg(msc, MBWU, 0);
>> +
>>                 mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>>
>>                 mbwu_state = &m->ris->mbwu_state[m->ctx->mon];
>> @@ -993,10 +1047,19 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>>         }
>>  }
>>
>> -static u64 mpam_msmon_overflow_val(struct mpam_msc_ris *ris)
>> +static u64 mpam_msmon_overflow_val(enum mpam_device_features type)
>>  {
>> -       /* TODO: scaling, and long counters */
>> -       return GENMASK_ULL(30, 0);
>> +       /* TODO: implement scaling counters */
>> +       switch (type) {
>> +       case mpam_feat_msmon_mbwu_63counter:
>> +               return GENMASK_ULL(62, 0);
>> +       case mpam_feat_msmon_mbwu_44counter:
>> +               return GENMASK_ULL(43, 0);
>> +       case mpam_feat_msmon_mbwu_31counter:
>> +               return GENMASK_ULL(30, 0);
>> +       default:
>> +               return 0;
>> +       }
>>  }
>>
>>  /* Call with MSC lock held */
>> @@ -1037,11 +1100,24 @@ static void __ris_msmon_read(void *arg)
>>                         nrdy = now & MSMON___NRDY;
>>                 now = FIELD_GET(MSMON___VALUE, now);
>>                 break;
>> -       case mpam_feat_msmon_mbwu:
>> -               now = mpam_read_monsel_reg(msc, MBWU);
>> -               if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
>> -                       nrdy = now & MSMON___NRDY;
>> -               now = FIELD_GET(MSMON___VALUE, now);
>> +       case mpam_feat_msmon_mbwu_31counter:
>> +       case mpam_feat_msmon_mbwu_44counter:
>> +       case mpam_feat_msmon_mbwu_63counter:
> 
> Should you check for one of these three features instead of
> mpam_feat_msmon_mbwu further up in this function when checking for
> reset_on_next_read?
> 
> -       if (m->type == mpam_feat_msmon_mbwu) {
> +       switch (m->type) {
> +       case mpam_feat_msmon_mbwu_31counter:
> +       case mpam_feat_msmon_mbwu_44counter:
> +       case mpam_feat_msmon_mbwu_63counter:
>                 mbwu_state = &ris->mbwu_state[ctx->mon];
>                 if (mbwu_state) {
>                         reset_on_next_read = mbwu_state->reset_on_next_read;
>                         mbwu_state->reset_on_next_read = false;
>                 }
> +               break;
> +       default:
> +               break;
>         }
> 

Yes, this looks like a correct change to me.

[...]
>>  int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx,
>>                     enum mpam_device_features type, u64 *val)
>>  {
>>         int err;
>>         struct mon_read arg;
>>         u64 wait_jiffies = 0;
>> -       struct mpam_props *cprops = &comp->class->props;
>> +       struct mpam_class *class = comp->class;
>> +       struct mpam_props *cprops = &class->props;
>>
>>         might_sleep();
>>
>> @@ -1129,9 +1218,12 @@ int mpam_msmon_read(struct mpam_component *comp, struct mon_cfg *ctx,
>>         };
>>         *val = 0;
>>
>> +       if (type == mpam_feat_msmon_mbwu)
>> +               type = mpam_msmon_choose_counter(class);
> 
> `type` was already recorded in arg.type, so the result of this lookup
> will be ignored on the first call to _msmon_read().
> 
> If mpam_feat_msmon_mbwu can somehow still result in -EBUSY, then the
> repeat call may use the right type.

Good spot. I think we can just move the 'if' further up. I'll make these
changes when I do the repost for James.

> 
> Thanks,
> -Peter

Thanks,

Ben

Re: [PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported
Posted by Jonathan Cameron 3 months, 2 weeks ago
On Fri, 17 Oct 2025 18:56:42 +0000
James Morse <james.morse@arm.com> wrote:

> From: Rohit Mathew <rohit.mathew@arm.com>
> 
> Now that the larger counter sizes are probed, make use of them.
> 
> Callers of mpam_msmon_read() may not know (or care!) about the different
> counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the
> driver pick the counter to use.
> 
> Only 32bit accesses to the MSC are required to be supported by the
> spec, but these registers are 64bits. The lower half may overflow
> into the higher half between two 32bit reads. To avoid this, use
> a helper that reads the top half multiple times to check for overflow.
> 
> Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
> [morse: merged multiple patches from Rohit, added explicit counter selection ]
> Signed-off-by: James Morse <james.morse@arm.com>
> Reviewed-by: Ben Horgan <ben.horgan@arm.com>
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
> Tested-by: Fenghua Yu <fenghuay@nvidia.com>

A few tiny things on a fresh look.

> +static u64 mpam_msc_read_mbwu_l(struct mpam_msc *msc)
> +{
> +	int retry = 3;
> +	u32 mbwu_l_low;
> +	u64 mbwu_l_high1, mbwu_l_high2;
> +
> +	mpam_mon_sel_lock_held(msc);
> +
> +	WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
> +	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
> +
> +	mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
> +	do {
> +		mbwu_l_high1 = mbwu_l_high2;
> +		mbwu_l_low = __mpam_read_reg(msc, MSMON_MBWU_L);
> +		mbwu_l_high2 = __mpam_read_reg(msc, MSMON_MBWU_L + 4);
> +
> +		retry--;
> +	} while (mbwu_l_high1 != mbwu_l_high2 && retry > 0);

Just carrying on without screaming if it tore repeatedly seems unwise...
I can't see it actually happening more than once, but it still seems like
we'd want to know if it did.

> +
> +	if (mbwu_l_high1 == mbwu_l_high2)
> +		return (mbwu_l_high1 << 32) | mbwu_l_low;
> +	return MSMON___NRDY_L;
> +}

>  static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
> @@ -978,10 +1027,15 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>  		mpam_write_monsel_reg(msc, CSU, 0);
>  		mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>  		break;
> -	case mpam_feat_msmon_mbwu:
> +	case mpam_feat_msmon_mbwu_44counter:
> +	case mpam_feat_msmon_mbwu_63counter:
> +		mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
> +		fallthrough;
> +	case mpam_feat_msmon_mbwu_31counter:
>  		mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
>  		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>  		mpam_write_monsel_reg(msc, MBWU, 0);
> +
Stray blank line added above; clean it up (push to original patch).
>  		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>  
>  		mbwu_state = &m->ris->mbwu_state[m->ctx->mon];
> @@ -993,10 +1047,19 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>  	}
>  }
Re: [PATCH v3 26/29] arm_mpam: Use long MBWU counters if supported
Posted by Ben Horgan 3 months, 2 weeks ago
Hi James,

On 10/17/25 19:56, James Morse wrote:
> From: Rohit Mathew <rohit.mathew@arm.com>
> 
> Now that the larger counter sizes are probed, make use of them.
> 
> Callers of mpam_msmon_read() may not know (or care!) about the different
> counter sizes. Allow them to specify mpam_feat_msmon_mbwu and have the
> driver pick the counter to use.
> 
> Only 32bit accesses to the MSC are required to be supported by the
> spec, but these registers are 64bits. The lower half may overflow
> into the higher half between two 32bit reads. To avoid this, use
> a helper that reads the top half multiple times to check for overflow.
> 
> Signed-off-by: Rohit Mathew <rohit.mathew@arm.com>
> [morse: merged multiple patches from Rohit, added explicit counter selection ]
> Signed-off-by: James Morse <james.morse@arm.com>
> Reviewed-by: Ben Horgan <ben.horgan@arm.com>
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> Reviewed-by: Fenghua Yu <fenghuay@nvidia.com>
> Tested-by: Fenghua Yu <fenghuay@nvidia.com>
> ---
> Changes since v2:
>  * Removed mpam_feat_msmon_mbwu as a top-level bit for explicit 31bit counter
>    selection.
>  * Allow callers of mpam_msmon_read() to specify mpam_feat_msmon_mbwu and have
>    the driver pick a supported counter size.
>  * Rephrased commit message.
> 
> Changes since v1:
>  * Only clear OFLOW_STATUS_L on MBWU counters.
> 
> Changes since RFC:
>  * Commit message wrangling.
>  * Refer to 31 bit counters as opposed to 32 bit (registers).
> ---
>  drivers/resctrl/mpam_devices.c | 134 ++++++++++++++++++++++++++++-----
>  1 file changed, 116 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> index f4d07234ce10..c207a6d2832c 100644
> --- a/drivers/resctrl/mpam_devices.c
> +++ b/drivers/resctrl/mpam_devices.c
> @@ -897,6 +897,48 @@ struct mon_read {
[...]
> +static void mpam_msc_zero_mbwu_l(struct mpam_msc *msc)
> +{
> +	mpam_mon_sel_lock_held(msc);
> +
> +	WARN_ON_ONCE((MSMON_MBWU_L + sizeof(u64)) > msc->mapped_hwpage_sz);
> +	WARN_ON_ONCE(!cpumask_test_cpu(smp_processor_id(), &msc->accessibility));
> +
> +	__mpam_write_reg(msc, MSMON_MBWU_L, 0);
> +	__mpam_write_reg(msc, MSMON_MBWU_L + 4, 0);
> +}
> +
[...]
> @@ -978,10 +1027,15 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>  		mpam_write_monsel_reg(msc, CSU, 0);
>  		mpam_write_monsel_reg(msc, CFG_CSU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>  		break;
> -	case mpam_feat_msmon_mbwu:
> +	case mpam_feat_msmon_mbwu_44counter:
> +	case mpam_feat_msmon_mbwu_63counter:
> +		mpam_msc_zero_mbwu_l(m->ris->vmsc->msc);
> +		fallthrough;
> +	case mpam_feat_msmon_mbwu_31counter:
>  		mpam_write_monsel_reg(msc, CFG_MBWU_FLT, flt_val);
>  		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>  		mpam_write_monsel_reg(msc, MBWU, 0);

If it's a long counter, it was already zeroed by mpam_msc_zero_mbwu_l()
above, so this write is redundant in that case.

> +
>  		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
>  
>  		mbwu_state = &m->ris->mbwu_state[m->ctx->mon];
[...]
> +static enum mpam_device_features mpam_msmon_choose_counter(struct mpam_class *class)
> +{
> +	struct mpam_props *cprops = &class->props;
> +
> +	if (mpam_has_feature(mpam_feat_msmon_mbwu_44counter, cprops))
> +		return mpam_feat_msmon_mbwu_44counter;

This should check the longest counter first, i.e. test
mpam_feat_msmon_mbwu_63counter before mpam_feat_msmon_mbwu_44counter.

> +	if (mpam_has_feature(mpam_feat_msmon_mbwu_63counter, cprops))
> +		return mpam_feat_msmon_mbwu_63counter;
> +
> +	return mpam_feat_msmon_mbwu_31counter;
> +}
> +
-- 
Thanks,

Ben