[PATCH v3 41/47] arm_mpam: Generate a configuration for min controls

Ben Horgan posted 47 patches 4 weeks ago
There is a newer version of this series
[PATCH v3 41/47] arm_mpam: Generate a configuration for min controls
Posted by Ben Horgan 4 weeks ago
From: James Morse <james.morse@arm.com>

MPAM supports a minimum and maximum control for memory bandwidth. The
purpose of the minimum control is to give priority to tasks that are below
their minimum value. Resctrl only provides one value for the bandwidth
configuration, which is used for the maximum.

The minimum control is always programmed to zero on hardware that supports
it.

Generate a minimum bandwidth value that is 5% lower than the value provided
by resctrl. This means tasks that are not receiving their target bandwidth
can be prioritised by the hardware.

For component reset reuse the same calculation so that the default is a
value resctrl can set.

CC: Zeng Heng <zengheng4@huawei.com>
Signed-off-by: James Morse <james.morse@arm.com>
Signed-off-by: Ben Horgan <ben.horgan@arm.com>
---
Changes since rfc:
Add reset_mbw_min
Clear min cfg when setting max
use mpam_extend_config on component reset

Changes since v2:
bwa_wd limit to 16 moved to earlier patch
restrict scope of min and delta variables
move code out of loop so smaller change for quirking min
move testing into its own commit
---
 drivers/resctrl/mpam_devices.c  | 69 ++++++++++++++++++++++++++++++---
 drivers/resctrl/mpam_internal.h |  3 ++
 drivers/resctrl/mpam_resctrl.c  |  2 +
 3 files changed, 69 insertions(+), 5 deletions(-)

diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
index 9fbe4fe3b13a..37bd8efc6ecf 100644
--- a/drivers/resctrl/mpam_devices.c
+++ b/drivers/resctrl/mpam_devices.c
@@ -1394,8 +1394,12 @@ static void mpam_reprogram_ris_partid(struct mpam_msc_ris *ris, u16 partid,
 	}
 
 	if (mpam_has_feature(mpam_feat_mbw_min, rprops) &&
-	    mpam_has_feature(mpam_feat_mbw_min, cfg))
-		mpam_write_partsel_reg(msc, MBW_MIN, 0);
+	    mpam_has_feature(mpam_feat_mbw_min, cfg)) {
+		if (cfg->reset_mbw_min)
+			mpam_write_partsel_reg(msc, MBW_MIN, 0);
+		else
+			mpam_write_partsel_reg(msc, MBW_MIN, cfg->mbw_min);
+	}
 
 	if (mpam_has_feature(mpam_feat_mbw_max, rprops) &&
 	    mpam_has_feature(mpam_feat_mbw_max, cfg)) {
@@ -1510,6 +1514,7 @@ static void mpam_init_reset_cfg(struct mpam_config *reset_cfg)
 		.reset_cpbm = true,
 		.reset_mbw_pbm = true,
 		.reset_mbw_max = true,
+		.reset_mbw_min = true,
 	};
 	bitmap_fill(reset_cfg->features, MPAM_FEATURE_LAST);
 }
@@ -2408,6 +2413,45 @@ static void __destroy_component_cfg(struct mpam_component *comp)
 	}
 }
 
+static void mpam_extend_config(struct mpam_class *class, struct mpam_config *cfg)
+{
+	struct mpam_props *cprops = &class->props;
+	u16 min_hw_granule, max_hw_value, res0_bits;
+
+	/*
+	 * Calculate the values the 'min' control can hold.
+	 * e.g. on a platform with bwa_wd = 8, min_hw_granule is 0x00ff because
+	 * those bits are RES0. Configurations of this value are effectively
+	 * zero. But configurations need to saturate at min_hw_granule on
+	 * systems with mismatched bwa_wd, where the 'less than 0' values are
+	 * implemented on some MSC, but not others.
+	 */
+	res0_bits = 16 - cprops->bwa_wd;
+	max_hw_value = ((1 << cprops->bwa_wd) - 1) << res0_bits;
+	min_hw_granule = ~max_hw_value;
+
+	/*
+	 * MAX and MIN should be set together. If only one is provided,
+	 * generate a configuration for the other. If only one control
+	 * type is supported, the other value will be ignored.
+	 *
+	 * Resctrl can only configure the MAX.
+	 */
+	if (mpam_has_feature(mpam_feat_mbw_max, cfg) &&
+	    !mpam_has_feature(mpam_feat_mbw_min, cfg)) {
+		u16 min, delta;
+
+		delta = ((5 * MPAMCFG_MBW_MAX_MAX) / 100) - 1;
+		if (cfg->mbw_max > delta)
+			min = cfg->mbw_max - delta;
+		else
+			min = 0;
+
+		cfg->mbw_min = max(min, min_hw_granule);
+		mpam_set_feature(mpam_feat_mbw_min, cfg);
+	}
+}
+
 static void mpam_reset_component_cfg(struct mpam_component *comp)
 {
 	int i;
@@ -2426,6 +2470,8 @@ static void mpam_reset_component_cfg(struct mpam_component *comp)
 			comp->cfg[i].mbw_pbm = GENMASK(cprops->mbw_pbm_bits - 1, 0);
 		if (cprops->bwa_wd)
 			comp->cfg[i].mbw_max = GENMASK(15, 16 - cprops->bwa_wd);
+
+		mpam_extend_config(comp->class, &comp->cfg[i]);
 	}
 }
 
@@ -2701,24 +2747,37 @@ static bool mpam_update_config(struct mpam_config *cfg,
 	maybe_update_config(cfg, mpam_feat_cpor_part, newcfg, cpbm, has_changes);
 	maybe_update_config(cfg, mpam_feat_mbw_part, newcfg, mbw_pbm, has_changes);
 	maybe_update_config(cfg, mpam_feat_mbw_max, newcfg, mbw_max, has_changes);
+	maybe_update_config(cfg, mpam_feat_mbw_min, newcfg, mbw_min, has_changes);
 
 	return has_changes;
 }
 
 int mpam_apply_config(struct mpam_component *comp, u16 partid,
-		      struct mpam_config *cfg)
+		      struct mpam_config *user_cfg)
 {
 	struct mpam_write_config_arg arg;
 	struct mpam_msc_ris *ris;
+	struct mpam_config cfg;
 	struct mpam_vmsc *vmsc;
 	struct mpam_msc *msc;
 
 	lockdep_assert_cpus_held();
 
 	/* Don't pass in the current config! */
-	WARN_ON_ONCE(&comp->cfg[partid] == cfg);
+	WARN_ON_ONCE(&comp->cfg[partid] == user_cfg);
+
+	/*
+	 * Copy the config to avoid writing back the 'extended' version to
+	 * the caller.
+	 * This avoids mpam_devices.c setting a mbw_min that mpam_resctrl.c
+	 * is unaware of ... when it then changes mbw_max to be lower than
+	 * mbw_min.
+	 */
+	cfg = *user_cfg;
+
+	mpam_extend_config(comp->class, &cfg);
 
-	if (!mpam_update_config(&comp->cfg[partid], cfg))
+	if (!mpam_update_config(&comp->cfg[partid], &cfg))
 		return 0;
 
 	arg.comp = comp;
diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
index d9f52023d730..69cb75616561 100644
--- a/drivers/resctrl/mpam_internal.h
+++ b/drivers/resctrl/mpam_internal.h
@@ -278,10 +278,12 @@ struct mpam_config {
 	u32	cpbm;
 	u32	mbw_pbm;
 	u16	mbw_max;
+	u16	mbw_min;
 
 	bool	reset_cpbm;
 	bool	reset_mbw_pbm;
 	bool	reset_mbw_max;
+	bool	reset_mbw_min;
 
 	struct mpam_garbage	garbage;
 };
@@ -618,6 +620,7 @@ static inline void mpam_resctrl_teardown_class(struct mpam_class *class) { }
  * MPAMCFG_MBW_MAX - MPAM memory maximum bandwidth partitioning configuration
  *                   register
  */
+#define MPAMCFG_MBW_MAX_MAX_NR_BITS	16
 #define MPAMCFG_MBW_MAX_MAX		GENMASK(15, 0)
 #define MPAMCFG_MBW_MAX_HARDLIM		BIT(31)
 
diff --git a/drivers/resctrl/mpam_resctrl.c b/drivers/resctrl/mpam_resctrl.c
index e7b839c478fd..019f7a1d74fd 100644
--- a/drivers/resctrl/mpam_resctrl.c
+++ b/drivers/resctrl/mpam_resctrl.c
@@ -1446,6 +1446,8 @@ int resctrl_arch_update_one(struct rdt_resource *r, struct rdt_ctrl_domain *d,
 		if (mpam_has_feature(mpam_feat_mbw_max, cprops)) {
 			cfg.mbw_max = percent_to_mbw_max(cfg_val, cprops);
 			mpam_set_feature(mpam_feat_mbw_max, &cfg);
+			/* Allow the min to be calculated from the max */
+			mpam_clear_feature(mpam_feat_mbw_min, &cfg);
 			break;
 		}
 		fallthrough;
-- 
2.43.0
Re: [PATCH v3 41/47] arm_mpam: Generate a configuration for min controls
Posted by Jonathan Cameron 3 weeks, 6 days ago
On Mon, 12 Jan 2026 16:59:08 +0000
Ben Horgan <ben.horgan@arm.com> wrote:

> From: James Morse <james.morse@arm.com>
> 
> MPAM supports a minimum and maximum control for memory bandwidth. The
> purpose of the minimum control is to give priority to tasks that are below
> their minimum value. Resctrl only provides one value for the bandwidth
> configuration, which is used for the maximum.
> 
> The minimum control is always programmed to zero on hardware that supports
> it.
> 
> Generate a minimum bandwidth value that is 5% lower than the value provided
> by resctrl. This means tasks that are not receiving their target bandwidth
> can be prioritised by the hardware.
> 
> For component reset reuse the same calculation so that the default is a
> value resctrl can set.
> 
> CC: Zeng Heng <zengheng4@huawei.com>
> Signed-off-by: James Morse <james.morse@arm.com>
> Signed-off-by: Ben Horgan <ben.horgan@arm.com>

I'm interested to see how this plays out as a default choice
vs what people elect to run.  Seems harmless to start with this.

Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
Re: [PATCH v3 41/47] arm_mpam: Generate a configuration for min controls
Posted by Ben Horgan 1 week, 3 days ago
Hi Fenghua, Jonathan,

On 1/13/26 15:39, Jonathan Cameron wrote:
> On Mon, 12 Jan 2026 16:59:08 +0000
> Ben Horgan <ben.horgan@arm.com> wrote:
> 
>> From: James Morse <james.morse@arm.com>
>>
>> MPAM supports a minimum and maximum control for memory bandwidth. The
>> purpose of the minimum control is to give priority to tasks that are below
>> their minimum value. Resctrl only provides one value for the bandwidth
>> configuration, which is used for the maximum.
>>
>> The minimum control is always programmed to zero on hardware that supports
>> it.
>>
>> Generate a minimum bandwidth value that is 5% lower than the value provided
>> by resctrl. This means tasks that are not receiving their target bandwidth
>> can be prioritised by the hardware.
>>
>> For component reset reuse the same calculation so that the default is a
>> value resctrl can set.
>>
>> CC: Zeng Heng <zengheng4@huawei.com>
>> Signed-off-by: James Morse <james.morse@arm.com>
>> Signed-off-by: Ben Horgan <ben.horgan@arm.com>
> 
> I'm interested to see how this plays out as a default choice
> vs what people elect to run.  Seems harmless to start with this.

I've realised it's not harmless :(

> 
> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>
> 

In the discussion on a platform quirk, arm_mpam: Add workaround for
T241-MPAM-4,  Fenghua raised the following issues.

"
MBW_MIN is 1% or 5% less than MBW_MAX.

The lower MBW_MIN hints hardware to lower mem bandwidth when mem access
contention. That causes memory performance degradation.

Is it possible to do the following changes to fix the performance issue?
1. By default min mbw is equal to max mbw. So hardware won't lower
performance unless it's needed. This can fix the current performance issue.
2. Add a new schemata line (e.g. MBI:<id>=x;<id>=y;...) to specify min
mbw just like max mbw specified by schemata line "MB:...". User can use
this line to change min mbw per partition per node. This could be added
in the future.
"

On 1.
Thinking about this again, I think adding any heuristic tied to mbw_max
to determine mbw_min is undesirable. Loading the mpam driver or
mounting resctrl shouldn't change the defaults away from the defaults
for h/w partid 0 or performance characteristics may change unexpectedly.
The spec only gives us suggestions for these but we should go with
those. See table 3.8 in IH0099B.a MPAM System Component Specification.
For MBW_MIN that is 0xFFFF. Also, having mbw_min doesn't necessarily
mean that there is mbw_max. A system that doesn't advertise mbw_min
support to the user should act as if there is no mbw_min support.
On 2.
Yes, adding a new user interface in resctrl is the way to deal with
this. See [1] for a discussion on adding new schema.

Hence, I'll drop this patch, and update the mbw_min default to be 0xFFFF
and for the value not to change even if mbw_max changes. I think this
leaves us in the best position going forward without any heuristics that
may come back to bite us later when proper support for a schema
supporting mbw_min is added to resctrl.

[1] https://lore.kernel.org/lkml/aPtfMFfLV1l%2FRB0L@e133380.arm.com/

Thanks,

Ben
Re: [PATCH v3 41/47] arm_mpam: Generate a configuration for min controls
Posted by Shanker Donthineni 1 week, 2 days ago
Hi Ben,

On 1/30/2026 8:17 AM, Ben Horgan wrote:
> External email: Use caution opening links or attachments
>
>
> Hi Fenghua, Jonathan,
>
> On 1/13/26 15:39, Jonathan Cameron wrote:
>> On Mon, 12 Jan 2026 16:59:08 +0000
>> Ben Horgan <ben.horgan@arm.com> wrote:
>>
>>> From: James Morse <james.morse@arm.com>
>>>
>>> MPAM supports a minimum and maximum control for memory bandwidth. The
>>> purpose of the minimum control is to give priority to tasks that are below
>>> their minimum value. Resctrl only provides one value for the bandwidth
>>> configuration, which is used for the maximum.
>>>
>>>
>>> Hence, I'll drop this patch, and update the mbw_min default to be 0xFFFF
>>> and for the value not to change even if mbw_max changes. I think this
>>> leaves us in the best position going forward without any heuristics that
>>> may come back to bite us later when proper support for a schema
>>> supporting mbw_min is added to resctrl.

Background: I previously shared the original fix (see code snippet below) with
James Morse
~2 years ago to address the errata, which explicitly recommends using a
5% gap for
mitigation of the hardware issue (the problem described in the commit text
of T241-MPAM-4).

For some reason the original implementation was split into two patches:
   - Generic change applicable to all chips
   - Specific fix for Grace errata T241-MPAM-4


Issue: Dropping this patch impacts [PATCH v3 45/47] for the errata fix. If
removal is
necessary, please merge this change into the T241-MPAM-4-specific patch.

--- a/drivers/platform/mpam/mpam_devices.c
+++ b/drivers/platform/mpam/mpam_devices.c
@@ -1190,8 +1190,12 @@ static void mpam_reprogram_ris_partid(struct
mpam_msc_ris *ris, u16 partid, rprops->mbw_pbm_bits);
          }
-       if (mpam_has_feature(mpam_feat_mbw_min, rprops))
-               mpam_write_partsel_reg(msc, MBW_MIN, 0);
+       if (mpam_has_feature(mpam_feat_mbw_min, rprops)) {
+               if (mpam_has_feature(mpam_feat_mbw_max, cfg))
+                       mpam_write_partsel_reg(msc, MBW_MIN, cfg->mbw_min);
+               else
+                       mpam_write_partsel_reg(msc, MBW_MIN, 0);
+       }
         if (mpam_has_feature(mpam_feat_mbw_max, rprops)) {
                 if (mpam_has_feature(mpam_feat_mbw_max, cfg)) @@
-2332,6 +2336,31 @@ static int __write_config(void *arg)
          return 0;
   }
+static void mpam_extend_config(struct mpam_class *class, struct mpam_config *cfg)
+{
+       struct mpam_props *cprops = &class->props;
+       u32 min, delta;
+
+       /*
+        * MAX and MIN should be set together. If only one is provided,
+        * generate a configuration for the other. If only one control
+        * type is supported, the other value will be ignored.
+        *
+        * Resctrl can only configure the MAX.
+        *
+        * Parts affected by Nvidia's T241-MPAM-4 depend on this occurring,
+        * and recommend a 5% difference.
+        */
+       if (mpam_has_feature(mpam_feat_mbw_max, cfg) &&
+           !mpam_has_feature(mpam_feat_mbw_min, cfg)){
+               delta = ((5 * MPAMCFG_MBW_MAX_MAX) / 100) - 1;
+               min = max_t(s32, cfg->mbw_max - delta, BIT(cprops->bwa_wd));
+
+               cfg->mbw_min = max_t(s32, cfg->mbw_max - delta, BIT(16 - cprops->bwa_wd));
+               mpam_set_feature(mpam_feat_mbw_min, cfg);
+       }
+}

Shanker

Re: [PATCH v3 41/47] arm_mpam: Generate a configuration for min controls
Posted by Ben Horgan 1 week ago
Hi Shanker,

On 1/31/26 02:30, Shanker Donthineni wrote:
> Hi Ben,
> 
> On 1/30/2026 8:17 AM, Ben Horgan wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> Hi Fenghua, Jonathan,
>>
>> On 1/13/26 15:39, Jonathan Cameron wrote:
>>> On Mon, 12 Jan 2026 16:59:08 +0000
>>> Ben Horgan <ben.horgan@arm.com> wrote:
>>>
>>>> From: James Morse <james.morse@arm.com>
>>>>
>>>> MPAM supports a minimum and maximum control for memory bandwidth. The
>>>> purpose of the minimum control is to give priority to tasks that are
>>>> below
>>>> their minimum value. Resctrl only provides one value for the bandwidth
>>>> configuration, which is used for the maximum.
>>>>
>>>>
>>>> Hence, I'll drop this patch, and update the mbw_min default to be
>>>> 0xFFFF
>>>> and for the value not to change even if mbw_max changes. I think this
>>>> leaves us in the best position going forward without any heuristics
>>>> that
>>>> may come back to bite us later when proper support for a schema
>>>> supporting mbw_min is added to resctrl.
> 
> Background: I previously shared the original fix (see code snippet below) with
> James Morse
> ~2 years ago to address the errata, which explicitly recommends using a
> 5% gap for
> mitigation of the hardware issue (the problem described in the commit text
> of T241-MPAM-4).
> 
> For some reason the original implementation was split into two patches:
>   - Generic change applicable to all chips
>   - Specific fix for Grace errata T241-MPAM-4
> 
> Issue: Dropping this patch impacts [PATCH v3 45/47] for the errata fix. If
> removal is
> necessary, please merge this change into the T241-MPAM-4-specific patch.


What's the behaviour on T241 when MBW_MIN is always 0xFFFF?

I'm worried if we make a policy decision of how to set MBW_MIN based on
MBW_MAX for this platform then we won't be able to support a
configurable MBW_MIN in the future for this platform. As when MBW_MIN
support is added in resctrl the user's configuration for this platform
would change meaning on kernel upgrade.

> 
> --- a/drivers/platform/mpam/mpam_devices.c
> +++ b/drivers/platform/mpam/mpam_devices.c
> @@ -1190,8 +1190,12 @@ static void mpam_reprogram_ris_partid(struct
> mpam_msc_ris *ris, u16 partid, rprops->mbw_pbm_bits);
>          }
> -       if (mpam_has_feature(mpam_feat_mbw_min, rprops))
> -               mpam_write_partsel_reg(msc, MBW_MIN, 0);
> +       if (mpam_has_feature(mpam_feat_mbw_min, rprops)) {
> +               if (mpam_has_feature(mpam_feat_mbw_max, cfg))
> +                       mpam_write_partsel_reg(msc, MBW_MIN, cfg->mbw_min);
> +               else
> +                       mpam_write_partsel_reg(msc, MBW_MIN, 0);
> +       }
>         if (mpam_has_feature(mpam_feat_mbw_max, rprops)) {
>                 if (mpam_has_feature(mpam_feat_mbw_max, cfg)) @@
> -2332,6 +2336,31 @@ static int __write_config(void *arg)
>          return 0;
>   }
> +static void mpam_extend_config(struct mpam_class *class, struct
> mpam_config *cfg)
> +{
> +       struct mpam_props *cprops = &class->props;
> +       u32 min, delta;
> +
> +       /*
> +        * MAX and MIN should be set together. If only one is provided,
> +        * generate a configuration for the other. If only one control
> +        * type is supported, the other value will be ignored.
> +        *
> +        * Resctrl can only configure the MAX.
> +        *
> +        * Parts affected by Nvidia's T241-MPAM-4 depend on this occurring,
> +        * and recommend a 5% difference.
> +        */
> +       if (mpam_has_feature(mpam_feat_mbw_max, cfg) &&
> +           !mpam_has_feature(mpam_feat_mbw_min, cfg)){
> +               delta = ((5 * MPAMCFG_MBW_MAX_MAX) / 100) - 1;
> +               min = max_t(s32, cfg->mbw_max - delta, BIT(cprops-
>>bwa_wd));
> +
> +               cfg->mbw_min = max_t(s32, cfg->mbw_max - delta, BIT(16 -
> cprops->bwa_wd));
> +               mpam_set_feature(mpam_feat_mbw_min, cfg);
> +       }
> +}
> 
> Shanker
> 

Thanks,

Ben

Re: [PATCH v3 41/47] arm_mpam: Generate a configuration for min controls
Posted by Shanker Donthineni 1 week ago
Hi Ben,

On 2/2/2026 4:21 AM, Ben Horgan wrote:
> External email: Use caution opening links or attachments
>
>
> Hi Shanker,
>
> On 1/31/26 02:30, Shanker Donthineni wrote:
>> Hi Ben,
>>
>> On 1/30/2026 8:17 AM, Ben Horgan wrote:
>>> External email: Use caution opening links or attachments
>>>
>>>
>>> Hi Fenghua, Jonathan,
>>>
>>> On 1/13/26 15:39, Jonathan Cameron wrote:
>>>> On Mon, 12 Jan 2026 16:59:08 +0000
>>>> Ben Horgan <ben.horgan@arm.com> wrote:
>>>>
>>>>> From: James Morse <james.morse@arm.com>
>>>>>
>>>>> MPAM supports a minimum and maximum control for memory bandwidth. The
>>>>> purpose of the minimum control is to give priority to tasks that are
>>>>> below
>>>>> their minimum value. Resctrl only provides one value for the bandwidth
>>>>> configuration, which is used for the maximum.
>>>>>
>>>>>
>>>>> Hence, I'll drop this patch, and update the mbw_min default to be
>>>>> 0xFFFF
>>>>> and for the value not to change even if mbw_max changes. I think this
>>>>> leaves us in the best position going forward without any heuristics
>>>>> that
>>>>> may come back to bite us later when proper support for a schema
>>>>> supporting mbw_min is added to resctrl.
>> Background: I previously shared the original fix (see code snippet below) with
>> James Morse
>> ~2 years ago to address the errata, which explicitly recommends using a
>> 5% gap for
>> mitigation of the hardware issue (the problem described in the commit text
>> of T241-MPAM-4).
>>
>> For some reason the original implementation was split into two patches:
>>    - Generic change applicable to all chips
>>    - Specific fix for Grace errata T241-MPAM-4
>> Issue: Dropping this patch impacts [PATCH v3 45/47] for the errata fix. If
>> removal is
>> necessary, please merge this change into the T241-MPAM-4-specific patch.
>
> What's the behaviour on T241 when MBW_MIN is always 0xFFFF?

Memory bandwidth throttling will not function correctly. The MPAM hardware
monitors MIN and MAX values for each active partition to maintain memory
bandwidth usage between MBW_MIN and MBW_MAX. Therefore, MBW_MIN must be
less than MBW_MAX (IMO, setting MBW_MIN to always 0xFFFF is incorrect)

Grace errata T241-MPAM-4 has two issues:
- MBW_MIN must be greater than 0 (WAR: set to one when it's zero)
- In the Grace implementation of memory-bandwidth partitioning (MPAM),
    in the absence of contention for bandwidth, the minimum bandwidth
    setting can affect the amount of achieved bandwidth. Specifically,
    the achieved bandwidth in the absence of contention can settle to any
    value between the values of MIN and MAX. This means if the gap between
    MIN and MAX is large then the BW can settle closer to MIN. To achieve
    BW closer to MAX in the absence of contention, software should configure
    a relatively narrow gap between MPAMCFG_MBW_MIN and MPAMCFG_MBW_MAX.
    The recommendation is to use a 5% gap, corresponding to an absolute
    difference of (0xFFFF * 0.05) = 0xCCC between MPAMCFG_MBW_MIN and
    MPAMCFG_MBW_MAX.

> I'm worried if we make a policy decision of how to set MBW_MIN based on
> MBW_MAX for this platform then we won't be able to support a
> configurable MBW_MIN in the future for this platform.

Yes, we can't support a generic programmable MBW_MIN for the Grace chip. The
current resctrl interface does not expose MBW_MIN, preventing users from
configuring the recommended 5% gap. Without this interface support,
the only way to apply the workaround is through driver-level changes.

>   As when MBW_MIN
> support is added in resctrl the user's configuration for this platform
> would change meaning on kernel upgrade.

What is the timeline for adding MBW_MIN support? We have two options.
  Option-A: Keep the current WAR 5% gap and don't allow users to program MBW_MIN.
  Option-B: Remove the 5% gap workaround and rely on users to program MBW_MIN
   according to the Grace recommendations when the interface becomes available.

We'll prefer option-B.

Thanks,
Shanker

Re: [PATCH v3 41/47] arm_mpam: Generate a configuration for min controls
Posted by Ben Horgan 6 days, 9 hours ago
Hi Shanker, Fenghua,

On 2/2/26 16:34, Shanker Donthineni wrote:
> Hi Ben,
> 
> On 2/2/2026 4:21 AM, Ben Horgan wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> Hi Shanker,
>>
>> On 1/31/26 02:30, Shanker Donthineni wrote:
>>> Hi Ben,
>>>
>>> On 1/30/2026 8:17 AM, Ben Horgan wrote:
>>>> External email: Use caution opening links or attachments
>>>>
>>>>
>>>> Hi Fenghua, Jonathan,
>>>>
>>>> On 1/13/26 15:39, Jonathan Cameron wrote:
>>>>> On Mon, 12 Jan 2026 16:59:08 +0000
>>>>> Ben Horgan <ben.horgan@arm.com> wrote:
>>>>>
>>>>>> From: James Morse <james.morse@arm.com>
>>>>>>
>>>>>> MPAM supports a minimum and maximum control for memory bandwidth. The
>>>>>> purpose of the minimum control is to give priority to tasks that are
>>>>>> below
>>>>>> their minimum value. Resctrl only provides one value for the
>>>>>> bandwidth
>>>>>> configuration, which is used for the maximum.
>>>>>>
>>>>>>
>>>>>> Hence, I'll drop this patch, and update the mbw_min default to be
>>>>>> 0xFFFF
>>>>>> and for the value not to change even if mbw_max changes. I think this
>>>>>> leaves us in the best position going forward without any heuristics
>>>>>> that
>>>>>> may come back to bite us later when proper support for a schema
>>>>>> supporting mbw_min is added to resctrl.
>>> Background: I previously shared the original fix (see code snippet below) with
>>> James Morse
>>> ~2 years ago to address the errata, which explicitly recommends using a
>>> 5% gap for
>>> mitigation of the hardware issue (the problem described in the commit text
>>> of T241-MPAM-4).
>>>
>>> For some reason the original implementation was split into two patches:
>>>    - Generic change applicable to all chips
>>>    - Specific fix for Grace errata T241-MPAM-4
>>> Issue: Dropping this patch impacts [PATCH v3 45/47] for the errata fix. If
>>> removal is
>>> necessary, please merge this change into the T241-MPAM-4-specific patch.
>>
>> What's the behaviour on T241 when MBW_MIN is always 0xFFFF?
> 
> Memory bandwidth throttling will not function correctly. The MPAM hardware
> monitors MIN and MAX values for each active partition to maintain memory
> bandwidth usage between MBW_MIN and MBW_MAX. Therefore, MBW_MIN must be
> less than MBW_MAX (IMO, setting MBW_MIN to always 0xFFFF is incorrect)

Ah, yes. 0xFFFF is indeed a bad default. Looking at Table 5-3 in the MPAM
System Component Specification B.a, I see that all bandwidth usage would be
below the minimum, and so given high preference, meaning MBW_MAX would have
no effect. I'll keep the default for MBW_MIN as 0 (or the minimum for Grace).

> 
> Grace errata T241-MPAM-4 has two issues:
> - MBW_MIN must be greater than 0 (WAR: set to one when it's zero)
> - In the Grace implementation of memory-bandwidth partitioning (MPAM),
>    in the absence of contention for bandwidth, the minimum bandwidth
>    setting can affect the amount of achieved bandwidth. Specifically,
>    the achieved bandwidth in the absence of contention can settle to any
>    value between the values of MIN and MAX. This means if the gap between
>    MIN and MAX is large then the BW can settle closer to MIN. To achieve
>    BW closer to MAX in the absence of contention, software should configure
>    a relatively narrow gap between MPAMCFG_MBW_MIN and MPAMCFG_MBW_MAX.
>    The recommendation is to use a 5% gap, corresponding to an absolute
>    difference of (0xFFFF * 0.05) = 0xCCC between MPAMCFG_MBW_MIN and
>    MPAMCFG_MBW_MAX.

Ok, thanks. I understand the issue more now.

> 
>> I'm worried if we make a policy decision of how to set MBW_MIN based on
>> MBW_MAX for this platform then we won't be able to support a
>> configurable MBW_MIN in the future for this platform.
> 
> Yes, we can't support a generic programmable MBW_MIN for the Grace chip. The
> current resctrl interface does not expose MBW_MIN, preventing users from
> configuring the recommended 5% gap. Without this interface support,
> the only way to apply the workaround is through driver-level changes.
> 
>>   As when MBW_MIN
>> support is added in resctrl the user's configuration for this platform
>> would change meaning on kernel upgrade.
> 
> What is the timeline for adding MBW_MIN support? We have two options.
>  Option-A: Keep the current WAR 5% gap and don't allow users to program
> MBW_MIN.
>  Option-B: Remove the 5% gap workaround and rely on users to program MBW_MIN
>             according to the Grace recommendations
> when the interface becomes available.
> 
> We'll prefer option-B.

The problem with option-B is that the transition introduces a change in
user-visible behaviour
for any existing MBW_MAX configuration.

If option-A is preferable to disabling MBW_MAX on grace until we have
proper MBW_MIN support in resctrl then I think we should assume option-A.

The work to decide how to add new schema is underway, but it's difficult to
say how long it will take.
See: https://lore.kernel.org/lkml/aPtfMFfLV1l%2FRB0L@e133380.arm.com/

Assuming that you're sure that the 5% gap is the best policy and that
there are no other objections, I'll add that policy back into the
T241-MPAM-4 workaround and look into a way to ensure that we don't
accidentally enable MBW_MIN support for Grace when the proper
support is added.

> 
> Thanks,
> Shanker
> 

Thanks,

Ben