Hardware provides a set of counters when the ABMC feature is supported.
These counters are used for enabling events in a resctrl group when
the feature is enabled.
Introduce mbm_cntrs_free_map bitmap to track available and free counters.
Signed-off-by: Babu Moger <babu.moger@amd.com>
---
v5:
Updated the comments and commit log.
Few renames
num_cntrs_free_map -> mbm_cntrs_free_map
num_cntrs_init -> mbm_cntrs_init
Added initialization in rdt_get_tree because the default ABMC
enablement happens during the init.
v4: Changed the name to num_cntrs where applicable.
Used bitmap apis.
Added more comments for the globals.
v3: Changed the bitmap name to assign_cntrs_free_map. Removed abmc
from the name.
v2: Changed the bitmap name to assignable_counter_free_map from
abmc_counter_free_map.
---
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 29 ++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 4f47f52e01c2..b3d3fa048f15 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -185,6 +185,23 @@ bool closid_allocated(unsigned int closid)
return !test_bit(closid, &closid_free_map);
}
+/*
+ * Counter bitmap and its length for tracking available counters.
+ * ABMC feature provides set of hardware counters for enabling events.
+ * Each event takes one hardware counter. Kernel needs to keep track
+ * of number of available counters.
+ */
+static unsigned long mbm_cntrs_free_map;
+static unsigned int mbm_cntrs_free_map_len;
+
+static void mbm_cntrs_init(void)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
+
+ bitmap_fill(&mbm_cntrs_free_map, r->mon.num_mbm_cntrs);
+ mbm_cntrs_free_map_len = r->mon.num_mbm_cntrs;
+}
+
/**
* rdtgroup_mode_by_closid - Return mode of resource group with closid
* @closid: closid if the resource group
@@ -2466,6 +2483,12 @@ static int _resctrl_abmc_enable(struct rdt_resource *r, bool enable)
{
struct rdt_mon_domain *d;
+ /*
+ * Clear all the previous assignments while switching the monitor
+ * mode.
+ */
+ mbm_cntrs_init();
+
/*
* Hardware counters will reset after switching the monitor mode.
* Reset the architectural state so that reading of hardware
@@ -2724,10 +2747,10 @@ static void schemata_list_destroy(void)
static int rdt_get_tree(struct fs_context *fc)
{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
struct rdt_fs_context *ctx = rdt_fc2context(fc);
unsigned long flags = RFTYPE_CTRL_BASE;
struct rdt_mon_domain *dom;
- struct rdt_resource *r;
int ret;
cpus_read_lock();
@@ -2756,6 +2779,9 @@ static int rdt_get_tree(struct fs_context *fc)
closid_init();
+ if (r->mon.abmc_capable)
+ mbm_cntrs_init();
+
if (resctrl_arch_mon_capable())
flags |= RFTYPE_MON;
@@ -2800,7 +2826,6 @@ static int rdt_get_tree(struct fs_context *fc)
resctrl_mounted = true;
if (is_mbm_enabled()) {
- r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
list_for_each_entry(dom, &r->mon_domains, hdr.list)
mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
RESCTRL_PICK_ANY_CPU);
--
2.34.1
Hi Babu, On Wed, Jul 3, 2024 at 2:50 PM Babu Moger <babu.moger@amd.com> wrote: > diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > index 4f47f52e01c2..b3d3fa048f15 100644 > --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c > +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > @@ -185,6 +185,23 @@ bool closid_allocated(unsigned int closid) > return !test_bit(closid, &closid_free_map); > } > > +/* > + * Counter bitmap and its length for tracking available counters. > + * ABMC feature provides set of hardware counters for enabling events. > + * Each event takes one hardware counter. Kernel needs to keep track > + * of number of available counters. > + */ > +static unsigned long mbm_cntrs_free_map; > +static unsigned int mbm_cntrs_free_map_len; If counter assignment is supported at a per-domain granularity, then counter id allocation needs to be done per-domain rather than globally. For example, if I free a counter from one group in a particular domain, it should be available to allocate to another group only in that domain. When I attempt this using the current series, the resulting behavior is quite interesting. I noticed Reinette also commented on this later in the series, noticing that counters are only allocated permanently to groups and never move as a result of writing to mbm_control. 
# grep 'g1[45]' info/L3_MON/mbm_control test/g14/0=tl;1=tl;2=tl;3=tl;4=tl;5=tl;6=tl;7=tl;8=tl;9=tl;10=tl;11=tl;12=tl;13=tl;14=tl;15=tl;16=tl;17=tl;18=tl;19=tl;20=tl;21=tl;22=tl;23=tl;24=tl;25=tl;26=tl;27=tl;28=tl;29=tl;30=tl;31=tl; test/g15/0=_;1=_;2=_;3=_;4=_;5=_;6=_;7=_;8=_;9=_;10=_;11=_;12=_;13=_;14=_;15=_;16=_;17=_;18=_;19=_;20=_;21=_;22=_;23=_;24=_;25=_;26=_;27=_;28=_;29=_;30=_;31=_; [domains 2-31 omitted for clarity below] # echo 'test/g14/1-t' > info/L3_MON/mbm_control # grep 'g1[45]' info/L3_MON/mbm_control test/g14/0=tl;1=l; test/g15/0=_;1=_; # echo "test/g15/1+t" > info/L3_MON/mbm_control # grep 'g1[45]' info/L3_MON/mbm_control test/g14/0=tl;1=_; test/g15/0=_;1=_; -Peter
Hi Peter, On 7/26/24 3:48 PM, Peter Newman wrote: > Hi Babu, > > On Wed, Jul 3, 2024 at 2:50 PM Babu Moger <babu.moger@amd.com> wrote: >> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> index 4f47f52e01c2..b3d3fa048f15 100644 >> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> @@ -185,6 +185,23 @@ bool closid_allocated(unsigned int closid) >> return !test_bit(closid, &closid_free_map); >> } >> >> +/* >> + * Counter bitmap and its length for tracking available counters. >> + * ABMC feature provides set of hardware counters for enabling events. >> + * Each event takes one hardware counter. Kernel needs to keep track >> + * of number of available counters. >> + */ >> +static unsigned long mbm_cntrs_free_map; >> +static unsigned int mbm_cntrs_free_map_len; > > If counter assignment is supported at a per-domain granularity, then > counter id allocation needs to be done per-domain rather than > globally. For example, if I free a counter from one group in a It is not obvious to me that counter assignment supported per-domain requires allocation per-domain. I think this may get complicated when resources are monitored with one counter when tasks run in one domain and another counter when the same tasks run in another domain. > particular domain, it should be available to allocate to another group > only in that domain. > > When I attempt this using the current series, the resulting behavior > is quite interesting. I noticed Reinette also commented on this later > in the series, noticing that counters are only allocated permanently > to groups and never move as a result of writing to mbm_control. As I understand this is separate from how the counter is allocated, but instead just a gap in current implementation of intended interface. 
> > # grep 'g1[45]' info/L3_MON/mbm_control > test/g14/0=tl;1=tl;2=tl;3=tl;4=tl;5=tl;6=tl;7=tl;8=tl;9=tl;10=tl;11=tl;12=tl;13=tl;14=tl;15=tl;16=tl;17=tl;18=tl;19=tl;20=tl;21=tl;22=tl;23=tl;24=tl;25=tl;26=tl;27=tl;28=tl;29=tl;30=tl;31=tl; > test/g15/0=_;1=_;2=_;3=_;4=_;5=_;6=_;7=_;8=_;9=_;10=_;11=_;12=_;13=_;14=_;15=_;16=_;17=_;18=_;19=_;20=_;21=_;22=_;23=_;24=_;25=_;26=_;27=_;28=_;29=_;30=_;31=_; > > [domains 2-31 omitted for clarity below] > > # echo 'test/g14/1-t' > info/L3_MON/mbm_control > # grep 'g1[45]' info/L3_MON/mbm_control > test/g14/0=tl;1=l; > test/g15/0=_;1=_; > > # echo "test/g15/1+t" > info/L3_MON/mbm_control > # grep 'g1[45]' info/L3_MON/mbm_control > test/g14/0=tl;1=_; > test/g15/0=_;1=_; Thank you very much for trying this out. Reinette
Hi Peter, On 7/26/2024 5:48 PM, Peter Newman wrote: > Hi Babu, > > On Wed, Jul 3, 2024 at 2:50 PM Babu Moger <babu.moger@amd.com> wrote: >> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> index 4f47f52e01c2..b3d3fa048f15 100644 >> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c >> @@ -185,6 +185,23 @@ bool closid_allocated(unsigned int closid) >> return !test_bit(closid, &closid_free_map); >> } >> >> +/* >> + * Counter bitmap and its length for tracking available counters. >> + * ABMC feature provides set of hardware counters for enabling events. >> + * Each event takes one hardware counter. Kernel needs to keep track >> + * of number of available counters. >> + */ >> +static unsigned long mbm_cntrs_free_map; >> +static unsigned int mbm_cntrs_free_map_len; > > If counter assignment is supported at a per-domain granularity, then > counter id allocation needs to be done per-domain rather than > globally. For example, if I free a counter from one group in a > particular domain, it should be available to allocate to another group > only in that domain. Yes. I noticed the problem. > > When I attempt this using the current series, the resulting behavior > is quite interesting. I noticed Reinette also commented on this later > in the series, noticing that counters are only allocated permanently > to groups and never move as a result of writing to mbm_control. Working on fixing it right now. We need to have a bitmap at the group level (global) as well as at the domain level. We need to set/clear bits in both places when a counter is assigned/unassigned. If all the domains are cleared then we need to free the counter at the group level. Will address this in v6. Thanks for the comments. 
> > # grep 'g1[45]' info/L3_MON/mbm_control > test/g14/0=tl;1=tl;2=tl;3=tl;4=tl;5=tl;6=tl;7=tl;8=tl;9=tl;10=tl;11=tl;12=tl;13=tl;14=tl;15=tl;16=tl;17=tl;18=tl;19=tl;20=tl;21=tl;22=tl;23=tl;24=tl;25=tl;26=tl;27=tl;28=tl;29=tl;30=tl;31=tl; > test/g15/0=_;1=_;2=_;3=_;4=_;5=_;6=_;7=_;8=_;9=_;10=_;11=_;12=_;13=_;14=_;15=_;16=_;17=_;18=_;19=_;20=_;21=_;22=_;23=_;24=_;25=_;26=_;27=_;28=_;29=_;30=_;31=_; > > [domains 2-31 omitted for clarity below] > > # echo 'test/g14/1-t' > info/L3_MON/mbm_control > # grep 'g1[45]' info/L3_MON/mbm_control > test/g14/0=tl;1=l; > test/g15/0=_;1=_; > > # echo "test/g15/1+t" > info/L3_MON/mbm_control > # grep 'g1[45]' info/L3_MON/mbm_control > test/g14/0=tl;1=_; > test/g15/0=_;1=_; > > > -Peter > -- - Babu Moger
Hi Babu,
On 7/3/24 2:48 PM, Babu Moger wrote:
> Hardware provides a set of counters when the ABMC feature is supported.
> These counters are used for enabling the events in resctrl group when
> the feature is enabled.
>
> Introduce mbm_cntrs_free_map bitmap to track available and free counters.
>
> Signed-off-by: Babu Moger <babu.moger@amd.com>
> ---
> v5:
> Updated the comments and commit log.
> Few renames
> num_cntrs_free_map -> mbm_cntrs_free_map
> num_cntrs_init -> mbm_cntrs_init
> Added initialization in rdt_get_tree because the default ABMC
> enablement happens during the init.
>
> v4: Changed the name to num_cntrs where applicable.
> Used bitmap apis.
> Added more comments for the globals.
>
> v3: Changed the bitmap name to assign_cntrs_free_map. Removed abmc
> from the name.
>
> v2: Changed the bitmap name to assignable_counter_free_map from
> abmc_counter_free_map.
> ---
> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 29 ++++++++++++++++++++++++--
> 1 file changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> index 4f47f52e01c2..b3d3fa048f15 100644
> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
> @@ -185,6 +185,23 @@ bool closid_allocated(unsigned int closid)
> return !test_bit(closid, &closid_free_map);
> }
>
> +/*
> + * Counter bitmap and its length for tracking available counters.
> + * ABMC feature provides set of hardware counters for enabling events.
> + * Each event takes one hardware counter. Kernel needs to keep track
What is meant by "Kernel" here? It looks to be the fs code, but the
implementation has both fs and arch code reaching into the counter
management. This should not be the case; either the fs code or the
arch code needs to manage the counters, not both.
> + * of number of available counters.
> + */
> +static unsigned long mbm_cntrs_free_map;
With the lengths involved this needs a proper DECLARE_BITMAP()
> +static unsigned int mbm_cntrs_free_map_len;
> +
> +static void mbm_cntrs_init(void)
> +{
> + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> +
> + bitmap_fill(&mbm_cntrs_free_map, r->mon.num_mbm_cntrs);
> + mbm_cntrs_free_map_len = r->mon.num_mbm_cntrs;
> +}
> +
> /**
> * rdtgroup_mode_by_closid - Return mode of resource group with closid
> * @closid: closid if the resource group
> @@ -2466,6 +2483,12 @@ static int _resctrl_abmc_enable(struct rdt_resource *r, bool enable)
> {
> struct rdt_mon_domain *d;
>
> + /*
> + * Clear all the previous assignments while switching the monitor
> + * mode.
> + */
> + mbm_cntrs_init();
> +
If the counters are managed by fs code then the arch code should not be
doing this. If needed the fs code should init the counters before calling
the arch helpers.
> /*
> * Hardware counters will reset after switching the monitor mode.
> * Reset the architectural state so that reading of hardware
> @@ -2724,10 +2747,10 @@ static void schemata_list_destroy(void)
>
> static int rdt_get_tree(struct fs_context *fc)
> {
> + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> struct rdt_fs_context *ctx = rdt_fc2context(fc);
> unsigned long flags = RFTYPE_CTRL_BASE;
> struct rdt_mon_domain *dom;
> - struct rdt_resource *r;
> int ret;
>
> cpus_read_lock();
> @@ -2756,6 +2779,9 @@ static int rdt_get_tree(struct fs_context *fc)
>
> closid_init();
>
> + if (r->mon.abmc_capable)
> + mbm_cntrs_init();
> +
> if (resctrl_arch_mon_capable())
> flags |= RFTYPE_MON;
>
> @@ -2800,7 +2826,6 @@ static int rdt_get_tree(struct fs_context *fc)
> resctrl_mounted = true;
>
> if (is_mbm_enabled()) {
> - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
> list_for_each_entry(dom, &r->mon_domains, hdr.list)
> mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
> RESCTRL_PICK_ANY_CPU);
Reinette
Hi Reinette,
On 7/12/24 17:07, Reinette Chatre wrote:
> Hi Babu,
>
> On 7/3/24 2:48 PM, Babu Moger wrote:
>> Hardware provides a set of counters when the ABMC feature is supported.
>> These counters are used for enabling the events in resctrl group when
>> the feature is enabled.
>>
>> Introduce mbm_cntrs_free_map bitmap to track available and free counters.
>>
>> Signed-off-by: Babu Moger <babu.moger@amd.com>
>> ---
>> v5:
>> Updated the comments and commit log.
>> Few renames
>> num_cntrs_free_map -> mbm_cntrs_free_map
>> num_cntrs_init -> mbm_cntrs_init
>> Added initialization in rdt_get_tree because the default ABMC
>> enablement happens during the init.
>>
>> v4: Changed the name to num_cntrs where applicable.
>> Used bitmap apis.
>> Added more comments for the globals.
>>
>> v3: Changed the bitmap name to assign_cntrs_free_map. Removed abmc
>> from the name.
>>
>> v2: Changed the bitmap name to assignable_counter_free_map from
>> abmc_counter_free_map.
>> ---
>> arch/x86/kernel/cpu/resctrl/rdtgroup.c | 29 ++++++++++++++++++++++++--
>> 1 file changed, 27 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> index 4f47f52e01c2..b3d3fa048f15 100644
>> --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
>> @@ -185,6 +185,23 @@ bool closid_allocated(unsigned int closid)
>> return !test_bit(closid, &closid_free_map);
>> }
>> +/*
>> + * Counter bitmap and its length for tracking available counters.
>> + * ABMC feature provides set of hardware counters for enabling events.
>> + * Each event takes one hardware counter. Kernel needs to keep track
>
> What is meant with "Kernel" here? It looks to be the fs code but the
> implementation has both fs and arch code reaching into the counter
> management. This should not be the case, either the fs code or the
> arch code needs to manage the counters, not both.
Yes. This needs to be done in the FS code.
>
>> + * of number of available counters.
>> + */
>> +static unsigned long mbm_cntrs_free_map;
>
> With the lengths involved this needs a proper DECLARE_BITMAP()
Sure.
>
>> +static unsigned int mbm_cntrs_free_map_len;
>> +
>> +static void mbm_cntrs_init(void)
>> +{
>> + struct rdt_resource *r =
>> &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
>> +
>> + bitmap_fill(&mbm_cntrs_free_map, r->mon.num_mbm_cntrs);
>> + mbm_cntrs_free_map_len = r->mon.num_mbm_cntrs;
>> +}
>> +
>> /**
>> * rdtgroup_mode_by_closid - Return mode of resource group with closid
>> * @closid: closid if the resource group
>> @@ -2466,6 +2483,12 @@ static int _resctrl_abmc_enable(struct
>> rdt_resource *r, bool enable)
>> {
>> struct rdt_mon_domain *d;
>> + /*
>> + * Clear all the previous assignments while switching the monitor
>> + * mode.
>> + */
>> + mbm_cntrs_init();
>> +
>
> If the counters are managed by fs code then the arch code should not be
> doing this. If needed the fs code should init the counters before calling
> the arch helpers.
Yes. We cannot make this call from here. I need to move this call to the FS
layer before reaching this code. Thanks for the good point.
>
>> /*
>> * Hardware counters will reset after switching the monitor mode.
>> * Reset the architectural state so that reading of hardware
>> @@ -2724,10 +2747,10 @@ static void schemata_list_destroy(void)
>> static int rdt_get_tree(struct fs_context *fc)
>> {
>> + struct rdt_resource *r =
>> &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
>> struct rdt_fs_context *ctx = rdt_fc2context(fc);
>> unsigned long flags = RFTYPE_CTRL_BASE;
>> struct rdt_mon_domain *dom;
>> - struct rdt_resource *r;
>> int ret;
>> cpus_read_lock();
>> @@ -2756,6 +2779,9 @@ static int rdt_get_tree(struct fs_context *fc)
>> closid_init();
>> + if (r->mon.abmc_capable)
>> + mbm_cntrs_init();
>> +
>> if (resctrl_arch_mon_capable())
>> flags |= RFTYPE_MON;
>> @@ -2800,7 +2826,6 @@ static int rdt_get_tree(struct fs_context *fc)
>> resctrl_mounted = true;
>> if (is_mbm_enabled()) {
>> - r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
>> list_for_each_entry(dom, &r->mon_domains, hdr.list)
>> mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
>> RESCTRL_PICK_ANY_CPU);
>
> Reinette
>
--
Thanks
Babu Moger
© 2016 - 2026 Red Hat, Inc.