Resctrl uses local memory bandwidth event as input to the feedback
loop when the mba_MBps mount option is used. This means that this
mount option cannot be used on systems that only support monitoring
of total bandwidth.
Prepare to allow users to choose the input event independently for
each ctrl_mon group.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
include/linux/resctrl.h | 2 ++
arch/x86/kernel/cpu/resctrl/internal.h | 2 ++
arch/x86/kernel/cpu/resctrl/core.c | 3 +++
arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 ++++++
4 files changed, 13 insertions(+)
diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index d94abba1c716..fd05b937e2f4 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -49,6 +49,8 @@ enum resctrl_event_id {
QOS_L3_MBM_LOCAL_EVENT_ID = 0x03,
};
+extern enum resctrl_event_id mba_mbps_default_event;
+
/**
* struct resctrl_staged_config - parsed configuration to be applied
* @new_ctrl: new ctrl value to be loaded
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index faaff9d64102..485800055a7d 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -283,6 +283,7 @@ struct pseudo_lock_region {
* monitor only or ctrl_mon group
* @mon: mongroup related data
* @mode: mode of resource group
+ * @mba_mbps_event: input monitoring event id when mba_sc is enabled
* @plr: pseudo-locked region
*/
struct rdtgroup {
@@ -295,6 +296,7 @@ struct rdtgroup {
enum rdt_group_type type;
struct mongroup mon;
enum rdtgrp_mode mode;
+ enum resctrl_event_id mba_mbps_event;
struct pseudo_lock_region *plr;
};
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index f3ee5859b69d..94bf559966d6 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -963,6 +963,9 @@ static __init bool get_rdt_mon_resources(void)
if (!rdt_mon_features)
return false;
+ if (is_mbm_local_enabled())
+ mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
+
return !rdt_get_mon_l3_config(r);
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 2b198ef95e1e..a8022bddf9f7 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -65,6 +65,8 @@ static void rdtgroup_destroy_root(void);
struct dentry *debugfs_resctrl;
+enum resctrl_event_id mba_mbps_default_event;
+
static bool resctrl_debug;
void rdt_last_cmd_clear(void)
@@ -3611,6 +3613,8 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
rdt_last_cmd_puts("kernfs subdir error\n");
goto out_del_list;
}
+ if (is_mba_sc(NULL))
+ rdtgrp->mba_mbps_event = mba_mbps_default_event;
}
goto out_unlock;
@@ -3970,6 +3974,8 @@ static void __init rdtgroup_setup_default(void)
rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
rdtgroup_default.type = RDTCTRL_GROUP;
+ if (supports_mba_mbps())
+ rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
--
2.47.0
Hi Tony, On 11/13/24 4:17 PM, Tony Luck wrote: > Resctrl uses local memory bandwidth event as input to the feedback > loop when the mba_MBps mount option is used. This means that this > mount option cannot be used on systems that only support monitoring > of total bandwidth. > > Prepare to allow users to choose the input event independently for > each ctrl_mon group. The lack of detail on design and implementation leaves a lot for the reader to decipher. For example, * the change appears to create a contract that rdtgroup.mba_mbps_event is only valid if mba_sc is enabled, this is "documented" in the structure member comment but not connected to the rest of implementation, not here nor later in series. * the patch uses *three* different checks to manage new variables: is_mbm_local_enabled(), is_mba_sc(), and supports_mba_mbps(). Reader is left to decipher that all checks are built on is_mbm_local_enabled() and thus it is ok to use these checks before using the value that is only assigned when is_mbm_local_enabled(). * clearly mba_mbps_default_event cannot always have a value making reader wonder if enum resctrl_event_id needs a "0", takes some deciphering to get confidence that its assignment when is_mbm_local_enabled() fits under the contract that values are only value when is_mba_sc() and thus any code following contract by first checking for mba_sc should never encounter a 0. * based on premise of this work reader may consider what happens if system does not support local MBM. more deciphering needed to get confidence that while mba_mbps_default_event will not be set, since is_mba_sc() still depends on local MBM this still fits under contract that mba_mbps_default_event cannot be used in this case. Of course, it may just me that needs more help to understand what a patch is doing while having little insight into what it intends to do. I thought by sharing some of the questions I felt needed to investigated may give some insight into the difficulty a cryptic changelog creates. Review could be helped significantly if the changelog provides insight into the design decisions. ... > @@ -3611,6 +3613,8 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, > rdt_last_cmd_puts("kernfs subdir error\n"); > goto out_del_list; > } > + if (is_mba_sc(NULL)) > + rdtgrp->mba_mbps_event = mba_mbps_default_event; > } > > goto out_unlock; > @@ -3970,6 +3974,8 @@ static void __init rdtgroup_setup_default(void) > rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; > rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; > rdtgroup_default.type = RDTCTRL_GROUP; > + if (supports_mba_mbps()) > + rdtgroup_default.mba_mbps_event = mba_mbps_default_event; > INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); > > list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); I do not see the default resource group's mba_mbps_event ever being reset. This means that if the user mounts resctrl, changes mba_mbps_event, umount resctrl, remount resctrl, then the default resource group will not have the default mba_mbps_event but whatever was set on previous mount. Is this intended? No mention of this behavior in changelog. Reinette
On Tue, Nov 19, 2024 at 05:08:42PM -0800, Reinette Chatre wrote: > Hi Tony, > > On 11/13/24 4:17 PM, Tony Luck wrote: > > Resctrl uses local memory bandwidth event as input to the feedback > > loop when the mba_MBps mount option is used. This means that this > > mount option cannot be used on systems that only support monitoring > > of total bandwidth. > > > > Prepare to allow users to choose the input event independently for > > each ctrl_mon group. > Hi Reinette, > The lack of detail on design and implementation leaves a lot for the > reader to decipher. For example, > * the change appears to create a contract that rdtgroup.mba_mbps_event > is only valid if mba_sc is enabled, this is "documented" in the > structure member comment but not connected to the rest of implementation, not > here nor later in series. I'll add text documenting this to the commit comment here, and also a comment in the code that defines mba_mbps_default_event. > * the patch uses *three* different checks to manage new variables: > is_mbm_local_enabled(), is_mba_sc(), and supports_mba_mbps(). Reader is > left to decipher that all checks are built on is_mbm_local_enabled() > and thus it is ok to use these checks before using the value that is only > assigned when is_mbm_local_enabled(). With some refactoring I've got that down to just one new additon of "is_mba_sc()" (protecting the assignment of rdtgrp->mba_mbps_event in rdtgroup_mkdir_ctrl_mon(). > * clearly mba_mbps_default_event cannot always have a value making reader wonder > if enum resctrl_event_id needs a "0", takes some deciphering to get confidence > that its assignment when is_mbm_local_enabled() fits under the contract > that values are only value when is_mba_sc() and thus any code following contract by > first checking for mba_sc should never encounter a 0. Documented that mba_mbps_default_event remaining "0" is not an issue in the comment at the definition of mba_mbps_default_event. > * based on premise of this work reader may consider what happens if > system does not support local MBM. more deciphering needed to get confidence > that while mba_mbps_default_event will not be set, since is_mba_sc() still > depends on local MBM this still fits under contract that mba_mbps_default_event > cannot be used in this case. I think I've cleared up this mystery with commit and code comments. > Of course, it may just me that needs more help to understand what a patch is doing > while having little insight into what it intends to do. I thought by sharing some of > the questions I felt needed to investigated may give some insight into the difficulty > a cryptic changelog creates. Review could be helped significantly if the changelog > provides insight into the design decisions. Good point. I've also re-worked the cover letter to provide more information on the problem and solution implemented by this series. > ... > > > @@ -3611,6 +3613,8 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, > > rdt_last_cmd_puts("kernfs subdir error\n"); > > goto out_del_list; > > } > > + if (is_mba_sc(NULL)) > > + rdtgrp->mba_mbps_event = mba_mbps_default_event; > > } > > > > goto out_unlock; > > @@ -3970,6 +3974,8 @@ static void __init rdtgroup_setup_default(void) > > rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; > > rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; > > rdtgroup_default.type = RDTCTRL_GROUP; > > + if (supports_mba_mbps()) > > + rdtgroup_default.mba_mbps_event = mba_mbps_default_event; > > INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); > > > > list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); > > I do not see the default resource group's mba_mbps_event ever being reset. This means > that if the user mounts resctrl, changes mba_mbps_event, umount resctrl, remount > resctrl, then the default resource group will not have the default mba_mbps_event > but whatever was set on previous mount. Is this intended? No mention of this behavior in > changelog. Good catch. You are correct that a changed event value in the default CTRL_MON group is preserved across unmount/remount. This was not intentional. Moving the initialization of rdtgroup_default.mba_mbps_event into set_mba_sc() fixes this (with the added benefit of removing the supports_mba_mbps() check). > Reinette -Tony
Hi Tony, On 11/21/24 9:33 AM, Luck, Tony wrote: > On Tue, Nov 19, 2024 at 05:08:42PM -0800, Reinette Chatre wrote: >> Hi Tony, >> >> On 11/13/24 4:17 PM, Tony Luck wrote: >>> Resctrl uses local memory bandwidth event as input to the feedback >>> loop when the mba_MBps mount option is used. This means that this >>> mount option cannot be used on systems that only support monitoring >>> of total bandwidth. >>> >>> Prepare to allow users to choose the input event independently for >>> each ctrl_mon group. >> > Hi Reinette, > >> The lack of detail on design and implementation leaves a lot for the >> reader to decipher. For example, >> * the change appears to create a contract that rdtgroup.mba_mbps_event >> is only valid if mba_sc is enabled, this is "documented" in the >> structure member comment but not connected to the rest of implementation, not >> here nor later in series. > > I'll add text documenting this to the commit comment here, and also a > comment in the code that defines mba_mbps_default_event. > >> * the patch uses *three* different checks to manage new variables: >> is_mbm_local_enabled(), is_mba_sc(), and supports_mba_mbps(). Reader is >> left to decipher that all checks are built on is_mbm_local_enabled() >> and thus it is ok to use these checks before using the value that is only >> assigned when is_mbm_local_enabled(). > > With some refactoring I've got that down to just one new additon of > "is_mba_sc()" (protecting the assignment of rdtgrp->mba_mbps_event > in rdtgroup_mkdir_ctrl_mon(). > Just to clarify, I am not stating that there should not be three different checks. Clearly if some initialization is done before resctrl mount then the mba_sc checks may not apply. My goal was to highlight the investigations a reader is forced to do without guidance from the changelog. ... >>> @@ -3611,6 +3613,8 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, >>> rdt_last_cmd_puts("kernfs subdir error\n"); >>> goto out_del_list; >>> } >>> + if (is_mba_sc(NULL)) >>> + rdtgrp->mba_mbps_event = mba_mbps_default_event; >>> } >>> >>> goto out_unlock; >>> @@ -3970,6 +3974,8 @@ static void __init rdtgroup_setup_default(void) >>> rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; >>> rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; >>> rdtgroup_default.type = RDTCTRL_GROUP; >>> + if (supports_mba_mbps()) >>> + rdtgroup_default.mba_mbps_event = mba_mbps_default_event; >>> INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); >>> >>> list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); >> >> I do not see the default resource group's mba_mbps_event ever being reset. This means >> that if the user mounts resctrl, changes mba_mbps_event, umount resctrl, remount >> resctrl, then the default resource group will not have the default mba_mbps_event >> but whatever was set on previous mount. Is this intended? No mention of this behavior in >> changelog. > > Good catch. You are correct that a changed event value in the default > CTRL_MON group is preserved across unmount/remount. This was not > intentional. Moving the initialization of rdtgroup_default.mba_mbps_event > into set_mba_sc() fixes this (with the added benefit of removing the > supports_mba_mbps() check). I am curious how this will turn out considering the fragmentation of the rdtgroup_default initialization. Reinette
Hi Tony, On 11/13/2024 6:17 PM, Tony Luck wrote: > Resctrl uses local memory bandwidth event as input to the feedback > loop when the mba_MBps mount option is used. This means that this > mount option cannot be used on systems that only support monitoring > of total bandwidth. > > Prepare to allow users to choose the input event independently for > each ctrl_mon group. How about this? Provide users with the ability to select the input event independently for each ctrl_mon group. > > Signed-off-by: Tony Luck <tony.luck@intel.com> > --- > include/linux/resctrl.h | 2 ++ > arch/x86/kernel/cpu/resctrl/internal.h | 2 ++ > arch/x86/kernel/cpu/resctrl/core.c | 3 +++ > arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 ++++++ > 4 files changed, 13 insertions(+) > > diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h > index d94abba1c716..fd05b937e2f4 100644 > --- a/include/linux/resctrl.h > +++ b/include/linux/resctrl.h > @@ -49,6 +49,8 @@ enum resctrl_event_id { > QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, > }; > > +extern enum resctrl_event_id mba_mbps_default_event; > + > /** > * struct resctrl_staged_config - parsed configuration to be applied > * @new_ctrl: new ctrl value to be loaded > diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h > index faaff9d64102..485800055a7d 100644 > --- a/arch/x86/kernel/cpu/resctrl/internal.h > +++ b/arch/x86/kernel/cpu/resctrl/internal.h > @@ -283,6 +283,7 @@ struct pseudo_lock_region { > * monitor only or ctrl_mon group > * @mon: mongroup related data > * @mode: mode of resource group > + * @mba_mbps_event: input monitoring event id when mba_sc is enabled > * @plr: pseudo-locked region > */ > struct rdtgroup { > @@ -295,6 +296,7 @@ struct rdtgroup { > enum rdt_group_type type; > struct mongroup mon; > enum rdtgrp_mode mode; > + enum resctrl_event_id mba_mbps_event; > struct pseudo_lock_region *plr; > }; > > diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c > index f3ee5859b69d..94bf559966d6 100644 > --- a/arch/x86/kernel/cpu/resctrl/core.c > +++ b/arch/x86/kernel/cpu/resctrl/core.c > @@ -963,6 +963,9 @@ static __init bool get_rdt_mon_resources(void) > if (!rdt_mon_features) > return false; > > + if (is_mbm_local_enabled()) > + mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; Any reason to separate this patch and patch 8? I feel it can be combined. > + > return !rdt_get_mon_l3_config(r); > } > > diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > index 2b198ef95e1e..a8022bddf9f7 100644 > --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c > +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > @@ -65,6 +65,8 @@ static void rdtgroup_destroy_root(void); > > struct dentry *debugfs_resctrl; > > +enum resctrl_event_id mba_mbps_default_event; > + > static bool resctrl_debug; > > void rdt_last_cmd_clear(void) > @@ -3611,6 +3613,8 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, > rdt_last_cmd_puts("kernfs subdir error\n"); > goto out_del_list; > } > + if (is_mba_sc(NULL)) > + rdtgrp->mba_mbps_event = mba_mbps_default_event; > } > > goto out_unlock; > @@ -3970,6 +3974,8 @@ static void __init rdtgroup_setup_default(void) > rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; > rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; > rdtgroup_default.type = RDTCTRL_GROUP; > + if (supports_mba_mbps()) > + rdtgroup_default.mba_mbps_event = mba_mbps_default_event; > INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); > > list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); -- - Babu Moger
On Fri, Nov 15, 2024 at 10:20:34AM -0600, Moger, Babu wrote: Thanks for looking. Comments below. > Hi Tony, > > On 11/13/2024 6:17 PM, Tony Luck wrote: > > Resctrl uses local memory bandwidth event as input to the feedback > > loop when the mba_MBps mount option is used. This means that this > > mount option cannot be used on systems that only support monitoring > > of total bandwidth. > > > > Prepare to allow users to choose the input event independently for > > each ctrl_mon group. > > How about this? > > Provide users with the ability to select the input event independently for > each ctrl_mon group. That's a description for the series as a whole. This patch doesn't do all the things in that sentence. > > > > > Signed-off-by: Tony Luck <tony.luck@intel.com> > > --- > > include/linux/resctrl.h | 2 ++ > > arch/x86/kernel/cpu/resctrl/internal.h | 2 ++ > > arch/x86/kernel/cpu/resctrl/core.c | 3 +++ > > arch/x86/kernel/cpu/resctrl/rdtgroup.c | 6 ++++++ > > 4 files changed, 13 insertions(+) > > > > diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h > > index d94abba1c716..fd05b937e2f4 100644 > > --- a/include/linux/resctrl.h > > +++ b/include/linux/resctrl.h > > @@ -49,6 +49,8 @@ enum resctrl_event_id { > > QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, > > }; > > +extern enum resctrl_event_id mba_mbps_default_event; > > + > > /** > > * struct resctrl_staged_config - parsed configuration to be applied > > * @new_ctrl: new ctrl value to be loaded > > diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h > > index faaff9d64102..485800055a7d 100644 > > --- a/arch/x86/kernel/cpu/resctrl/internal.h > > +++ b/arch/x86/kernel/cpu/resctrl/internal.h > > @@ -283,6 +283,7 @@ struct pseudo_lock_region { > > * monitor only or ctrl_mon group > > * @mon: mongroup related data > > * @mode: mode of resource group > > + * @mba_mbps_event: input monitoring event id when mba_sc is enabled > > * @plr: pseudo-locked region > > */ > > struct rdtgroup { > > @@ -295,6 +296,7 @@ struct rdtgroup { > > enum rdt_group_type type; > > struct mongroup mon; > > enum rdtgrp_mode mode; > > + enum resctrl_event_id mba_mbps_event; > > struct pseudo_lock_region *plr; > > }; > > diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c > > index f3ee5859b69d..94bf559966d6 100644 > > --- a/arch/x86/kernel/cpu/resctrl/core.c > > +++ b/arch/x86/kernel/cpu/resctrl/core.c > > @@ -963,6 +963,9 @@ static __init bool get_rdt_mon_resources(void) > > if (!rdt_mon_features) > > return false; > > + if (is_mbm_local_enabled()) > > + mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID; > > > Any reason to separate this patch and patch 8? I feel it can be combined. patch 8 will set mba_mbps_default_event to QOS_L3_MBM_TOTAL_EVENT_ID on systems witout support for local memory bandwidth monitoring. The rest of the code isn't ready for that until midway through this series when other code has been updated to handle total bandwidth correctly. I may have gone to extremes moving that part out all the way to patch 8. It could potentially happen earlier in the series. > > > + > > return !rdt_get_mon_l3_config(r); > > } > > diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > > index 2b198ef95e1e..a8022bddf9f7 100644 > > --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c > > +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c > > @@ -65,6 +65,8 @@ static void rdtgroup_destroy_root(void); > > struct dentry *debugfs_resctrl; > > +enum resctrl_event_id mba_mbps_default_event; > > + > > static bool resctrl_debug; > > void rdt_last_cmd_clear(void) > > @@ -3611,6 +3613,8 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, > > rdt_last_cmd_puts("kernfs subdir error\n"); > > goto out_del_list; > > } > > + if (is_mba_sc(NULL)) > > + rdtgrp->mba_mbps_event = mba_mbps_default_event; > > } > > goto out_unlock; > > @@ -3970,6 +3974,8 @@ static void __init rdtgroup_setup_default(void) > > rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID; > > rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID; > > rdtgroup_default.type = RDTCTRL_GROUP; > > + if (supports_mba_mbps()) > > + rdtgroup_default.mba_mbps_event = mba_mbps_default_event; > > INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list); > > list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups); > > -- > - Babu Moger -Tony
© 2016 - 2024 Red Hat, Inc.