[PATCH v7 26/31] fs/resctrl: Fix life-cycle of closid_num_dirty_rmid

Tony Luck posted 31 patches 2 months, 3 weeks ago
There is a newer version of this series
[PATCH v7 26/31] fs/resctrl: Fix life-cycle of closid_num_dirty_rmid
Posted by Tony Luck 2 months, 3 weeks ago
closid_num_dirty_rmid is specific to the L3 resource, but it
is allocated/freed in the more generic dom_data_{init,exit}().

Add helpers to allocate/free closid_num_dirty_rmid.

Rename resctrl_mon_resource_init() to resctrl_mon_l3_resource_init()
and call closid_num_dirty_rmid_init() from it, instead of
allocating in dom_data_init().

Make matching changes to the exit path by renaming
resctrl_mon_resource_exit() to resctrl_mon_l3_resource_exit()
and freeing closid_num_dirty_rmid there instead of in dom_data_exit().

Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 fs/resctrl/internal.h |  6 ++--
 fs/resctrl/monitor.c  | 69 ++++++++++++++++++++++++-------------------
 fs/resctrl/rdtgroup.c | 12 ++++----
 3 files changed, 48 insertions(+), 39 deletions(-)

diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
index 56fdccb39375..28d505efdb7c 100644
--- a/fs/resctrl/internal.h
+++ b/fs/resctrl/internal.h
@@ -358,7 +358,9 @@ int alloc_rmid(u32 closid);
 
 void free_rmid(u32 closid, u32 rmid);
 
-void resctrl_mon_resource_exit(void);
+int resctrl_mon_l3_resource_init(void);
+
+void resctrl_mon_l3_resource_exit(void);
 
 void mon_event_count(void *info);
 
@@ -368,8 +370,6 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
 		    struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp,
 		    cpumask_t *cpumask, struct mon_evt *evt, int first);
 
-int resctrl_mon_resource_init(void);
-
 void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom,
 				unsigned long delay_ms,
 				int exclude_cpu);
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index 92798e1fb5b0..e3eceba70713 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -86,6 +86,37 @@ unsigned int resctrl_rmid_realloc_threshold;
  */
 unsigned int resctrl_rmid_realloc_limit;
 
+static int closid_num_dirty_rmid_init(struct rdt_resource *r)
+{
+	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
+	    !closid_num_dirty_rmid) {
+		u32 num_closid = resctrl_arch_get_num_closid(r);
+		u32 *tmp;
+
+		/*
+		 * If the architecture hasn't provided a sanitised value here,
+		 * this may result in larger arrays than necessary. Resctrl will
+		 * use a smaller system wide value based on the resources in
+		 * use.
+		 */
+		tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
+		if (!tmp)
+			return -ENOMEM;
+
+		closid_num_dirty_rmid = tmp;
+	}
+
+	return 0;
+}
+
+static void closid_num_dirty_rmid_exit(void)
+{
+	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
+		kfree(closid_num_dirty_rmid);
+		closid_num_dirty_rmid = NULL;
+	}
+}
+
 /*
  * x86 and arm64 differ in their handling of monitoring.
  * x86's RMID are independent numbers, there is only one source of traffic
@@ -805,36 +836,14 @@ void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long del
 static int dom_data_init(struct rdt_resource *r)
 {
 	u32 idx_limit = resctrl_arch_system_num_rmid_idx();
-	u32 num_closid = resctrl_arch_get_num_closid(r);
 	struct rmid_entry *entry = NULL;
 	int err = 0, i;
 	u32 idx;
 
 	mutex_lock(&rdtgroup_mutex);
-	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
-		u32 *tmp;
-
-		/*
-		 * If the architecture hasn't provided a sanitised value here,
-		 * this may result in larger arrays than necessary. Resctrl will
-		 * use a smaller system wide value based on the resources in
-		 * use.
-		 */
-		tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
-		if (!tmp) {
-			err = -ENOMEM;
-			goto out_unlock;
-		}
-
-		closid_num_dirty_rmid = tmp;
-	}
 
 	rmid_ptrs = kcalloc(idx_limit, sizeof(struct rmid_entry), GFP_KERNEL);
 	if (!rmid_ptrs) {
-		if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
-			kfree(closid_num_dirty_rmid);
-			closid_num_dirty_rmid = NULL;
-		}
 		err = -ENOMEM;
 		goto out_unlock;
 	}
@@ -870,11 +879,6 @@ static void dom_data_exit(struct rdt_resource *r)
 	if (!r->mon_capable)
 		goto out_unlock;
 
-	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
-		kfree(closid_num_dirty_rmid);
-		closid_num_dirty_rmid = NULL;
-	}
-
 	kfree(rmid_ptrs);
 	rmid_ptrs = NULL;
 
@@ -938,7 +942,7 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
 }
 
 /**
- * resctrl_mon_resource_init() - Initialise global monitoring structures.
+ * resctrl_mon_l3_resource_init() - Initialise global monitoring structures.
  *
  * Allocate and initialise global monitor resources that do not belong to a
  * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists.
@@ -949,7 +953,7 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
  *
  * Returns 0 for success, or -ENOMEM.
  */
-int resctrl_mon_resource_init(void)
+int resctrl_mon_l3_resource_init(void)
 {
 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
 	int ret;
@@ -957,6 +961,10 @@ int resctrl_mon_resource_init(void)
 	if (!r->mon_capable)
 		return 0;
 
+	ret = closid_num_dirty_rmid_init(r);
+	if (ret)
+		return ret;
+
 	ret = dom_data_init(r);
 	if (ret)
 		return ret;
@@ -980,9 +988,10 @@ int resctrl_mon_resource_init(void)
 	return 0;
 }
 
-void resctrl_mon_resource_exit(void)
+void resctrl_mon_l3_resource_exit(void)
 {
 	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
 
+	closid_num_dirty_rmid_exit();
 	dom_data_exit(r);
 }
diff --git a/fs/resctrl/rdtgroup.c b/fs/resctrl/rdtgroup.c
index 9e4df213906f..b45f3d63c629 100644
--- a/fs/resctrl/rdtgroup.c
+++ b/fs/resctrl/rdtgroup.c
@@ -4114,7 +4114,7 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *h
  * Called when the first CPU of a domain comes online, regardless of whether
  * the filesystem is mounted.
  * During boot this may be called before global allocations have been made by
- * resctrl_mon_resource_init().
+ * resctrl_mon_l3_resource_init().
  *
  * Returns 0 for success, or -ENOMEM.
  */
@@ -4298,13 +4298,13 @@ int resctrl_init(void)
 
 	thread_throttle_mode_init();
 
-	ret = resctrl_mon_resource_init();
+	ret = resctrl_mon_l3_resource_init();
 	if (ret)
 		return ret;
 
 	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
 	if (ret) {
-		resctrl_mon_resource_exit();
+		resctrl_mon_l3_resource_exit();
 		return ret;
 	}
 
@@ -4339,7 +4339,7 @@ int resctrl_init(void)
 
 cleanup_mountpoint:
 	sysfs_remove_mount_point(fs_kobj, "resctrl");
-	resctrl_mon_resource_exit();
+	resctrl_mon_l3_resource_exit();
 
 	return ret;
 }
@@ -4375,7 +4375,7 @@ static bool resctrl_online_domains_exist(void)
  * When called by the architecture code, all CPUs and resctrl domains must be
  * offline. This ensures the limbo and overflow handlers are not scheduled to
  * run, meaning the data structures they access can be freed by
- * resctrl_mon_resource_exit().
+ * resctrl_mon_l3_resource_exit().
  *
  * After resctrl_exit() returns, the architecture code should return an
  * error from all resctrl_arch_ functions that can do this.
@@ -4402,5 +4402,5 @@ void resctrl_exit(void)
 	 * it can be used to umount resctrl.
 	 */
 
-	resctrl_mon_resource_exit();
+	resctrl_mon_l3_resource_exit();
 }
-- 
2.50.0
Re: [PATCH v7 26/31] fs/resctrl: Fix life-cycle of closid_num_dirty_rmid
Posted by Reinette Chatre 2 months, 1 week ago
Hi Tony,

"Fix" is a loaded word to use in a patch subject and its use cannot be
justified for a non-functional change such as this.

Do not make false claims of code being broken as patch motivation.

On 7/11/25 4:53 PM, Tony Luck wrote:
> closid_num_dirty_rmid is specific to the L3 resource, but it
> is allocated/freed in the more generic dom_data_{init,exit}().

Quite bold to make this argument when "the more generic
dom_data_{init,exit}()" is only called with the L3 resource as argument.

This is a very straightforward change but the description so far
totally obfuscates this.

This patch does two things:
a) Rename resctrl_mon_resource_init()/resctrl_mon_resource_exit() to
   resctrl_mon_l3_resource_init()/resctrl_mon_l3_resource_exit()
   respectively. As mentioned earlier, this can be done as part of the
   earlier patch that does the renaming.
b) Separate closid_num_dirty_rmid and rmid_ptrs[] allocation done in
   dom_data_init() in preparation for rmid_ptrs[] to be allocated on
   resctrl mount in support of the new telemetry events.

> 
> Add helpers to allocate/free closid_num_dirty_rmid.
> 
> Rename resctrl_mon_resource_init() to resctrl_mon_l3_resource_init()
> and call the closid_num_dirty_rmid_init() here, instead of
> allocating in dom_data_init().
> 
> Making matching changes to the exit path by renaming
> resctrl_mon_resource_exit() to resctrl_mon_l3_resource_exit()
> and free closid_num_dirty_rmid here instead of in dom_data_exit().
> 
> Suggested-by: Reinette Chatre <reinette.chatre@intel.com>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  fs/resctrl/internal.h |  6 ++--
>  fs/resctrl/monitor.c  | 69 ++++++++++++++++++++++++-------------------
>  fs/resctrl/rdtgroup.c | 12 ++++----
>  3 files changed, 48 insertions(+), 39 deletions(-)
> 
> diff --git a/fs/resctrl/internal.h b/fs/resctrl/internal.h
> index 56fdccb39375..28d505efdb7c 100644
> --- a/fs/resctrl/internal.h
> +++ b/fs/resctrl/internal.h
> @@ -358,7 +358,9 @@ int alloc_rmid(u32 closid);
>  
>  void free_rmid(u32 closid, u32 rmid);
>  
> -void resctrl_mon_resource_exit(void);
> +int resctrl_mon_l3_resource_init(void);
> +
> +void resctrl_mon_l3_resource_exit(void);
>  
>  void mon_event_count(void *info);
>  
> @@ -368,8 +370,6 @@ void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
>  		    struct rdt_domain_hdr *hdr, struct rdtgroup *rdtgrp,
>  		    cpumask_t *cpumask, struct mon_evt *evt, int first);
>  
> -int resctrl_mon_resource_init(void);
> -
>  void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom,
>  				unsigned long delay_ms,
>  				int exclude_cpu);
> diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
> index 92798e1fb5b0..e3eceba70713 100644
> --- a/fs/resctrl/monitor.c
> +++ b/fs/resctrl/monitor.c
> @@ -86,6 +86,37 @@ unsigned int resctrl_rmid_realloc_threshold;
>   */
>  unsigned int resctrl_rmid_realloc_limit;
>  
> +static int closid_num_dirty_rmid_init(struct rdt_resource *r)

It is not clear to me that these new helpers are needed. To me it seems
easier to follow if they are just open coded.

> +{
> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
> +	    !closid_num_dirty_rmid) {
> +		u32 num_closid = resctrl_arch_get_num_closid(r);
> +		u32 *tmp;
> +
> +		/*
> +		 * If the architecture hasn't provided a sanitised value here,
> +		 * this may result in larger arrays than necessary. Resctrl will
> +		 * use a smaller system wide value based on the resources in
> +		 * use.
> +		 */
> +		tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
> +		if (!tmp)
> +			return -ENOMEM;
> +
> +		closid_num_dirty_rmid = tmp;
> +	}
> +
> +	return 0;
> +}
> +
> +static void closid_num_dirty_rmid_exit(void)
> +{
> +	if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
> +		kfree(closid_num_dirty_rmid);
> +		closid_num_dirty_rmid = NULL;
> +	}
> +}
> +
>  /*
>   * x86 and arm64 differ in their handling of monitoring.
>   * x86's RMID are independent numbers, there is only one source of traffic

...

> @@ -938,7 +942,7 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
>  }
>  
>  /**
> - * resctrl_mon_resource_init() - Initialise global monitoring structures.
> + * resctrl_mon_l3_resource_init() - Initialise global monitoring structures.
>   *
>   * Allocate and initialise global monitor resources that do not belong to a
>   * specific domain. i.e. the rmid_ptrs[] used for the limbo and free lists.
> @@ -949,7 +953,7 @@ bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
>   *
>   * Returns 0 for success, or -ENOMEM.
>   */
> -int resctrl_mon_resource_init(void)
> +int resctrl_mon_l3_resource_init(void)
>  {
>  	struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
>  	int ret;
> @@ -957,6 +961,10 @@ int resctrl_mon_resource_init(void)
>  	if (!r->mon_capable)
>  		return 0;
>  
> +	ret = closid_num_dirty_rmid_init(r);
> +	if (ret)
> +		return ret;
> +
>  	ret = dom_data_init(r);
>  	if (ret)

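Leaking closid_num_dirty_rmid here? If dom_data_init() fails, this error
path returns without freeing the array that closid_num_dirty_rmid_init()
allocated just above.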

>  		return ret;

Reinette