[v8] Enhance sysfs handling for memory hotplug in weighted interleave

[PATCH v8 1/3] mm/mempolicy: Fix memory leaks in weighted interleave sysfs

Posted by Rakie Kim 9 months, 4 weeks ago

Memory leaks occurred when removing sysfs attributes for weighted
interleave. Improper kobject deallocation led to unreleased memory
when initialization failed or when nodes were removed.

This patch resolves the issue by replacing unnecessary `kfree()`
calls with proper `kobject_del()` and `kobject_put()` sequences,
ensuring correct teardown and preventing memory leaks.

By explicitly calling `kobject_del()` before `kobject_put()`, the
release function is now invoked safely, and internal sysfs state is
correctly cleaned up. This guarantees that the memory associated with
the kobject is fully released and avoids resource leaks, thereby
improving system stability.

Additionally, sysfs_remove_file() is no longer called from the release
function to avoid accessing invalid sysfs state after kobject_del().
All attribute removals are now done before kobject_del(), preventing
WARN_ON() in kernfs and ensuring safe and consistent cleanup of sysfs
entries.

Fixes: dce41f5ae253 ("mm/mempolicy: implement the sysfs-based weighted_interleave interface")
Signed-off-by: Rakie Kim <rakie.kim@sk.com>
Reviewed-by: Gregory Price <gourry@gourry.net>
Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 mm/mempolicy.c | 111 +++++++++++++++++++++++++++----------------------
 1 file changed, 61 insertions(+), 50 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b28a1e6ae096..dcf03c389b51 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -3463,8 +3463,8 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr,
 
 static struct iw_node_attr **node_attrs;
 
-static void sysfs_wi_node_release(struct iw_node_attr *node_attr,
-				  struct kobject *parent)
+static void sysfs_wi_node_delete(struct iw_node_attr *node_attr,
+				 struct kobject *parent)
 {
 	if (!node_attr)
 		return;
@@ -3473,18 +3473,41 @@ static void sysfs_wi_node_release(struct iw_node_attr *node_attr,
 	kfree(node_attr);
 }
 
-static void sysfs_wi_release(struct kobject *wi_kobj)
+static void sysfs_wi_node_delete_all(struct kobject *wi_kobj)
 {
-	int i;
+	int nid;
 
-	for (i = 0; i < nr_node_ids; i++)
-		sysfs_wi_node_release(node_attrs[i], wi_kobj);
-	kobject_put(wi_kobj);
+	for (nid = 0; nid < nr_node_ids; nid++)
+		sysfs_wi_node_delete(node_attrs[nid], wi_kobj);
+}
+
+static void iw_table_free(void)
+{
+	u8 *old;
+
+	mutex_lock(&iw_table_lock);
+	old = rcu_dereference_protected(iw_table,
+					lockdep_is_held(&iw_table_lock));
+	if (old) {
+		rcu_assign_pointer(iw_table, NULL);
+		mutex_unlock(&iw_table_lock);
+
+		synchronize_rcu();
+		kfree(old);
+	} else
+		mutex_unlock(&iw_table_lock);
+}
+
+static void wi_kobj_release(struct kobject *wi_kobj)
+{
+	iw_table_free();
+	kfree(node_attrs);
+	kfree(wi_kobj);
 }
 
 static const struct kobj_type wi_ktype = {
 	.sysfs_ops = &kobj_sysfs_ops,
-	.release = sysfs_wi_release,
+	.release = wi_kobj_release,
 };
 
 static int add_weight_node(int nid, struct kobject *wi_kobj)
@@ -3525,41 +3548,42 @@ static int add_weighted_interleave_group(struct kobject *root_kobj)
 	struct kobject *wi_kobj;
 	int nid, err;
 
+	node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *),
+			     GFP_KERNEL);
+	if (!node_attrs)
+		return -ENOMEM;
+
 	wi_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
-	if (!wi_kobj)
+	if (!wi_kobj) {
+		kfree(node_attrs);
 		return -ENOMEM;
+	}
 
 	err = kobject_init_and_add(wi_kobj, &wi_ktype, root_kobj,
 				   "weighted_interleave");
-	if (err) {
-		kfree(wi_kobj);
-		return err;
-	}
+	if (err)
+		goto err_put_kobj;
 
 	for_each_node_state(nid, N_POSSIBLE) {
 		err = add_weight_node(nid, wi_kobj);
 		if (err) {
 			pr_err("failed to add sysfs [node%d]\n", nid);
-			break;
+			goto err_cleanup_kobj;
 		}
 	}
-	if (err)
-		kobject_put(wi_kobj);
+
 	return 0;
+
+err_cleanup_kobj:
+	sysfs_wi_node_delete_all(wi_kobj);
+	kobject_del(wi_kobj);
+err_put_kobj:
+	kobject_put(wi_kobj);
+	return err;
 }
 
 static void mempolicy_kobj_release(struct kobject *kobj)
 {
-	u8 *old;
-
-	mutex_lock(&iw_table_lock);
-	old = rcu_dereference_protected(iw_table,
-					lockdep_is_held(&iw_table_lock));
-	rcu_assign_pointer(iw_table, NULL);
-	mutex_unlock(&iw_table_lock);
-	synchronize_rcu();
-	kfree(old);
-	kfree(node_attrs);
 	kfree(kobj);
 }
 
@@ -3573,37 +3597,24 @@ static int __init mempolicy_sysfs_init(void)
 	static struct kobject *mempolicy_kobj;
 
 	mempolicy_kobj = kzalloc(sizeof(*mempolicy_kobj), GFP_KERNEL);
-	if (!mempolicy_kobj) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-
-	node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *),
-			     GFP_KERNEL);
-	if (!node_attrs) {
-		err = -ENOMEM;
-		goto mempol_out;
-	}
+	if (!mempolicy_kobj)
+		return -ENOMEM;
 
 	err = kobject_init_and_add(mempolicy_kobj, &mempolicy_ktype, mm_kobj,
 				   "mempolicy");
 	if (err)
-		goto node_out;
+		goto err_put_kobj;
 
 	err = add_weighted_interleave_group(mempolicy_kobj);
-	if (err) {
-		pr_err("mempolicy sysfs structure failed to initialize\n");
-		kobject_put(mempolicy_kobj);
-		return err;
-	}
+	if (err)
+		goto err_del_kobj;
 
-	return err;
-node_out:
-	kfree(node_attrs);
-mempol_out:
-	kfree(mempolicy_kobj);
-err_out:
-	pr_err("failed to add mempolicy kobject to the system\n");
+	return 0;
+
+err_del_kobj:
+	kobject_del(mempolicy_kobj);
+err_put_kobj:
+	kobject_put(mempolicy_kobj);
 	return err;
 }
 
-- 
2.34.1

Re: [PATCH v8 1/3] mm/mempolicy: Fix memory leaks in weighted interleave sysfs

Posted by Dan Williams 9 months, 4 weeks ago

Rakie Kim wrote:
> Memory leaks occurred when removing sysfs attributes for weighted
> interleave. Improper kobject deallocation led to unreleased memory
> when initialization failed or when nodes were removed.
> 
> This patch resolves the issue by replacing unnecessary `kfree()`
> calls with proper `kobject_del()` and `kobject_put()` sequences,
> ensuring correct teardown and preventing memory leaks.
> 
> By explicitly calling `kobject_del()` before `kobject_put()`, the
> release function is now invoked safely, and internal sysfs state is
> correctly cleaned up. This guarantees that the memory associated with
> the kobject is fully released and avoids resource leaks, thereby
> improving system stability.
> 
> Additionally, sysfs_remove_file() is no longer called from the release
> function to avoid accessing invalid sysfs state after kobject_del().
> All attribute removals are now done before kobject_del(), preventing
> WARN_ON() in kernfs and ensuring safe and consistent cleanup of sysfs
> entries.
> 
> Fixes: dce41f5ae253 ("mm/mempolicy: implement the sysfs-based weighted_interleave interface")
> Signed-off-by: Rakie Kim <rakie.kim@sk.com>
> Reviewed-by: Gregory Price <gourry@gourry.net>
> Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> ---
>  mm/mempolicy.c | 111 +++++++++++++++++++++++++++----------------------
>  1 file changed, 61 insertions(+), 50 deletions(-)
> 
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index b28a1e6ae096..dcf03c389b51 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -3463,8 +3463,8 @@ static ssize_t node_store(struct kobject *kobj, struct kobj_attribute *attr,
>  
>  static struct iw_node_attr **node_attrs;
>  
> -static void sysfs_wi_node_release(struct iw_node_attr *node_attr,
> -				  struct kobject *parent)
> +static void sysfs_wi_node_delete(struct iw_node_attr *node_attr,
> +				 struct kobject *parent)
>  {
>  	if (!node_attr)
>  		return;
> @@ -3473,18 +3473,41 @@ static void sysfs_wi_node_release(struct iw_node_attr *node_attr,
>  	kfree(node_attr);
>  }
>  
> -static void sysfs_wi_release(struct kobject *wi_kobj)
> +static void sysfs_wi_node_delete_all(struct kobject *wi_kobj)
>  {
> -	int i;
> +	int nid;
>  
> -	for (i = 0; i < nr_node_ids; i++)
> -		sysfs_wi_node_release(node_attrs[i], wi_kobj);
> -	kobject_put(wi_kobj);
> +	for (nid = 0; nid < nr_node_ids; nid++)
> +		sysfs_wi_node_delete(node_attrs[nid], wi_kobj);
> +}
> +
> +static void iw_table_free(void)
> +{
> +	u8 *old;
> +
> +	mutex_lock(&iw_table_lock);
> +	old = rcu_dereference_protected(iw_table,
> +					lockdep_is_held(&iw_table_lock));
> +	if (old) {
> +		rcu_assign_pointer(iw_table, NULL);
> +		mutex_unlock(&iw_table_lock);
> +
> +		synchronize_rcu();
> +		kfree(old);
> +	} else
> +		mutex_unlock(&iw_table_lock);

This looks correct. I personally would not have spent the effort to
avoid the synchronize_rcu() because this is an error path that rarely
gets triggered, and kfree(NULL) is already a nop, so no pressing need to be
careful there either:

	mutex_lock(&iw_table_lock);
	old = rcu_dereference_protected(iw_table,
					lockdep_is_held(&iw_table_lock));
	rcu_assign_pointer(iw_table, NULL);
	mutex_unlock(&iw_table_lock);
	synchronize_rcu();
	kfree(old);

> +}
> +
> +static void wi_kobj_release(struct kobject *wi_kobj)
> +{
> +	iw_table_free();

This memory can be freed as soon as node_attrs have been deleted. By
waiting until final wi_kobj release it confuses the lifetime rules.

> +	kfree(node_attrs);

This memory too can be freed as soon as the attributes are deleted.

...the rationale for considering these additional cleanups below:

> +	kfree(wi_kobj);
>  }
>  
>  static const struct kobj_type wi_ktype = {
>  	.sysfs_ops = &kobj_sysfs_ops,
> -	.release = sysfs_wi_release,
> +	.release = wi_kobj_release,
>  };
>  
>  static int add_weight_node(int nid, struct kobject *wi_kobj)
> @@ -3525,41 +3548,42 @@ static int add_weighted_interleave_group(struct kobject *root_kobj)
>  	struct kobject *wi_kobj;
>  	int nid, err;
>  
> +	node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *),
> +			     GFP_KERNEL);
> +	if (!node_attrs)
> +		return -ENOMEM;
> +
>  	wi_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
> -	if (!wi_kobj)
> +	if (!wi_kobj) {
> +		kfree(node_attrs);
>  		return -ENOMEM;
> +	}
>  
>  	err = kobject_init_and_add(wi_kobj, &wi_ktype, root_kobj,
>  				   "weighted_interleave");

If you fix wi_kobj_release() to stop being responsible to free memory
that should have been handled in the delete path (@node_attrs,
iw_table_free()), then you can also drop the wi_ktype and
wi_kobj_release() callback altogether.

I.e. once releasing @wi_kobj is just "kfree(wi_kobj)", then this
sequence:

	wi_kobj = kzalloc(...)
	kobject_init_and_add(wi_kob, &wi_ktype, ...)

Can simply become:

	wi_kobj = kobject_create_and_add("weighted_interleave", root_kobj);

> -	if (err) {
> -		kfree(wi_kobj);
> -		return err;
> -	}
> +	if (err)
> +		goto err_put_kobj;
>  
>  	for_each_node_state(nid, N_POSSIBLE) {
>  		err = add_weight_node(nid, wi_kobj);
>  		if (err) {
>  			pr_err("failed to add sysfs [node%d]\n", nid);
> -			break;
> +			goto err_cleanup_kobj;
>  		}
>  	}
> -	if (err)
> -		kobject_put(wi_kobj);
> +
>  	return 0;
> +
> +err_cleanup_kobj:
> +	sysfs_wi_node_delete_all(wi_kobj);
> +	kobject_del(wi_kobj);
> +err_put_kobj:
> +	kobject_put(wi_kobj);
> +	return err;
>  }
>  
>  static void mempolicy_kobj_release(struct kobject *kobj)
>  {
> -	u8 *old;
> -
> -	mutex_lock(&iw_table_lock);
> -	old = rcu_dereference_protected(iw_table,
> -					lockdep_is_held(&iw_table_lock));
> -	rcu_assign_pointer(iw_table, NULL);
> -	mutex_unlock(&iw_table_lock);
> -	synchronize_rcu();
> -	kfree(old);
> -	kfree(node_attrs);
>  	kfree(kobj);
>  }
>  
> @@ -3573,37 +3597,24 @@ static int __init mempolicy_sysfs_init(void)
>  	static struct kobject *mempolicy_kobj;
>  
>  	mempolicy_kobj = kzalloc(sizeof(*mempolicy_kobj), GFP_KERNEL);
> -	if (!mempolicy_kobj) {
> -		err = -ENOMEM;
> -		goto err_out;
> -	}
> -
> -	node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *),
> -			     GFP_KERNEL);
> -	if (!node_attrs) {
> -		err = -ENOMEM;
> -		goto mempol_out;
> -	}
> +	if (!mempolicy_kobj)
> +		return -ENOMEM;
>  
>  	err = kobject_init_and_add(mempolicy_kobj, &mempolicy_ktype, mm_kobj,
>  				   "mempolicy");

Similar comment as above, now that mempolicy_kobj_release() is simply
kfree(@kobj), you can use kobject_create_and_add() and make this all
that much simpler.

So the patch looks technically correct as is, but if you make those
final cleanups I will add my review tag.

Re: [PATCH v8 1/3] mm/mempolicy: Fix memory leaks in weighted interleave sysfs

Posted by Rakie Kim 9 months, 4 weeks ago

On Wed, 16 Apr 2025 14:54:16 -0700 Dan Williams <dan.j.williams@intel.com> wrote:
> Rakie Kim wrote:
> > +
> > +static void iw_table_free(void)
> > +{
> > +	u8 *old;
> > +
> > +	mutex_lock(&iw_table_lock);
> > +	old = rcu_dereference_protected(iw_table,
> > +					lockdep_is_held(&iw_table_lock));
> > +	if (old) {
> > +		rcu_assign_pointer(iw_table, NULL);
> > +		mutex_unlock(&iw_table_lock);
> > +
> > +		synchronize_rcu();
> > +		kfree(old);
> > +	} else
> > +		mutex_unlock(&iw_table_lock);
> 
> This looks correct. I personally would not have spent the effort to
> avoid the synchronize_rcu() because this is an error path that rarely
> gets triggered, and kfree(NULL) is already a nop, so no pressing need to be
> careful there either:
> 
> 	mutex_lock(&iw_table_lock);
> 	old = rcu_dereference_protected(iw_table,
> 					lockdep_is_held(&iw_table_lock));
> 	rcu_assign_pointer(iw_table, NULL);
> 	mutex_unlock(&iw_table_lock);
> 	synchronize_rcu();
> 	kfree(old);

I will modify the structure as you suggested.

> 
> > +}
> > +
> > +static void wi_kobj_release(struct kobject *wi_kobj)
> > +{
> > +	iw_table_free();
> 
> This memory can be freed as soon as node_attrs have been deleted. By
> waiting until final wi_kobj release it confuses the lifetime rules.
> 
> > +	kfree(node_attrs);
> 
> This memory too can be freed as soon as the attributes are deleted.
> 
> ...the rationale for considering these additional cleanups below:
> 

I created a new function named wi_cleanup(), as you proposed.
static void wi_cleanup(struct kobject *wi_kobj) {
	sysfs_wi_node_delete_all(wi_kobj);
	iw_table_free();
	kfree(node_attrs);
}
And I changed the error handling code to call this function.
static int add_weighted_interleave_group(struct kobject *root_kobj)
{
...
err_cleanup_kobj:
	wi_cleanup(wi_kobj);
	kobject_del(wi_kobj);
err_put_kobj:
	kobject_put(wi_kobj);
	return err;
}

However, I have one question.
With this change, iw_table and node_attrs will not be freed at system
shutdown. Is it acceptable to leave this memory unfreed on shutdown?
(As you previously noted, the sysfs files in this patch are also
not removed during system shutdown.)

> > +	kfree(wi_kobj);
> >  }
> >  
> >  static const struct kobj_type wi_ktype = {
> >  	.sysfs_ops = &kobj_sysfs_ops,
> > -	.release = sysfs_wi_release,
> > +	.release = wi_kobj_release,
> >  };
> >  
> >  static int add_weight_node(int nid, struct kobject *wi_kobj)
> > @@ -3525,41 +3548,42 @@ static int add_weighted_interleave_group(struct kobject *root_kobj)
> >  	struct kobject *wi_kobj;
> >  	int nid, err;
> >  
> > +	node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *),
> > +			     GFP_KERNEL);
> > +	if (!node_attrs)
> > +		return -ENOMEM;
> > +
> >  	wi_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
> > -	if (!wi_kobj)
> > +	if (!wi_kobj) {
> > +		kfree(node_attrs);
> >  		return -ENOMEM;
> > +	}
> >  
> >  	err = kobject_init_and_add(wi_kobj, &wi_ktype, root_kobj,
> >  				   "weighted_interleave");
> 
> If you fix wi_kobj_release() to stop being responsible to free memory
> that should have been handled in the delete path (@node_attrs,
> iw_table_free()), then you can also drop the wi_ktype and
> wi_kobj_release() callback altogether.

I understand your suggestion about simplifying the kobject
handling.
If we only consider Patch1, then replacing kobject_init_and_add
with kobject_create_and_add would be the right choice.

However, in Patch2, the code changes as follows:
struct sysfs_wi_group {
	struct kobject wi_kobj;
	struct iw_node_attr *nattrs[];
};
static struct sysfs_wi_group *wi_group;
...
static void wi_kobj_release(struct kobject *wi_kobj)
{
	kfree(wi_group);
}
...
static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj)
{
	int nid, err;

	wi_group = kzalloc(struct_size(wi_group, nattrs, nr_node_ids),
			   GFP_KERNEL);

In this case, wi_kobj_release() is responsible for freeing the
container struct wi_group that includes the kobject.
Therefore, it seems more appropriate to use kobject_init_and_add
in this context.
I would appreciate your thoughts on this.

> 
> I.e. once releasing @wi_kobj is just "kfree(wi_kobj)", then this
> sequence:
> 
> 	wi_kobj = kzalloc(...)
> 	kobject_init_and_add(wi_kob, &wi_ktype, ...)
> 
> Can simply become:
> 
> 	wi_kobj = kobject_create_and_add("weighted_interleave", root_kobj);
> 
> > -	if (err) {
> > -		kfree(wi_kobj);
> > -		return err;
> > -	}
> > +	if (err)
> > +		goto err_put_kobj;
> >  
> >  	for_each_node_state(nid, N_POSSIBLE) {
> >  		err = add_weight_node(nid, wi_kobj);
> >  		if (err) {
> >  			pr_err("failed to add sysfs [node%d]\n", nid);
> > -			break;
> > +			goto err_cleanup_kobj;
> >  		}
> >  	}
> > -	if (err)
> > -		kobject_put(wi_kobj);
> > +
> >  	return 0;
> > +
> > +err_cleanup_kobj:
> > +	sysfs_wi_node_delete_all(wi_kobj);
> > +	kobject_del(wi_kobj);
> > +err_put_kobj:
> > +	kobject_put(wi_kobj);
> > +	return err;
> >  }
> >  
> >  static void mempolicy_kobj_release(struct kobject *kobj)
> >  {
> > -	u8 *old;
> > -
> > -	mutex_lock(&iw_table_lock);
> > -	old = rcu_dereference_protected(iw_table,
> > -					lockdep_is_held(&iw_table_lock));
> > -	rcu_assign_pointer(iw_table, NULL);
> > -	mutex_unlock(&iw_table_lock);
> > -	synchronize_rcu();
> > -	kfree(old);
> > -	kfree(node_attrs);
> >  	kfree(kobj);
> >  }
> >  
> > @@ -3573,37 +3597,24 @@ static int __init mempolicy_sysfs_init(void)
> >  	static struct kobject *mempolicy_kobj;
> >  
> >  	mempolicy_kobj = kzalloc(sizeof(*mempolicy_kobj), GFP_KERNEL);
> > -	if (!mempolicy_kobj) {
> > -		err = -ENOMEM;
> > -		goto err_out;
> > -	}
> > -
> > -	node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *),
> > -			     GFP_KERNEL);
> > -	if (!node_attrs) {
> > -		err = -ENOMEM;
> > -		goto mempol_out;
> > -	}
> > +	if (!mempolicy_kobj)
> > +		return -ENOMEM;
> >  
> >  	err = kobject_init_and_add(mempolicy_kobj, &mempolicy_ktype, mm_kobj,
> >  				   "mempolicy");
> 
> Similar comment as above, now that mempolicy_kobj_release() is simply
> kfree(@kobj), you can use kobject_create_and_add() and make this all
> that much simpler.

For the mempolicy_kobj, I will update the code as you suggested
and use kobject_create_and_add().

With all your recommendations applied, Patch1 would now look like this:
@@ -3488,20 +3488,21 @@ static void iw_table_free(void)
        mutex_lock(&iw_table_lock);
        old = rcu_dereference_protected(iw_table,
                                        lockdep_is_held(&iw_table_lock));
-       if (old) {
-               rcu_assign_pointer(iw_table, NULL);
-               mutex_unlock(&iw_table_lock);
+       rcu_assign_pointer(iw_table, NULL);
+       mutex_unlock(&iw_table_lock);

-               synchronize_rcu();
-               kfree(old);
-       } else
-               mutex_unlock(&iw_table_lock);
+       synchronize_rcu();
+       kfree(old);
 }

-static void wi_kobj_release(struct kobject *wi_kobj)
-{
+static void wi_cleanup(struct kobject *wi_kobj) {
+       sysfs_wi_node_delete_all(wi_kobj);
        iw_table_free();
        kfree(node_attrs);
+}
+
+static void wi_kobj_release(struct kobject *wi_kobj)
+{
        kfree(wi_kobj);
 }

@@ -3575,45 +3576,30 @@ static int add_weighted_interleave_group(struct kobject *root_kobj)
        return 0;

 err_cleanup_kobj:
-       sysfs_wi_node_delete_all(wi_kobj);
+       wi_cleanup(wi_kobj);
        kobject_del(wi_kobj);
 err_put_kobj:
        kobject_put(wi_kobj);
        return err;
 }

-static void mempolicy_kobj_release(struct kobject *kobj)
-{
-       kfree(kobj);
-}
-
-static const struct kobj_type mempolicy_ktype = {
-       .release = mempolicy_kobj_release
-};
-
 static int __init mempolicy_sysfs_init(void)
 {
        int err;
        static struct kobject *mempolicy_kobj;

-       mempolicy_kobj = kzalloc(sizeof(*mempolicy_kobj), GFP_KERNEL);
+       mempolicy_kobj = kobject_create_and_add("mempolicy", mm_kobj);
        if (!mempolicy_kobj)
                return -ENOMEM;

-       err = kobject_init_and_add(mempolicy_kobj, &mempolicy_ktype, mm_kobj,
-                                  "mempolicy");
-       if (err)
-               goto err_put_kobj;
-
        err = add_weighted_interleave_group(mempolicy_kobj);
        if (err)
-               goto err_del_kobj;
+               goto err_kobj;

        return 0;

-err_del_kobj:
+err_kobj:
        kobject_del(mempolicy_kobj);
-err_put_kobj:
        kobject_put(mempolicy_kobj);
        return err;
 }

Rakie

> 
> So the patch looks technically correct as is, but if you make those
> final cleanups I will add my review tag.

Re: [PATCH v8 1/3] mm/mempolicy: Fix memory leaks in weighted interleave sysfs

Posted by Dan Williams 9 months, 4 weeks ago

Rakie Kim wrote:
> On Wed, 16 Apr 2025 14:54:16 -0700 Dan Williams <dan.j.williams@intel.com> wrote:
> > Rakie Kim wrote:
> > > +
> > > +static void iw_table_free(void)
> > > +{
> > > +	u8 *old;
> > > +
> > > +	mutex_lock(&iw_table_lock);
> > > +	old = rcu_dereference_protected(iw_table,
> > > +					lockdep_is_held(&iw_table_lock));
> > > +	if (old) {
> > > +		rcu_assign_pointer(iw_table, NULL);
> > > +		mutex_unlock(&iw_table_lock);
> > > +
> > > +		synchronize_rcu();
> > > +		kfree(old);
> > > +	} else
> > > +		mutex_unlock(&iw_table_lock);
> > 
> > This looks correct. I personally would not have spent the effort to
> > avoid the synchronize_rcu() because this is an error path that rarely
> > gets triggered, and kfree(NULL) is already a nop, so no pressing need to be
> > careful there either:
> > 
> > 	mutex_lock(&iw_table_lock);
> > 	old = rcu_dereference_protected(iw_table,
> > 					lockdep_is_held(&iw_table_lock));
> > 	rcu_assign_pointer(iw_table, NULL);
> > 	mutex_unlock(&iw_table_lock);
> > 	synchronize_rcu();
> > 	kfree(old);
> 
> I will modify the structure as you suggested.
> 
> > 
> > > +}
> > > +
> > > +static void wi_kobj_release(struct kobject *wi_kobj)
> > > +{
> > > +	iw_table_free();
> > 
> > This memory can be freed as soon as node_attrs have been deleted. By
> > waiting until final wi_kobj release it confuses the lifetime rules.
> > 
> > > +	kfree(node_attrs);
> > 
> > This memory too can be freed as soon as the attributes are deleted.
> > 
> > ...the rationale for considering these additional cleanups below:
> > 
> 
> I created a new function named wi_cleanup(), as you proposed.
> static void wi_cleanup(struct kobject *wi_kobj) {
> 	sysfs_wi_node_delete_all(wi_kobj);
> 	iw_table_free();
> 	kfree(node_attrs);

Looks good.

> }
> And I changed the error handling code to call this function.
> static int add_weighted_interleave_group(struct kobject *root_kobj)
> {
> ...
> err_cleanup_kobj:
> 	wi_cleanup(wi_kobj);
> 	kobject_del(wi_kobj);
> err_put_kobj:
> 	kobject_put(wi_kobj);
> 	return err;
> }
> 
> However, I have one question.
> With this change, iw_table and node_attrs will not be freed at system
> shutdown. Is it acceptable to leave this memory unfreed on shutdown?
> (As you previously noted, the sysfs files in this patch are also
> not removed during system shutdown.)

Yes, and note that most drivers do not implement a ->shutdown() handler
which means most drivers leak allocations from ->probe() when the system
is shut down.

> > > +	kfree(wi_kobj);
> > >  }
> > >  
> > >  static const struct kobj_type wi_ktype = {
> > >  	.sysfs_ops = &kobj_sysfs_ops,
> > > -	.release = sysfs_wi_release,
> > > +	.release = wi_kobj_release,
> > >  };
> > >  
> > >  static int add_weight_node(int nid, struct kobject *wi_kobj)
> > > @@ -3525,41 +3548,42 @@ static int add_weighted_interleave_group(struct kobject *root_kobj)
> > >  	struct kobject *wi_kobj;
> > >  	int nid, err;
> > >  
> > > +	node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *),
> > > +			     GFP_KERNEL);
> > > +	if (!node_attrs)
> > > +		return -ENOMEM;
> > > +
> > >  	wi_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
> > > -	if (!wi_kobj)
> > > +	if (!wi_kobj) {
> > > +		kfree(node_attrs);
> > >  		return -ENOMEM;
> > > +	}
> > >  
> > >  	err = kobject_init_and_add(wi_kobj, &wi_ktype, root_kobj,
> > >  				   "weighted_interleave");
> > 
> > If you fix wi_kobj_release() to stop being responsible to free memory
> > that should have been handled in the delete path (@node_attrs,
> > iw_table_free()), then you can also drop the wi_ktype and
> > wi_kobj_release() callback altogether.
> 
> I understand your suggestion about simplifying the kobject
> handling.
> If we only consider Patch1, then replacing kobject_init_and_add
> with kobject_create_and_add would be the right choice.
> 
> However, in Patch2, the code changes as follows:
> struct sysfs_wi_group {
> 	struct kobject wi_kobj;
> 	struct iw_node_attr *nattrs[];
> };
> static struct sysfs_wi_group *wi_group;
> ...
> static void wi_kobj_release(struct kobject *wi_kobj)
> {
> 	kfree(wi_group);
> }
> ...
> static int __init add_weighted_interleave_group(struct kobject *mempolicy_kobj)
> {
> 	int nid, err;
> 
> 	wi_group = kzalloc(struct_size(wi_group, nattrs, nr_node_ids),
> 			   GFP_KERNEL);
> 
> In this case, wi_kobj_release() is responsible for freeing the
> container struct wi_group that includes the kobject.
> Therefore, it seems more appropriate to use kobject_init_and_add
> in this context.

Ah, ok, I agree with you.

> I would appreciate your thoughts on this.
> 
> > 
> > I.e. once releasing @wi_kobj is just "kfree(wi_kobj)", then this
> > sequence:
> > 
> > 	wi_kobj = kzalloc(...)
> > 	kobject_init_and_add(wi_kob, &wi_ktype, ...)
> > 
> > Can simply become:
> > 
> > 	wi_kobj = kobject_create_and_add("weighted_interleave", root_kobj);
> > 
> > > -	if (err) {
> > > -		kfree(wi_kobj);
> > > -		return err;
> > > -	}
> > > +	if (err)
> > > +		goto err_put_kobj;
> > >  
> > >  	for_each_node_state(nid, N_POSSIBLE) {
> > >  		err = add_weight_node(nid, wi_kobj);
> > >  		if (err) {
> > >  			pr_err("failed to add sysfs [node%d]\n", nid);
> > > -			break;
> > > +			goto err_cleanup_kobj;
> > >  		}
> > >  	}
> > > -	if (err)
> > > -		kobject_put(wi_kobj);
> > > +
> > >  	return 0;
> > > +
> > > +err_cleanup_kobj:
> > > +	sysfs_wi_node_delete_all(wi_kobj);
> > > +	kobject_del(wi_kobj);
> > > +err_put_kobj:
> > > +	kobject_put(wi_kobj);
> > > +	return err;
> > >  }
> > >  
> > >  static void mempolicy_kobj_release(struct kobject *kobj)
> > >  {
> > > -	u8 *old;
> > > -
> > > -	mutex_lock(&iw_table_lock);
> > > -	old = rcu_dereference_protected(iw_table,
> > > -					lockdep_is_held(&iw_table_lock));
> > > -	rcu_assign_pointer(iw_table, NULL);
> > > -	mutex_unlock(&iw_table_lock);
> > > -	synchronize_rcu();
> > > -	kfree(old);
> > > -	kfree(node_attrs);
> > >  	kfree(kobj);
> > >  }
> > >  
> > > @@ -3573,37 +3597,24 @@ static int __init mempolicy_sysfs_init(void)
> > >  	static struct kobject *mempolicy_kobj;
> > >  
> > >  	mempolicy_kobj = kzalloc(sizeof(*mempolicy_kobj), GFP_KERNEL);
> > > -	if (!mempolicy_kobj) {
> > > -		err = -ENOMEM;
> > > -		goto err_out;
> > > -	}
> > > -
> > > -	node_attrs = kcalloc(nr_node_ids, sizeof(struct iw_node_attr *),
> > > -			     GFP_KERNEL);
> > > -	if (!node_attrs) {
> > > -		err = -ENOMEM;
> > > -		goto mempol_out;
> > > -	}
> > > +	if (!mempolicy_kobj)
> > > +		return -ENOMEM;
> > >  
> > >  	err = kobject_init_and_add(mempolicy_kobj, &mempolicy_ktype, mm_kobj,
> > >  				   "mempolicy");
> > 
> > Similar comment as above, now that mempolicy_kobj_release() is simply
> > kfree(@kobj), you can use kobject_create_and_add() and make this all
> > that much simpler.
> 
> For the mempolicy_kobj, I will update the code as you suggested
> and use kobject_create_and_add().
> 
> With all your recommendations applied, Patch1 would now look like this:
[..]

Changes look good.

With those changes you can add:

Reviewed-by: Dan Williams <dan.j.williams@intel.com>

Thanks for all the patience with this!

[PATCH v8 1/3] mm/mempolicy: Fix memory leaks in weighted interleave sysfs
[PATCH v8 2/3] mm/mempolicy: Prepare weighted interleave sysfs for memory hotplug
[PATCH v8 3/3] mm/mempolicy: Support memory hotplug in weighted interleave