[PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap

Yu Kuai posted 23 patches 6 months, 4 weeks ago
There is a newer version of this series
[PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap
Posted by Yu Kuai 6 months, 4 weeks ago
From: Yu Kuai <yukuai3@huawei.com>

Currently bitmap_ops is registered while allocating mddev, this is fine
when there is only one bitmap_ops, however, after introduing a new
bitmap_ops, user space need a time window to choose which bitmap_ops to
use while creating new array.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
 1 file changed, 55 insertions(+), 31 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4eb0c6effd5b..dc4b85f30e13 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
 
 static bool mddev_set_bitmap_ops(struct mddev *mddev)
 {
+	struct bitmap_operations *old = mddev->bitmap_ops;
+	struct md_submodule_head *head;
+
+	if (mddev->bitmap_id == ID_BITMAP_NONE ||
+	    (old && old->head.id == mddev->bitmap_id))
+		return true;
+
 	xa_lock(&md_submodule);
-	mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
+	head = xa_load(&md_submodule, mddev->bitmap_id);
 	xa_unlock(&md_submodule);
 
-	if (!mddev->bitmap_ops) {
-		pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
+	if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
+		pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
 		return false;
 	}
 
+	if (old && old->group)
+		sysfs_remove_group(&mddev->kobj, old->group);
+
+	mddev->bitmap_ops = (void *)head;
+	if (mddev->bitmap_ops && mddev->bitmap_ops->group &&
+	    sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
+		pr_warn("md: cannot register extra bitmap attributes for %s\n",
+			mdname(mddev));
+
 	return true;
 }
 
 static void mddev_clear_bitmap_ops(struct mddev *mddev)
 {
+	if (mddev->bitmap_ops && mddev->bitmap_ops->group)
+		sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group);
+
 	mddev->bitmap_ops = NULL;
 }
 
 int mddev_init(struct mddev *mddev)
 {
-	mddev->bitmap_id = ID_BITMAP;
-
-	if (!mddev_set_bitmap_ops(mddev))
-		return -EINVAL;
-
 	if (percpu_ref_init(&mddev->active_io, active_io_release,
-			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
-		mddev_clear_bitmap_ops(mddev);
+			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
 		return -ENOMEM;
-	}
 
 	if (percpu_ref_init(&mddev->writes_pending, no_op,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
-		mddev_clear_bitmap_ops(mddev);
 		percpu_ref_exit(&mddev->active_io);
 		return -ENOMEM;
 	}
@@ -734,6 +745,7 @@ int mddev_init(struct mddev *mddev)
 	mddev->resync_min = 0;
 	mddev->resync_max = MaxSector;
 	mddev->level = LEVEL_NONE;
+	mddev->bitmap_id = ID_BITMAP;
 
 	INIT_WORK(&mddev->sync_work, md_start_sync);
 	INIT_WORK(&mddev->del_work, mddev_delayed_delete);
@@ -744,7 +756,6 @@ EXPORT_SYMBOL_GPL(mddev_init);
 
 void mddev_destroy(struct mddev *mddev)
 {
-	mddev_clear_bitmap_ops(mddev);
 	percpu_ref_exit(&mddev->active_io);
 	percpu_ref_exit(&mddev->writes_pending);
 }
@@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
 		return ERR_PTR(error);
 	}
 
-	if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
-		if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
-			pr_warn("md: cannot register extra bitmap attributes for %s\n",
-				mdname(mddev));
-
 	kobject_uevent(&mddev->kobj, KOBJ_ADD);
 	mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
 	mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
@@ -6173,6 +6179,26 @@ static void md_safemode_timeout(struct timer_list *t)
 
 static int start_dirty_degraded;
 
+static int md_bitmap_create(struct mddev *mddev)
+{
+	if (mddev->bitmap_id == ID_BITMAP_NONE)
+		return -EINVAL;
+
+	if (!mddev_set_bitmap_ops(mddev))
+		return -ENOENT;
+
+	return mddev->bitmap_ops->create(mddev);
+}
+
+static void md_bitmap_destroy(struct mddev *mddev)
+{
+	if (!md_bitmap_registered(mddev))
+		return;
+
+	mddev->bitmap_ops->destroy(mddev);
+	mddev_clear_bitmap_ops(mddev);
+}
+
 int md_run(struct mddev *mddev)
 {
 	int err;
@@ -6337,9 +6363,9 @@ int md_run(struct mddev *mddev)
 			(unsigned long long)pers->size(mddev, 0, 0) / 2);
 		err = -EINVAL;
 	}
-	if (err == 0 && pers->sync_request && md_bitmap_registered(mddev) &&
+	if (err == 0 && pers->sync_request &&
 	    (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
-		err = mddev->bitmap_ops->create(mddev);
+		err = md_bitmap_create(mddev);
 		if (err)
 			pr_warn("%s: failed to create bitmap (%d)\n",
 				mdname(mddev), err);
@@ -6412,8 +6438,7 @@ int md_run(struct mddev *mddev)
 		pers->free(mddev, mddev->private);
 	mddev->private = NULL;
 	put_pers(pers);
-	if (md_bitmap_registered(mddev))
-		mddev->bitmap_ops->destroy(mddev);
+	md_bitmap_destroy(mddev);
 abort:
 	bioset_exit(&mddev->io_clone_set);
 exit_sync_set:
@@ -6436,7 +6461,7 @@ int do_md_run(struct mddev *mddev)
 	if (md_bitmap_registered(mddev)) {
 		err = mddev->bitmap_ops->load(mddev);
 		if (err) {
-			mddev->bitmap_ops->destroy(mddev);
+			md_bitmap_destroy(mddev);
 			goto out;
 		}
 	}
@@ -6627,8 +6652,7 @@ static void __md_stop(struct mddev *mddev)
 {
 	struct md_personality *pers = mddev->pers;
 
-	if (md_bitmap_registered(mddev))
-		mddev->bitmap_ops->destroy(mddev);
+	md_bitmap_destroy(mddev);
 	mddev_detach(mddev);
 	spin_lock(&mddev->lock);
 	mddev->pers = NULL;
@@ -7408,16 +7432,16 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 	err = 0;
 	if (mddev->pers) {
 		if (fd >= 0) {
-			err = mddev->bitmap_ops->create(mddev);
+			err = md_bitmap_create(mddev);
 			if (!err)
 				err = mddev->bitmap_ops->load(mddev);
 
 			if (err) {
-				mddev->bitmap_ops->destroy(mddev);
+				md_bitmap_destroy(mddev);
 				fd = -1;
 			}
 		} else if (fd < 0) {
-			mddev->bitmap_ops->destroy(mddev);
+			md_bitmap_destroy(mddev);
 		}
 	}
 
@@ -7732,12 +7756,12 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 				mddev->bitmap_info.default_offset;
 			mddev->bitmap_info.space =
 				mddev->bitmap_info.default_space;
-			rv = mddev->bitmap_ops->create(mddev);
+			rv = md_bitmap_create(mddev);
 			if (!rv)
 				rv = mddev->bitmap_ops->load(mddev);
 
 			if (rv)
-				mddev->bitmap_ops->destroy(mddev);
+				md_bitmap_destroy(mddev);
 		} else {
 			struct md_bitmap_stats stats;
 
@@ -7763,7 +7787,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 				put_cluster_ops(mddev);
 				mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
 			}
-			mddev->bitmap_ops->destroy(mddev);
+			md_bitmap_destroy(mddev);
 			mddev->bitmap_info.offset = 0;
 		}
 	}
-- 
2.39.2
Re: [PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap
Posted by Hannes Reinecke 6 months, 3 weeks ago
On 5/24/25 08:13, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Currently bitmap_ops is registered while allocating mddev, this is fine
> when there is only one bitmap_ops, however, after introduing a new
> bitmap_ops, user space need a time window to choose which bitmap_ops to
> use while creating new array.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>   drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
>   1 file changed, 55 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 4eb0c6effd5b..dc4b85f30e13 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>   
>   static bool mddev_set_bitmap_ops(struct mddev *mddev)
>   {
> +	struct bitmap_operations *old = mddev->bitmap_ops;
> +	struct md_submodule_head *head;
> +
> +	if (mddev->bitmap_id == ID_BITMAP_NONE ||
> +	    (old && old->head.id == mddev->bitmap_id))
> +		return true;
> +
>   	xa_lock(&md_submodule);
> -	mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
> +	head = xa_load(&md_submodule, mddev->bitmap_id);
>   	xa_unlock(&md_submodule);
>   
> -	if (!mddev->bitmap_ops) {
> -		pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
> +	if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
> +		pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
>   		return false;
>   	}
>   
> +	if (old && old->group)
> +		sysfs_remove_group(&mddev->kobj, old->group);
> +
> +	mddev->bitmap_ops = (void *)head;
> +	if (mddev->bitmap_ops && mddev->bitmap_ops->group &&
> +	    sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
> +		pr_warn("md: cannot register extra bitmap attributes for %s\n",
> +			mdname(mddev));
> +
>   	return true;
>   }
>   
>   static void mddev_clear_bitmap_ops(struct mddev *mddev)
>   {
> +	if (mddev->bitmap_ops && mddev->bitmap_ops->group)
> +		sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group);
> +
>   	mddev->bitmap_ops = NULL;
>   }
>   
>   int mddev_init(struct mddev *mddev)
>   {
> -	mddev->bitmap_id = ID_BITMAP;
> -
> -	if (!mddev_set_bitmap_ops(mddev))
> -		return -EINVAL;
> -
>   	if (percpu_ref_init(&mddev->active_io, active_io_release,
> -			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> -		mddev_clear_bitmap_ops(mddev);
> +			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
>   		return -ENOMEM;
> -	}
>   
>   	if (percpu_ref_init(&mddev->writes_pending, no_op,
>   			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> -		mddev_clear_bitmap_ops(mddev);
>   		percpu_ref_exit(&mddev->active_io);
>   		return -ENOMEM;
>   	}
> @@ -734,6 +745,7 @@ int mddev_init(struct mddev *mddev)
>   	mddev->resync_min = 0;
>   	mddev->resync_max = MaxSector;
>   	mddev->level = LEVEL_NONE;
> +	mddev->bitmap_id = ID_BITMAP;
>   
>   	INIT_WORK(&mddev->sync_work, md_start_sync);
>   	INIT_WORK(&mddev->del_work, mddev_delayed_delete);
> @@ -744,7 +756,6 @@ EXPORT_SYMBOL_GPL(mddev_init);
>   
>   void mddev_destroy(struct mddev *mddev)
>   {
> -	mddev_clear_bitmap_ops(mddev);
>   	percpu_ref_exit(&mddev->active_io);
>   	percpu_ref_exit(&mddev->writes_pending);
>   }
> @@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
>   		return ERR_PTR(error);
>   	}
>   
> -	if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
> -		if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
> -			pr_warn("md: cannot register extra bitmap attributes for %s\n",
> -				mdname(mddev));
> -
>   	kobject_uevent(&mddev->kobj, KOBJ_ADD);
>   	mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
>   	mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");

But now you've killed udev event processing.
Once the 'add' event is sent _all_ sysfs attributes must be present,
otherwise you'll have a race condition where udev is checking for
attributes which are present only later.

So when moving things around ensure to move the kobject_uevent() call, too.

(ideally you would set the sysfs attributes when calling 'add_device()',
but not sure if that's possible here.)

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Re: [PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap
Posted by Yu Kuai 6 months, 3 weeks ago
Hi,

在 2025/05/27 14:13, Hannes Reinecke 写道:
> On 5/24/25 08:13, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently bitmap_ops is registered while allocating mddev, this is fine
>> when there is only one bitmap_ops, however, after introduing a new
>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>> use while creating new array.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
>>   1 file changed, 55 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 4eb0c6effd5b..dc4b85f30e13 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>>   static bool mddev_set_bitmap_ops(struct mddev *mddev)
>>   {
>> +    struct bitmap_operations *old = mddev->bitmap_ops;
>> +    struct md_submodule_head *head;
>> +
>> +    if (mddev->bitmap_id == ID_BITMAP_NONE ||
>> +        (old && old->head.id == mddev->bitmap_id))
>> +        return true;
>> +
>>       xa_lock(&md_submodule);
>> -    mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>> +    head = xa_load(&md_submodule, mddev->bitmap_id);
>>       xa_unlock(&md_submodule);
>> -    if (!mddev->bitmap_ops) {
>> -        pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
>> +    if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
>> +        pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
>>           return false;
>>       }
>> +    if (old && old->group)
>> +        sysfs_remove_group(&mddev->kobj, old->group);
>> +
>> +    mddev->bitmap_ops = (void *)head;
>> +    if (mddev->bitmap_ops && mddev->bitmap_ops->group &&
>> +        sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
>> +        pr_warn("md: cannot register extra bitmap attributes for %s\n",
>> +            mdname(mddev));
>> +
>>       return true;
>>   }
>>   static void mddev_clear_bitmap_ops(struct mddev *mddev)
>>   {
>> +    if (mddev->bitmap_ops && mddev->bitmap_ops->group)
>> +        sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group);
>> +
>>       mddev->bitmap_ops = NULL;
>>   }
>>   int mddev_init(struct mddev *mddev)
>>   {
>> -    mddev->bitmap_id = ID_BITMAP;
>> -
>> -    if (!mddev_set_bitmap_ops(mddev))
>> -        return -EINVAL;
>> -
>>       if (percpu_ref_init(&mddev->active_io, active_io_release,
>> -                PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
>> -        mddev_clear_bitmap_ops(mddev);
>> +                PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
>>           return -ENOMEM;
>> -    }
>>       if (percpu_ref_init(&mddev->writes_pending, no_op,
>>                   PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
>> -        mddev_clear_bitmap_ops(mddev);
>>           percpu_ref_exit(&mddev->active_io);
>>           return -ENOMEM;
>>       }
>> @@ -734,6 +745,7 @@ int mddev_init(struct mddev *mddev)
>>       mddev->resync_min = 0;
>>       mddev->resync_max = MaxSector;
>>       mddev->level = LEVEL_NONE;
>> +    mddev->bitmap_id = ID_BITMAP;
>>       INIT_WORK(&mddev->sync_work, md_start_sync);
>>       INIT_WORK(&mddev->del_work, mddev_delayed_delete);
>> @@ -744,7 +756,6 @@ EXPORT_SYMBOL_GPL(mddev_init);
>>   void mddev_destroy(struct mddev *mddev)
>>   {
>> -    mddev_clear_bitmap_ops(mddev);
>>       percpu_ref_exit(&mddev->active_io);
>>       percpu_ref_exit(&mddev->writes_pending);
>>   }
>> @@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
>>           return ERR_PTR(error);
>>       }
>> -    if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
>> -        if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
>> -            pr_warn("md: cannot register extra bitmap attributes for 
>> %s\n",
>> -                mdname(mddev));
>> -
>>       kobject_uevent(&mddev->kobj, KOBJ_ADD);
>>       mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, 
>> "array_state");
>>       mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, 
>> "level");
> 
> But now you've killed udev event processing.
> Once the 'add' event is sent _all_ sysfs attributes must be present,
> otherwise you'll have a race condition where udev is checking for
> attributes which are present only later.
> 
> So when moving things around ensure to move the kobject_uevent() call, too.

I do not expect the bitmap entries are checked by udev, otherwise this
set can introduce regressions since the bitmap entries are no longer
existed after using the new biltmap.

And the above KOBJ_ADD uevent is used for mddev->kobj, right? In this
case, we're creating new entries under mddev->kobj, should this be
KOBJ_CHANGE?

Thanks,
Kuai

> 
> (ideally you would set the sysfs attributes when calling 'add_device()',
> but not sure if that's possible here.)
> 
> Cheers,
> 
> Hannes

Re: [PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap
Posted by Hannes Reinecke 6 months, 3 weeks ago
On 5/27/25 09:53, Yu Kuai wrote:
> Hi,
> 
> 在 2025/05/27 14:13, Hannes Reinecke 写道:
>> On 5/24/25 08:13, Yu Kuai wrote:
>>> From: Yu Kuai <yukuai3@huawei.com>
>>>
>>> Currently bitmap_ops is registered while allocating mddev, this is fine
>>> when there is only one bitmap_ops, however, after introduing a new
>>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>>> use while creating new array.
>>>
[ .. ]
>>> @@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
>>>           return ERR_PTR(error);
>>>       }
>>> -    if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
>>> -        if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
>>> -            pr_warn("md: cannot register extra bitmap attributes for 
>>> %s\n",
>>> -                mdname(mddev));
>>> -
>>>       kobject_uevent(&mddev->kobj, KOBJ_ADD);
>>>       mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, 
>>> "array_state");
>>>       mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, 
>>> "level");
>>
>> But now you've killed udev event processing.
>> Once the 'add' event is sent _all_ sysfs attributes must be present,
>> otherwise you'll have a race condition where udev is checking for
>> attributes which are present only later.
>>
>> So when moving things around ensure to move the kobject_uevent() call, 
>> too.
> 
> I do not expect the bitmap entries are checked by udev, otherwise this
> set can introduce regressions since the bitmap entries are no longer
> existed after using the new biltmap.
> 
> And the above KOBJ_ADD uevent is used for mddev->kobj, right? In this
> case, we're creating new entries under mddev->kobj, should this be
> KOBJ_CHANGE?
> 
Yes, please.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Re: [PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap
Posted by Xiao Ni 6 months, 3 weeks ago
On Sat, May 24, 2025 at 2:18 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> Currently bitmap_ops is registered while allocating mddev, this is fine
> when there is only one bitmap_ops, however, after introduing a new
> bitmap_ops, user space need a time window to choose which bitmap_ops to
> use while creating new array.

Could you give more explanation about what the time window is? Is it
between setting llbitmap by bitmap_type and md_bitmap_create?

>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
>  1 file changed, 55 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 4eb0c6effd5b..dc4b85f30e13 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>
>  static bool mddev_set_bitmap_ops(struct mddev *mddev)
>  {
> +       struct bitmap_operations *old = mddev->bitmap_ops;
> +       struct md_submodule_head *head;
> +
> +       if (mddev->bitmap_id == ID_BITMAP_NONE ||
> +           (old && old->head.id == mddev->bitmap_id))
> +               return true;
> +
>         xa_lock(&md_submodule);
> -       mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
> +       head = xa_load(&md_submodule, mddev->bitmap_id);
>         xa_unlock(&md_submodule);
>
> -       if (!mddev->bitmap_ops) {
> -               pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
> +       if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
> +               pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
>                 return false;
>         }
>
> +       if (old && old->group)
> +               sysfs_remove_group(&mddev->kobj, old->group);

I think you're handling a competition problem here. But I don't know
how the old/old->group is already created when creating an array.
Could you explain this?

Regards
Xiao

> +
> +       mddev->bitmap_ops = (void *)head;
> +       if (mddev->bitmap_ops && mddev->bitmap_ops->group &&
> +           sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
> +               pr_warn("md: cannot register extra bitmap attributes for %s\n",
> +                       mdname(mddev));
> +
>         return true;
>  }
>
>  static void mddev_clear_bitmap_ops(struct mddev *mddev)
>  {
> +       if (mddev->bitmap_ops && mddev->bitmap_ops->group)
> +               sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group);
> +
>         mddev->bitmap_ops = NULL;
>  }
>
>  int mddev_init(struct mddev *mddev)
>  {
> -       mddev->bitmap_id = ID_BITMAP;
> -
> -       if (!mddev_set_bitmap_ops(mddev))
> -               return -EINVAL;
> -
>         if (percpu_ref_init(&mddev->active_io, active_io_release,
> -                           PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> -               mddev_clear_bitmap_ops(mddev);
> +                           PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
>                 return -ENOMEM;
> -       }
>
>         if (percpu_ref_init(&mddev->writes_pending, no_op,
>                             PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> -               mddev_clear_bitmap_ops(mddev);
>                 percpu_ref_exit(&mddev->active_io);
>                 return -ENOMEM;
>         }
> @@ -734,6 +745,7 @@ int mddev_init(struct mddev *mddev)
>         mddev->resync_min = 0;
>         mddev->resync_max = MaxSector;
>         mddev->level = LEVEL_NONE;
> +       mddev->bitmap_id = ID_BITMAP;
>
>         INIT_WORK(&mddev->sync_work, md_start_sync);
>         INIT_WORK(&mddev->del_work, mddev_delayed_delete);
> @@ -744,7 +756,6 @@ EXPORT_SYMBOL_GPL(mddev_init);
>
>  void mddev_destroy(struct mddev *mddev)
>  {
> -       mddev_clear_bitmap_ops(mddev);
>         percpu_ref_exit(&mddev->active_io);
>         percpu_ref_exit(&mddev->writes_pending);
>  }
> @@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
>                 return ERR_PTR(error);
>         }
>
> -       if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
> -               if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
> -                       pr_warn("md: cannot register extra bitmap attributes for %s\n",
> -                               mdname(mddev));
> -
>         kobject_uevent(&mddev->kobj, KOBJ_ADD);
>         mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
>         mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
> @@ -6173,6 +6179,26 @@ static void md_safemode_timeout(struct timer_list *t)
>
>  static int start_dirty_degraded;
>
> +static int md_bitmap_create(struct mddev *mddev)
> +{
> +       if (mddev->bitmap_id == ID_BITMAP_NONE)
> +               return -EINVAL;
> +
> +       if (!mddev_set_bitmap_ops(mddev))
> +               return -ENOENT;
> +
> +       return mddev->bitmap_ops->create(mddev);
> +}
> +
> +static void md_bitmap_destroy(struct mddev *mddev)
> +{
> +       if (!md_bitmap_registered(mddev))
> +               return;
> +
> +       mddev->bitmap_ops->destroy(mddev);
> +       mddev_clear_bitmap_ops(mddev);
> +}
> +
>  int md_run(struct mddev *mddev)
>  {
>         int err;
> @@ -6337,9 +6363,9 @@ int md_run(struct mddev *mddev)
>                         (unsigned long long)pers->size(mddev, 0, 0) / 2);
>                 err = -EINVAL;
>         }
> -       if (err == 0 && pers->sync_request && md_bitmap_registered(mddev) &&
> +       if (err == 0 && pers->sync_request &&
>             (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
> -               err = mddev->bitmap_ops->create(mddev);
> +               err = md_bitmap_create(mddev);
>                 if (err)
>                         pr_warn("%s: failed to create bitmap (%d)\n",
>                                 mdname(mddev), err);
> @@ -6412,8 +6438,7 @@ int md_run(struct mddev *mddev)
>                 pers->free(mddev, mddev->private);
>         mddev->private = NULL;
>         put_pers(pers);
> -       if (md_bitmap_registered(mddev))
> -               mddev->bitmap_ops->destroy(mddev);
> +       md_bitmap_destroy(mddev);
>  abort:
>         bioset_exit(&mddev->io_clone_set);
>  exit_sync_set:
> @@ -6436,7 +6461,7 @@ int do_md_run(struct mddev *mddev)
>         if (md_bitmap_registered(mddev)) {
>                 err = mddev->bitmap_ops->load(mddev);
>                 if (err) {
> -                       mddev->bitmap_ops->destroy(mddev);
> +                       md_bitmap_destroy(mddev);
>                         goto out;
>                 }
>         }
> @@ -6627,8 +6652,7 @@ static void __md_stop(struct mddev *mddev)
>  {
>         struct md_personality *pers = mddev->pers;
>
> -       if (md_bitmap_registered(mddev))
> -               mddev->bitmap_ops->destroy(mddev);
> +       md_bitmap_destroy(mddev);
>         mddev_detach(mddev);
>         spin_lock(&mddev->lock);
>         mddev->pers = NULL;
> @@ -7408,16 +7432,16 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
>         err = 0;
>         if (mddev->pers) {
>                 if (fd >= 0) {
> -                       err = mddev->bitmap_ops->create(mddev);
> +                       err = md_bitmap_create(mddev);
>                         if (!err)
>                                 err = mddev->bitmap_ops->load(mddev);
>
>                         if (err) {
> -                               mddev->bitmap_ops->destroy(mddev);
> +                               md_bitmap_destroy(mddev);
>                                 fd = -1;
>                         }
>                 } else if (fd < 0) {
> -                       mddev->bitmap_ops->destroy(mddev);
> +                       md_bitmap_destroy(mddev);
>                 }
>         }
>
> @@ -7732,12 +7756,12 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
>                                 mddev->bitmap_info.default_offset;
>                         mddev->bitmap_info.space =
>                                 mddev->bitmap_info.default_space;
> -                       rv = mddev->bitmap_ops->create(mddev);
> +                       rv = md_bitmap_create(mddev);
>                         if (!rv)
>                                 rv = mddev->bitmap_ops->load(mddev);
>
>                         if (rv)
> -                               mddev->bitmap_ops->destroy(mddev);
> +                               md_bitmap_destroy(mddev);
>                 } else {
>                         struct md_bitmap_stats stats;
>
> @@ -7763,7 +7787,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
>                                 put_cluster_ops(mddev);
>                                 mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
>                         }
> -                       mddev->bitmap_ops->destroy(mddev);
> +                       md_bitmap_destroy(mddev);
>                         mddev->bitmap_info.offset = 0;
>                 }
>         }
> --
> 2.39.2
>
Re: [PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap
Posted by Yu Kuai 6 months, 3 weeks ago
Hi,

在 2025/05/26 14:52, Xiao Ni 写道:
> On Sat, May 24, 2025 at 2:18 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently bitmap_ops is registered while allocating mddev, this is fine
>> when there is only one bitmap_ops, however, after introduing a new
>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>> use while creating new array.
> 
> Could you give more explanation about what the time window is? Is it
> between setting llbitmap by bitmap_type and md_bitmap_create?

The window after this patch is that user can write the new sysfs after
allocating mddev, and before running the array.
> 
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
>>   1 file changed, 55 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 4eb0c6effd5b..dc4b85f30e13 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>>
>>   static bool mddev_set_bitmap_ops(struct mddev *mddev)
>>   {
>> +       struct bitmap_operations *old = mddev->bitmap_ops;
>> +       struct md_submodule_head *head;
>> +
>> +       if (mddev->bitmap_id == ID_BITMAP_NONE ||
>> +           (old && old->head.id == mddev->bitmap_id))
>> +               return true;
>> +
>>          xa_lock(&md_submodule);
>> -       mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>> +       head = xa_load(&md_submodule, mddev->bitmap_id);
>>          xa_unlock(&md_submodule);
>>
>> -       if (!mddev->bitmap_ops) {
>> -               pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
>> +       if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
>> +               pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
>>                  return false;
>>          }
>>
>> +       if (old && old->group)
>> +               sysfs_remove_group(&mddev->kobj, old->group);
> 
> I think you're handling a competition problem here. But I don't know
> how the old/old->group is already created when creating an array.
> Could you explain this?

It's not possible now, this is because I think we want to be able to
switch existing array with old bitmap to new bitmap.

Thanks,
Kuai

> 
> Regards
> Xiao

Re: [PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap
Posted by Xiao Ni 6 months, 3 weeks ago
在 2025/5/26 下午3:57, Yu Kuai 写道:
> Hi,
>
> 在 2025/05/26 14:52, Xiao Ni 写道:
>> On Sat, May 24, 2025 at 2:18 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>>
>>> From: Yu Kuai <yukuai3@huawei.com>
>>>
>>> Currently bitmap_ops is registered while allocating mddev, this is fine
>>> when there is only one bitmap_ops, however, after introduing a new
>>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>>> use while creating new array.
>>
>> Could you give more explanation about what the time window is? Is it
>> between setting llbitmap by bitmap_type and md_bitmap_create?
>
> The window after this patch is that user can write the new sysfs after
> allocating mddev, and before running the array.


Thanks for the explanation. Is it ok to add it in the commit log message?

>>
>>>
>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>> ---
>>>   drivers/md/md.c | 86 
>>> +++++++++++++++++++++++++++++++------------------
>>>   1 file changed, 55 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>>> index 4eb0c6effd5b..dc4b85f30e13 100644
>>> --- a/drivers/md/md.c
>>> +++ b/drivers/md/md.c
>>> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>>>
>>>   static bool mddev_set_bitmap_ops(struct mddev *mddev)
>>>   {
>>> +       struct bitmap_operations *old = mddev->bitmap_ops;
>>> +       struct md_submodule_head *head;
>>> +
>>> +       if (mddev->bitmap_id == ID_BITMAP_NONE ||
>>> +           (old && old->head.id == mddev->bitmap_id))
>>> +               return true;
>>> +
>>>          xa_lock(&md_submodule);
>>> -       mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>>> +       head = xa_load(&md_submodule, mddev->bitmap_id);
>>>          xa_unlock(&md_submodule);
>>>
>>> -       if (!mddev->bitmap_ops) {
>>> -               pr_warn_once("md: can't find bitmap id %d\n", 
>>> mddev->bitmap_id);
>>> +       if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
>>> +               pr_err("md: can't find bitmap id %d\n", 
>>> mddev->bitmap_id);
>>>                  return false;
>>>          }
>>>
>>> +       if (old && old->group)
>>> +               sysfs_remove_group(&mddev->kobj, old->group);
>>
>> I think you're handling a competition problem here. But I don't know
>> how the old/old->group is already created when creating an array.
>> Could you explain this?
>
> It's not possible now, this is because I think we want to be able to
> switch existing array with old bitmap to new bitmap.


Can we add the check of old when we really want it?

Regards

Xiao

>
> Thanks,
> Kuai
>
>>
>> Regards
>> Xiao
>

Re: [PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap
Posted by Yu Kuai 6 months, 3 weeks ago
Hi,

在 2025/05/27 10:15, Xiao Ni 写道:
> 
> 在 2025/5/26 下午3:57, Yu Kuai 写道:
>> Hi,
>>
>> 在 2025/05/26 14:52, Xiao Ni 写道:
>>> On Sat, May 24, 2025 at 2:18 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>>>
>>>> From: Yu Kuai <yukuai3@huawei.com>
>>>>
>>>> Currently bitmap_ops is registered while allocating mddev, this is fine
>>>> when there is only one bitmap_ops, however, after introduing a new
>>>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>>>> use while creating new array.
>>>
>>> Could you give more explanation about what the time window is? Is it
>>> between setting llbitmap by bitmap_type and md_bitmap_create?
>>
>> The window after this patch is that user can write the new sysfs after
>> allocating mddev, and before running the array.
> 
> 
> Thanks for the explanation. Is it ok to add it in the commit log message?

ok
> 
>>>
>>>>
>>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>>> ---
>>>>   drivers/md/md.c | 86 
>>>> +++++++++++++++++++++++++++++++------------------
>>>>   1 file changed, 55 insertions(+), 31 deletions(-)
>>>>
>>>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>>>> index 4eb0c6effd5b..dc4b85f30e13 100644
>>>> --- a/drivers/md/md.c
>>>> +++ b/drivers/md/md.c
>>>> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>>>>
>>>>   static bool mddev_set_bitmap_ops(struct mddev *mddev)
>>>>   {
>>>> +       struct bitmap_operations *old = mddev->bitmap_ops;
>>>> +       struct md_submodule_head *head;
>>>> +
>>>> +       if (mddev->bitmap_id == ID_BITMAP_NONE ||
>>>> +           (old && old->head.id == mddev->bitmap_id))
>>>> +               return true;
>>>> +
>>>>          xa_lock(&md_submodule);
>>>> -       mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>>>> +       head = xa_load(&md_submodule, mddev->bitmap_id);
>>>>          xa_unlock(&md_submodule);
>>>>
>>>> -       if (!mddev->bitmap_ops) {
>>>> -               pr_warn_once("md: can't find bitmap id %d\n", 
>>>> mddev->bitmap_id);
>>>> +       if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
>>>> +               pr_err("md: can't find bitmap id %d\n", 
>>>> mddev->bitmap_id);
>>>>                  return false;
>>>>          }
>>>>
>>>> +       if (old && old->group)
>>>> +               sysfs_remove_group(&mddev->kobj, old->group);
>>>
>>> I think you're handling a competition problem here. But I don't know
>>> how the old/old->group is already created when creating an array.
>>> Could you explain this?
>>
>> It's not possible now, this is because I think we want to be able to
>> switch existing array with old bitmap to new bitmap.
> 
> 
> Can we add the check of old when we really want it?

I'm fine, and there is no doubt we will want it.

Thanks,
Kuai

> 
> Regards
> 
> Xiao
> 
>>
>> Thanks,
>> Kuai
>>
>>>
>>> Regards
>>> Xiao
>>
> 
> 
> .
> 

Re: [PATCH 07/23] md/md-bitmap: delay registration of bitmap_ops until creating bitmap
Posted by Christoph Hellwig 6 months, 3 weeks ago
Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>