From: Yu Kuai <yukuai3@huawei.com>
Currently bitmap_ops is registered while allocating mddev, this is fine
when there is only one bitmap_ops, however, after introduing a new
bitmap_ops, user space need a time window to choose which bitmap_ops to
use while creating new array.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
1 file changed, 55 insertions(+), 31 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4eb0c6effd5b..dc4b85f30e13 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
static bool mddev_set_bitmap_ops(struct mddev *mddev)
{
+ struct bitmap_operations *old = mddev->bitmap_ops;
+ struct md_submodule_head *head;
+
+ if (mddev->bitmap_id == ID_BITMAP_NONE ||
+ (old && old->head.id == mddev->bitmap_id))
+ return true;
+
xa_lock(&md_submodule);
- mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
+ head = xa_load(&md_submodule, mddev->bitmap_id);
xa_unlock(&md_submodule);
- if (!mddev->bitmap_ops) {
- pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
+ if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
+ pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
return false;
}
+ if (old && old->group)
+ sysfs_remove_group(&mddev->kobj, old->group);
+
+ mddev->bitmap_ops = (void *)head;
+ if (mddev->bitmap_ops && mddev->bitmap_ops->group &&
+ sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
+ pr_warn("md: cannot register extra bitmap attributes for %s\n",
+ mdname(mddev));
+
return true;
}
static void mddev_clear_bitmap_ops(struct mddev *mddev)
{
+ if (mddev->bitmap_ops && mddev->bitmap_ops->group)
+ sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group);
+
mddev->bitmap_ops = NULL;
}
int mddev_init(struct mddev *mddev)
{
- mddev->bitmap_id = ID_BITMAP;
-
- if (!mddev_set_bitmap_ops(mddev))
- return -EINVAL;
-
if (percpu_ref_init(&mddev->active_io, active_io_release,
- PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
- mddev_clear_bitmap_ops(mddev);
+ PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
return -ENOMEM;
- }
if (percpu_ref_init(&mddev->writes_pending, no_op,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
- mddev_clear_bitmap_ops(mddev);
percpu_ref_exit(&mddev->active_io);
return -ENOMEM;
}
@@ -734,6 +745,7 @@ int mddev_init(struct mddev *mddev)
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->level = LEVEL_NONE;
+ mddev->bitmap_id = ID_BITMAP;
INIT_WORK(&mddev->sync_work, md_start_sync);
INIT_WORK(&mddev->del_work, mddev_delayed_delete);
@@ -744,7 +756,6 @@ EXPORT_SYMBOL_GPL(mddev_init);
void mddev_destroy(struct mddev *mddev)
{
- mddev_clear_bitmap_ops(mddev);
percpu_ref_exit(&mddev->active_io);
percpu_ref_exit(&mddev->writes_pending);
}
@@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
return ERR_PTR(error);
}
- if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
- if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
- pr_warn("md: cannot register extra bitmap attributes for %s\n",
- mdname(mddev));
-
kobject_uevent(&mddev->kobj, KOBJ_ADD);
mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
@@ -6173,6 +6179,26 @@ static void md_safemode_timeout(struct timer_list *t)
static int start_dirty_degraded;
+static int md_bitmap_create(struct mddev *mddev)
+{
+ if (mddev->bitmap_id == ID_BITMAP_NONE)
+ return -EINVAL;
+
+ if (!mddev_set_bitmap_ops(mddev))
+ return -ENOENT;
+
+ return mddev->bitmap_ops->create(mddev);
+}
+
+static void md_bitmap_destroy(struct mddev *mddev)
+{
+ if (!md_bitmap_registered(mddev))
+ return;
+
+ mddev->bitmap_ops->destroy(mddev);
+ mddev_clear_bitmap_ops(mddev);
+}
+
int md_run(struct mddev *mddev)
{
int err;
@@ -6337,9 +6363,9 @@ int md_run(struct mddev *mddev)
(unsigned long long)pers->size(mddev, 0, 0) / 2);
err = -EINVAL;
}
- if (err == 0 && pers->sync_request && md_bitmap_registered(mddev) &&
+ if (err == 0 && pers->sync_request &&
(mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
- err = mddev->bitmap_ops->create(mddev);
+ err = md_bitmap_create(mddev);
if (err)
pr_warn("%s: failed to create bitmap (%d)\n",
mdname(mddev), err);
@@ -6412,8 +6438,7 @@ int md_run(struct mddev *mddev)
pers->free(mddev, mddev->private);
mddev->private = NULL;
put_pers(pers);
- if (md_bitmap_registered(mddev))
- mddev->bitmap_ops->destroy(mddev);
+ md_bitmap_destroy(mddev);
abort:
bioset_exit(&mddev->io_clone_set);
exit_sync_set:
@@ -6436,7 +6461,7 @@ int do_md_run(struct mddev *mddev)
if (md_bitmap_registered(mddev)) {
err = mddev->bitmap_ops->load(mddev);
if (err) {
- mddev->bitmap_ops->destroy(mddev);
+ md_bitmap_destroy(mddev);
goto out;
}
}
@@ -6627,8 +6652,7 @@ static void __md_stop(struct mddev *mddev)
{
struct md_personality *pers = mddev->pers;
- if (md_bitmap_registered(mddev))
- mddev->bitmap_ops->destroy(mddev);
+ md_bitmap_destroy(mddev);
mddev_detach(mddev);
spin_lock(&mddev->lock);
mddev->pers = NULL;
@@ -7408,16 +7432,16 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
err = 0;
if (mddev->pers) {
if (fd >= 0) {
- err = mddev->bitmap_ops->create(mddev);
+ err = md_bitmap_create(mddev);
if (!err)
err = mddev->bitmap_ops->load(mddev);
if (err) {
- mddev->bitmap_ops->destroy(mddev);
+ md_bitmap_destroy(mddev);
fd = -1;
}
} else if (fd < 0) {
- mddev->bitmap_ops->destroy(mddev);
+ md_bitmap_destroy(mddev);
}
}
@@ -7732,12 +7756,12 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
mddev->bitmap_info.default_offset;
mddev->bitmap_info.space =
mddev->bitmap_info.default_space;
- rv = mddev->bitmap_ops->create(mddev);
+ rv = md_bitmap_create(mddev);
if (!rv)
rv = mddev->bitmap_ops->load(mddev);
if (rv)
- mddev->bitmap_ops->destroy(mddev);
+ md_bitmap_destroy(mddev);
} else {
struct md_bitmap_stats stats;
@@ -7763,7 +7787,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
put_cluster_ops(mddev);
mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
}
- mddev->bitmap_ops->destroy(mddev);
+ md_bitmap_destroy(mddev);
mddev->bitmap_info.offset = 0;
}
}
--
2.39.2
On 5/24/25 08:13, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
>
> Currently bitmap_ops is registered while allocating mddev, this is fine
> when there is only one bitmap_ops, however, after introduing a new
> bitmap_ops, user space need a time window to choose which bitmap_ops to
> use while creating new array.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
> 1 file changed, 55 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 4eb0c6effd5b..dc4b85f30e13 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>
> static bool mddev_set_bitmap_ops(struct mddev *mddev)
> {
> + struct bitmap_operations *old = mddev->bitmap_ops;
> + struct md_submodule_head *head;
> +
> + if (mddev->bitmap_id == ID_BITMAP_NONE ||
> + (old && old->head.id == mddev->bitmap_id))
> + return true;
> +
> xa_lock(&md_submodule);
> - mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
> + head = xa_load(&md_submodule, mddev->bitmap_id);
> xa_unlock(&md_submodule);
>
> - if (!mddev->bitmap_ops) {
> - pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
> + if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
> + pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
> return false;
> }
>
> + if (old && old->group)
> + sysfs_remove_group(&mddev->kobj, old->group);
> +
> + mddev->bitmap_ops = (void *)head;
> + if (mddev->bitmap_ops && mddev->bitmap_ops->group &&
> + sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
> + pr_warn("md: cannot register extra bitmap attributes for %s\n",
> + mdname(mddev));
> +
> return true;
> }
>
> static void mddev_clear_bitmap_ops(struct mddev *mddev)
> {
> + if (mddev->bitmap_ops && mddev->bitmap_ops->group)
> + sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group);
> +
> mddev->bitmap_ops = NULL;
> }
>
> int mddev_init(struct mddev *mddev)
> {
> - mddev->bitmap_id = ID_BITMAP;
> -
> - if (!mddev_set_bitmap_ops(mddev))
> - return -EINVAL;
> -
> if (percpu_ref_init(&mddev->active_io, active_io_release,
> - PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> - mddev_clear_bitmap_ops(mddev);
> + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
> return -ENOMEM;
> - }
>
> if (percpu_ref_init(&mddev->writes_pending, no_op,
> PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> - mddev_clear_bitmap_ops(mddev);
> percpu_ref_exit(&mddev->active_io);
> return -ENOMEM;
> }
> @@ -734,6 +745,7 @@ int mddev_init(struct mddev *mddev)
> mddev->resync_min = 0;
> mddev->resync_max = MaxSector;
> mddev->level = LEVEL_NONE;
> + mddev->bitmap_id = ID_BITMAP;
>
> INIT_WORK(&mddev->sync_work, md_start_sync);
> INIT_WORK(&mddev->del_work, mddev_delayed_delete);
> @@ -744,7 +756,6 @@ EXPORT_SYMBOL_GPL(mddev_init);
>
> void mddev_destroy(struct mddev *mddev)
> {
> - mddev_clear_bitmap_ops(mddev);
> percpu_ref_exit(&mddev->active_io);
> percpu_ref_exit(&mddev->writes_pending);
> }
> @@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
> return ERR_PTR(error);
> }
>
> - if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
> - if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
> - pr_warn("md: cannot register extra bitmap attributes for %s\n",
> - mdname(mddev));
> -
> kobject_uevent(&mddev->kobj, KOBJ_ADD);
> mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
> mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
But now you've killed udev event processing.
Once the 'add' event is sent _all_ sysfs attributes must be present,
otherwise you'll have a race condition where udev is checking for
attributes which are present only later.
So when moving things around ensure to move the kobject_uevent() call, too.
(ideally you would set the sysfs attributes when calling 'add_device()',
but not sure if that's possible here.)
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Hi,
在 2025/05/27 14:13, Hannes Reinecke 写道:
> On 5/24/25 08:13, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently bitmap_ops is registered while allocating mddev, this is fine
>> when there is only one bitmap_ops, however, after introduing a new
>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>> use while creating new array.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>> drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
>> 1 file changed, 55 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 4eb0c6effd5b..dc4b85f30e13 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>> static bool mddev_set_bitmap_ops(struct mddev *mddev)
>> {
>> + struct bitmap_operations *old = mddev->bitmap_ops;
>> + struct md_submodule_head *head;
>> +
>> + if (mddev->bitmap_id == ID_BITMAP_NONE ||
>> + (old && old->head.id == mddev->bitmap_id))
>> + return true;
>> +
>> xa_lock(&md_submodule);
>> - mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>> + head = xa_load(&md_submodule, mddev->bitmap_id);
>> xa_unlock(&md_submodule);
>> - if (!mddev->bitmap_ops) {
>> - pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
>> + if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
>> + pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
>> return false;
>> }
>> + if (old && old->group)
>> + sysfs_remove_group(&mddev->kobj, old->group);
>> +
>> + mddev->bitmap_ops = (void *)head;
>> + if (mddev->bitmap_ops && mddev->bitmap_ops->group &&
>> + sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
>> + pr_warn("md: cannot register extra bitmap attributes for %s\n",
>> + mdname(mddev));
>> +
>> return true;
>> }
>> static void mddev_clear_bitmap_ops(struct mddev *mddev)
>> {
>> + if (mddev->bitmap_ops && mddev->bitmap_ops->group)
>> + sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group);
>> +
>> mddev->bitmap_ops = NULL;
>> }
>> int mddev_init(struct mddev *mddev)
>> {
>> - mddev->bitmap_id = ID_BITMAP;
>> -
>> - if (!mddev_set_bitmap_ops(mddev))
>> - return -EINVAL;
>> -
>> if (percpu_ref_init(&mddev->active_io, active_io_release,
>> - PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
>> - mddev_clear_bitmap_ops(mddev);
>> + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
>> return -ENOMEM;
>> - }
>> if (percpu_ref_init(&mddev->writes_pending, no_op,
>> PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
>> - mddev_clear_bitmap_ops(mddev);
>> percpu_ref_exit(&mddev->active_io);
>> return -ENOMEM;
>> }
>> @@ -734,6 +745,7 @@ int mddev_init(struct mddev *mddev)
>> mddev->resync_min = 0;
>> mddev->resync_max = MaxSector;
>> mddev->level = LEVEL_NONE;
>> + mddev->bitmap_id = ID_BITMAP;
>> INIT_WORK(&mddev->sync_work, md_start_sync);
>> INIT_WORK(&mddev->del_work, mddev_delayed_delete);
>> @@ -744,7 +756,6 @@ EXPORT_SYMBOL_GPL(mddev_init);
>> void mddev_destroy(struct mddev *mddev)
>> {
>> - mddev_clear_bitmap_ops(mddev);
>> percpu_ref_exit(&mddev->active_io);
>> percpu_ref_exit(&mddev->writes_pending);
>> }
>> @@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
>> return ERR_PTR(error);
>> }
>> - if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
>> - if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
>> - pr_warn("md: cannot register extra bitmap attributes for
>> %s\n",
>> - mdname(mddev));
>> -
>> kobject_uevent(&mddev->kobj, KOBJ_ADD);
>> mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd,
>> "array_state");
>> mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd,
>> "level");
>
> But now you've killed udev event processing.
> Once the 'add' event is sent _all_ sysfs attributes must be present,
> otherwise you'll have a race condition where udev is checking for
> attributes which are present only later.
>
> So when moving things around ensure to move the kobject_uevent() call, too.
I do not expect the bitmap entries are checked by udev, otherwise this
set can introduce regressions since the bitmap entries are no longer
existed after using the new biltmap.
And the above KOBJ_ADD uevent is used for mddev->kobj, right? In this
case, we're creating new entries under mddev->kobj, should this be
KOBJ_CHANGE?
Thanks,
Kuai
>
> (ideally you would set the sysfs attributes when calling 'add_device()',
> but not sure if that's possible here.)
>
> Cheers,
>
> Hannes
On 5/27/25 09:53, Yu Kuai wrote:
> Hi,
>
> 在 2025/05/27 14:13, Hannes Reinecke 写道:
>> On 5/24/25 08:13, Yu Kuai wrote:
>>> From: Yu Kuai <yukuai3@huawei.com>
>>>
>>> Currently bitmap_ops is registered while allocating mddev, this is fine
>>> when there is only one bitmap_ops, however, after introduing a new
>>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>>> use while creating new array.
>>>
[ .. ]
>>> @@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
>>> return ERR_PTR(error);
>>> }
>>> - if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
>>> - if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
>>> - pr_warn("md: cannot register extra bitmap attributes for
>>> %s\n",
>>> - mdname(mddev));
>>> -
>>> kobject_uevent(&mddev->kobj, KOBJ_ADD);
>>> mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd,
>>> "array_state");
>>> mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd,
>>> "level");
>>
>> But now you've killed udev event processing.
>> Once the 'add' event is sent _all_ sysfs attributes must be present,
>> otherwise you'll have a race condition where udev is checking for
>> attributes which are present only later.
>>
>> So when moving things around ensure to move the kobject_uevent() call,
>> too.
>
> I do not expect the bitmap entries are checked by udev, otherwise this
> set can introduce regressions since the bitmap entries are no longer
> existed after using the new biltmap.
>
> And the above KOBJ_ADD uevent is used for mddev->kobj, right? In this
> case, we're creating new entries under mddev->kobj, should this be
> KOBJ_CHANGE?
>
Yes, please.
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
On Sat, May 24, 2025 at 2:18 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> Currently bitmap_ops is registered while allocating mddev, this is fine
> when there is only one bitmap_ops, however, after introduing a new
> bitmap_ops, user space need a time window to choose which bitmap_ops to
> use while creating new array.
Could you give more explanation about what the time window is? Is it
between setting llbitmap by bitmap_type and md_bitmap_create?
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
> 1 file changed, 55 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 4eb0c6effd5b..dc4b85f30e13 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>
> static bool mddev_set_bitmap_ops(struct mddev *mddev)
> {
> + struct bitmap_operations *old = mddev->bitmap_ops;
> + struct md_submodule_head *head;
> +
> + if (mddev->bitmap_id == ID_BITMAP_NONE ||
> + (old && old->head.id == mddev->bitmap_id))
> + return true;
> +
> xa_lock(&md_submodule);
> - mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
> + head = xa_load(&md_submodule, mddev->bitmap_id);
> xa_unlock(&md_submodule);
>
> - if (!mddev->bitmap_ops) {
> - pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
> + if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
> + pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
> return false;
> }
>
> + if (old && old->group)
> + sysfs_remove_group(&mddev->kobj, old->group);
I think you're handling a competition problem here. But I don't know
how the old/old->group is already created when creating an array.
Could you explain this?
Regards
Xiao
> +
> + mddev->bitmap_ops = (void *)head;
> + if (mddev->bitmap_ops && mddev->bitmap_ops->group &&
> + sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
> + pr_warn("md: cannot register extra bitmap attributes for %s\n",
> + mdname(mddev));
> +
> return true;
> }
>
> static void mddev_clear_bitmap_ops(struct mddev *mddev)
> {
> + if (mddev->bitmap_ops && mddev->bitmap_ops->group)
> + sysfs_remove_group(&mddev->kobj, mddev->bitmap_ops->group);
> +
> mddev->bitmap_ops = NULL;
> }
>
> int mddev_init(struct mddev *mddev)
> {
> - mddev->bitmap_id = ID_BITMAP;
> -
> - if (!mddev_set_bitmap_ops(mddev))
> - return -EINVAL;
> -
> if (percpu_ref_init(&mddev->active_io, active_io_release,
> - PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> - mddev_clear_bitmap_ops(mddev);
> + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
> return -ENOMEM;
> - }
>
> if (percpu_ref_init(&mddev->writes_pending, no_op,
> PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> - mddev_clear_bitmap_ops(mddev);
> percpu_ref_exit(&mddev->active_io);
> return -ENOMEM;
> }
> @@ -734,6 +745,7 @@ int mddev_init(struct mddev *mddev)
> mddev->resync_min = 0;
> mddev->resync_max = MaxSector;
> mddev->level = LEVEL_NONE;
> + mddev->bitmap_id = ID_BITMAP;
>
> INIT_WORK(&mddev->sync_work, md_start_sync);
> INIT_WORK(&mddev->del_work, mddev_delayed_delete);
> @@ -744,7 +756,6 @@ EXPORT_SYMBOL_GPL(mddev_init);
>
> void mddev_destroy(struct mddev *mddev)
> {
> - mddev_clear_bitmap_ops(mddev);
> percpu_ref_exit(&mddev->active_io);
> percpu_ref_exit(&mddev->writes_pending);
> }
> @@ -6093,11 +6104,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
> return ERR_PTR(error);
> }
>
> - if (md_bitmap_registered(mddev) && mddev->bitmap_ops->group)
> - if (sysfs_create_group(&mddev->kobj, mddev->bitmap_ops->group))
> - pr_warn("md: cannot register extra bitmap attributes for %s\n",
> - mdname(mddev));
> -
> kobject_uevent(&mddev->kobj, KOBJ_ADD);
> mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state");
> mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level");
> @@ -6173,6 +6179,26 @@ static void md_safemode_timeout(struct timer_list *t)
>
> static int start_dirty_degraded;
>
> +static int md_bitmap_create(struct mddev *mddev)
> +{
> + if (mddev->bitmap_id == ID_BITMAP_NONE)
> + return -EINVAL;
> +
> + if (!mddev_set_bitmap_ops(mddev))
> + return -ENOENT;
> +
> + return mddev->bitmap_ops->create(mddev);
> +}
> +
> +static void md_bitmap_destroy(struct mddev *mddev)
> +{
> + if (!md_bitmap_registered(mddev))
> + return;
> +
> + mddev->bitmap_ops->destroy(mddev);
> + mddev_clear_bitmap_ops(mddev);
> +}
> +
> int md_run(struct mddev *mddev)
> {
> int err;
> @@ -6337,9 +6363,9 @@ int md_run(struct mddev *mddev)
> (unsigned long long)pers->size(mddev, 0, 0) / 2);
> err = -EINVAL;
> }
> - if (err == 0 && pers->sync_request && md_bitmap_registered(mddev) &&
> + if (err == 0 && pers->sync_request &&
> (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
> - err = mddev->bitmap_ops->create(mddev);
> + err = md_bitmap_create(mddev);
> if (err)
> pr_warn("%s: failed to create bitmap (%d)\n",
> mdname(mddev), err);
> @@ -6412,8 +6438,7 @@ int md_run(struct mddev *mddev)
> pers->free(mddev, mddev->private);
> mddev->private = NULL;
> put_pers(pers);
> - if (md_bitmap_registered(mddev))
> - mddev->bitmap_ops->destroy(mddev);
> + md_bitmap_destroy(mddev);
> abort:
> bioset_exit(&mddev->io_clone_set);
> exit_sync_set:
> @@ -6436,7 +6461,7 @@ int do_md_run(struct mddev *mddev)
> if (md_bitmap_registered(mddev)) {
> err = mddev->bitmap_ops->load(mddev);
> if (err) {
> - mddev->bitmap_ops->destroy(mddev);
> + md_bitmap_destroy(mddev);
> goto out;
> }
> }
> @@ -6627,8 +6652,7 @@ static void __md_stop(struct mddev *mddev)
> {
> struct md_personality *pers = mddev->pers;
>
> - if (md_bitmap_registered(mddev))
> - mddev->bitmap_ops->destroy(mddev);
> + md_bitmap_destroy(mddev);
> mddev_detach(mddev);
> spin_lock(&mddev->lock);
> mddev->pers = NULL;
> @@ -7408,16 +7432,16 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
> err = 0;
> if (mddev->pers) {
> if (fd >= 0) {
> - err = mddev->bitmap_ops->create(mddev);
> + err = md_bitmap_create(mddev);
> if (!err)
> err = mddev->bitmap_ops->load(mddev);
>
> if (err) {
> - mddev->bitmap_ops->destroy(mddev);
> + md_bitmap_destroy(mddev);
> fd = -1;
> }
> } else if (fd < 0) {
> - mddev->bitmap_ops->destroy(mddev);
> + md_bitmap_destroy(mddev);
> }
> }
>
> @@ -7732,12 +7756,12 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
> mddev->bitmap_info.default_offset;
> mddev->bitmap_info.space =
> mddev->bitmap_info.default_space;
> - rv = mddev->bitmap_ops->create(mddev);
> + rv = md_bitmap_create(mddev);
> if (!rv)
> rv = mddev->bitmap_ops->load(mddev);
>
> if (rv)
> - mddev->bitmap_ops->destroy(mddev);
> + md_bitmap_destroy(mddev);
> } else {
> struct md_bitmap_stats stats;
>
> @@ -7763,7 +7787,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
> put_cluster_ops(mddev);
> mddev->safemode_delay = DEFAULT_SAFEMODE_DELAY;
> }
> - mddev->bitmap_ops->destroy(mddev);
> + md_bitmap_destroy(mddev);
> mddev->bitmap_info.offset = 0;
> }
> }
> --
> 2.39.2
>
Hi,
在 2025/05/26 14:52, Xiao Ni 写道:
> On Sat, May 24, 2025 at 2:18 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently bitmap_ops is registered while allocating mddev, this is fine
>> when there is only one bitmap_ops, however, after introduing a new
>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>> use while creating new array.
>
> Could you give more explanation about what the time window is? Is it
> between setting llbitmap by bitmap_type and md_bitmap_create?
The window after this patch is that user can write the new sysfs after
allocating mddev, and before running the array.
>
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>> drivers/md/md.c | 86 +++++++++++++++++++++++++++++++------------------
>> 1 file changed, 55 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 4eb0c6effd5b..dc4b85f30e13 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>>
>> static bool mddev_set_bitmap_ops(struct mddev *mddev)
>> {
>> + struct bitmap_operations *old = mddev->bitmap_ops;
>> + struct md_submodule_head *head;
>> +
>> + if (mddev->bitmap_id == ID_BITMAP_NONE ||
>> + (old && old->head.id == mddev->bitmap_id))
>> + return true;
>> +
>> xa_lock(&md_submodule);
>> - mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>> + head = xa_load(&md_submodule, mddev->bitmap_id);
>> xa_unlock(&md_submodule);
>>
>> - if (!mddev->bitmap_ops) {
>> - pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
>> + if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
>> + pr_err("md: can't find bitmap id %d\n", mddev->bitmap_id);
>> return false;
>> }
>>
>> + if (old && old->group)
>> + sysfs_remove_group(&mddev->kobj, old->group);
>
> I think you're handling a competition problem here. But I don't know
> how the old/old->group is already created when creating an array.
> Could you explain this?
It's not possible now, this is because I think we want to be able to
switch existing array with old bitmap to new bitmap.
Thanks,
Kuai
>
> Regards
> Xiao
在 2025/5/26 下午3:57, Yu Kuai 写道:
> Hi,
>
> 在 2025/05/26 14:52, Xiao Ni 写道:
>> On Sat, May 24, 2025 at 2:18 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>>
>>> From: Yu Kuai <yukuai3@huawei.com>
>>>
>>> Currently bitmap_ops is registered while allocating mddev, this is fine
>>> when there is only one bitmap_ops, however, after introduing a new
>>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>>> use while creating new array.
>>
>> Could you give more explanation about what the time window is? Is it
>> between setting llbitmap by bitmap_type and md_bitmap_create?
>
> The window after this patch is that user can write the new sysfs after
> allocating mddev, and before running the array.
Thanks for the explanation. Is it ok to add it in the commit log message?
>>
>>>
>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>> ---
>>> drivers/md/md.c | 86
>>> +++++++++++++++++++++++++++++++------------------
>>> 1 file changed, 55 insertions(+), 31 deletions(-)
>>>
>>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>>> index 4eb0c6effd5b..dc4b85f30e13 100644
>>> --- a/drivers/md/md.c
>>> +++ b/drivers/md/md.c
>>> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>>>
>>> static bool mddev_set_bitmap_ops(struct mddev *mddev)
>>> {
>>> + struct bitmap_operations *old = mddev->bitmap_ops;
>>> + struct md_submodule_head *head;
>>> +
>>> + if (mddev->bitmap_id == ID_BITMAP_NONE ||
>>> + (old && old->head.id == mddev->bitmap_id))
>>> + return true;
>>> +
>>> xa_lock(&md_submodule);
>>> - mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>>> + head = xa_load(&md_submodule, mddev->bitmap_id);
>>> xa_unlock(&md_submodule);
>>>
>>> - if (!mddev->bitmap_ops) {
>>> - pr_warn_once("md: can't find bitmap id %d\n",
>>> mddev->bitmap_id);
>>> + if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
>>> + pr_err("md: can't find bitmap id %d\n",
>>> mddev->bitmap_id);
>>> return false;
>>> }
>>>
>>> + if (old && old->group)
>>> + sysfs_remove_group(&mddev->kobj, old->group);
>>
>> I think you're handling a competition problem here. But I don't know
>> how the old/old->group is already created when creating an array.
>> Could you explain this?
>
> It's not possible now, this is because I think we want to be able to
> switch existing array with old bitmap to new bitmap.
Can we add the check of old when we really want it?
Regards
Xiao
>
> Thanks,
> Kuai
>
>>
>> Regards
>> Xiao
>
Hi,
在 2025/05/27 10:15, Xiao Ni 写道:
>
> 在 2025/5/26 下午3:57, Yu Kuai 写道:
>> Hi,
>>
>> 在 2025/05/26 14:52, Xiao Ni 写道:
>>> On Sat, May 24, 2025 at 2:18 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>>>>
>>>> From: Yu Kuai <yukuai3@huawei.com>
>>>>
>>>> Currently bitmap_ops is registered while allocating mddev, this is fine
>>>> when there is only one bitmap_ops, however, after introduing a new
>>>> bitmap_ops, user space need a time window to choose which bitmap_ops to
>>>> use while creating new array.
>>>
>>> Could you give more explanation about what the time window is? Is it
>>> between setting llbitmap by bitmap_type and md_bitmap_create?
>>
>> The window after this patch is that user can write the new sysfs after
>> allocating mddev, and before running the array.
>
>
> Thanks for the explanation. Is it ok to add it in the commit log message?
ok
>
>>>
>>>>
>>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>>> ---
>>>> drivers/md/md.c | 86
>>>> +++++++++++++++++++++++++++++++------------------
>>>> 1 file changed, 55 insertions(+), 31 deletions(-)
>>>>
>>>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>>>> index 4eb0c6effd5b..dc4b85f30e13 100644
>>>> --- a/drivers/md/md.c
>>>> +++ b/drivers/md/md.c
>>>> @@ -674,39 +674,50 @@ static void no_op(struct percpu_ref *r) {}
>>>>
>>>> static bool mddev_set_bitmap_ops(struct mddev *mddev)
>>>> {
>>>> + struct bitmap_operations *old = mddev->bitmap_ops;
>>>> + struct md_submodule_head *head;
>>>> +
>>>> + if (mddev->bitmap_id == ID_BITMAP_NONE ||
>>>> + (old && old->head.id == mddev->bitmap_id))
>>>> + return true;
>>>> +
>>>> xa_lock(&md_submodule);
>>>> - mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>>>> + head = xa_load(&md_submodule, mddev->bitmap_id);
>>>> xa_unlock(&md_submodule);
>>>>
>>>> - if (!mddev->bitmap_ops) {
>>>> - pr_warn_once("md: can't find bitmap id %d\n",
>>>> mddev->bitmap_id);
>>>> + if (WARN_ON_ONCE(!head || head->type != MD_BITMAP)) {
>>>> + pr_err("md: can't find bitmap id %d\n",
>>>> mddev->bitmap_id);
>>>> return false;
>>>> }
>>>>
>>>> + if (old && old->group)
>>>> + sysfs_remove_group(&mddev->kobj, old->group);
>>>
>>> I think you're handling a competition problem here. But I don't know
>>> how the old/old->group is already created when creating an array.
>>> Could you explain this?
>>
>> It's not possible now, this is because I think we want to be able to
>> switch existing array with old bitmap to new bitmap.
>
>
> Can we add the check of old when we really want it?
I'm fine, and there is no doubt we will want it.
Thanks,
Kuai
>
> Regards
>
> Xiao
>
>>
>> Thanks,
>> Kuai
>>
>>>
>>> Regards
>>> Xiao
>>
>
>
> .
>
© 2016 - 2025 Red Hat, Inc.