From: Yu Kuai <yukuai3@huawei.com>
The api will be used by mdadm to set bitmap_ops while creating new array
or assemble array, prepare to add a new bitmap.
Currently available options are:
cat /sys/block/md0/md/bitmap_type
none [bitmap]
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
Documentation/admin-guide/md.rst | 73 ++++++++++++++----------
drivers/md/md.c | 96 ++++++++++++++++++++++++++++++--
drivers/md/md.h | 2 +
3 files changed, 135 insertions(+), 36 deletions(-)
diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst
index 4ff2cc291d18..356d2a344f08 100644
--- a/Documentation/admin-guide/md.rst
+++ b/Documentation/admin-guide/md.rst
@@ -347,6 +347,49 @@ All md devices contain:
active-idle
like active, but no writes have been seen for a while (safe_mode_delay).
+ consistency_policy
+ This indicates how the array maintains consistency in case of unexpected
+ shutdown. It can be:
+
+ none
+ Array has no redundancy information, e.g. raid0, linear.
+
+ resync
+ Full resync is performed and all redundancy is regenerated when the
+ array is started after unclean shutdown.
+
+ bitmap
+ Resync assisted by a write-intent bitmap.
+
+ journal
+ For raid4/5/6, journal device is used to log transactions and replay
+ after unclean shutdown.
+
+ ppl
+ For raid5 only, Partial Parity Log is used to close the write hole and
+ eliminate resync.
+
+ The accepted values when writing to this file are ``ppl`` and ``resync``,
+ used to enable and disable PPL.
+
+ uuid
+ This indicates the UUID of the array in the following format:
+ xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
+
+ bitmap_type
+ [RW] When read, this file will display the current and available
+ bitmap for this array. The currently active bitmap will be enclosed
+ in [] brackets. Writing an bitmap name or ID to this file will switch
+ control of this array to that new bitmap. Note that writing a new
+ bitmap for created array is forbidden.
+
+ none
+ No bitmap
+ bitmap
+ The default internal bitmap
+
+If bitmap_type is bitmap, then the md device will also contain:
+
bitmap/location
This indicates where the write-intent bitmap for the array is
stored.
@@ -401,36 +444,6 @@ All md devices contain:
once the array becomes non-degraded, and this fact has been
recorded in the metadata.
- consistency_policy
- This indicates how the array maintains consistency in case of unexpected
- shutdown. It can be:
-
- none
- Array has no redundancy information, e.g. raid0, linear.
-
- resync
- Full resync is performed and all redundancy is regenerated when the
- array is started after unclean shutdown.
-
- bitmap
- Resync assisted by a write-intent bitmap.
-
- journal
- For raid4/5/6, journal device is used to log transactions and replay
- after unclean shutdown.
-
- ppl
- For raid5 only, Partial Parity Log is used to close the write hole and
- eliminate resync.
-
- The accepted values when writing to this file are ``ppl`` and ``resync``,
- used to enable and disable PPL.
-
- uuid
- This indicates the UUID of the array in the following format:
- xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
-
-
As component devices are added to an md array, they appear in the ``md``
directory as new directories named::
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 311e52d5173d..4eb0c6effd5b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -672,13 +672,18 @@ static void active_io_release(struct percpu_ref *ref)
static void no_op(struct percpu_ref *r) {}
-static void mddev_set_bitmap_ops(struct mddev *mddev, enum md_submodule_id id)
+static bool mddev_set_bitmap_ops(struct mddev *mddev)
{
xa_lock(&md_submodule);
- mddev->bitmap_ops = xa_load(&md_submodule, id);
+ mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
xa_unlock(&md_submodule);
- if (!mddev->bitmap_ops)
- pr_warn_once("md: can't find bitmap id %d\n", id);
+
+ if (!mddev->bitmap_ops) {
+ pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
+ return false;
+ }
+
+ return true;
}
static void mddev_clear_bitmap_ops(struct mddev *mddev)
@@ -688,8 +693,10 @@ static void mddev_clear_bitmap_ops(struct mddev *mddev)
int mddev_init(struct mddev *mddev)
{
- /* TODO: support more versions */
- mddev_set_bitmap_ops(mddev, ID_BITMAP);
+ mddev->bitmap_id = ID_BITMAP;
+
+ if (!mddev_set_bitmap_ops(mddev))
+ return -EINVAL;
if (percpu_ref_init(&mddev->active_io, active_io_release,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
@@ -4155,6 +4162,82 @@ new_level_store(struct mddev *mddev, const char *buf, size_t len)
static struct md_sysfs_entry md_new_level =
__ATTR(new_level, 0664, new_level_show, new_level_store);
+static ssize_t
+bitmap_type_show(struct mddev *mddev, char *page)
+{
+ struct md_submodule_head *head;
+ unsigned long i;
+ ssize_t len = 0;
+
+ if (mddev->bitmap_id == ID_BITMAP_NONE)
+ len += sprintf(page + len, "[none] ");
+ else
+ len += sprintf(page + len, "none ");
+
+ xa_lock(&md_submodule);
+ xa_for_each(&md_submodule, i, head) {
+ if (head->type != MD_BITMAP)
+ continue;
+
+ if (mddev->bitmap_id == head->id)
+ len += sprintf(page + len, "[%s] ", head->name);
+ else
+ len += sprintf(page + len, "%s ", head->name);
+ }
+ xa_unlock(&md_submodule);
+
+ len += sprintf(page + len, "\n");
+ return len;
+}
+
+static ssize_t
+bitmap_type_store(struct mddev *mddev, const char *buf, size_t len)
+{
+ struct md_submodule_head *head;
+ enum md_submodule_id id;
+ unsigned long i;
+ int err;
+
+ if (mddev->bitmap_ops)
+ return -EBUSY;
+
+ err = kstrtoint(buf, 10, &id);
+ if (!err) {
+ if (id == ID_BITMAP_NONE) {
+ mddev->bitmap_id = id;
+ return len;
+ }
+
+ xa_lock(&md_submodule);
+ head = xa_load(&md_submodule, id);
+ xa_unlock(&md_submodule);
+
+ if (head && head->type == MD_BITMAP) {
+ mddev->bitmap_id = id;
+ return len;
+ }
+ }
+
+ if (cmd_match(buf, "none")) {
+ mddev->bitmap_id = ID_BITMAP_NONE;
+ return len;
+ }
+
+ xa_lock(&md_submodule);
+ xa_for_each(&md_submodule, i, head) {
+ if (head->type == MD_BITMAP && cmd_match(buf, head->name)) {
+ mddev->bitmap_id = head->id;
+ xa_unlock(&md_submodule);
+ return len;
+ }
+ }
+ xa_unlock(&md_submodule);
+ return -ENOENT;
+}
+
+static struct md_sysfs_entry md_bitmap_type =
+__ATTR(bitmap_type, 0664, bitmap_type_show, bitmap_type_store);
+
static ssize_t
layout_show(struct mddev *mddev, char *page)
{
@@ -5719,6 +5802,7 @@ __ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
static struct attribute *md_default_attrs[] = {
&md_level.attr,
&md_new_level.attr,
+ &md_bitmap_type.attr,
&md_layout.attr,
&md_raid_disks.attr,
&md_uuid.attr,
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 13e3f9ce1b79..bf34c0a36551 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -40,6 +40,7 @@ enum md_submodule_id {
ID_CLUSTER,
ID_BITMAP,
ID_LLBITMAP, /* TODO */
+ ID_BITMAP_NONE,
};
struct md_submodule_head {
@@ -565,6 +566,7 @@ struct mddev {
struct percpu_ref writes_pending;
int sync_checkers; /* # of threads checking writes_pending */
+ enum md_submodule_id bitmap_id;
void *bitmap; /* the bitmap for the device */
struct bitmap_operations *bitmap_ops;
struct {
--
2.39.2
On 5/24/25 08:13, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
>
> The api will be used by mdadm to set bitmap_ops while creating new array
> or assemble array, prepare to add a new bitmap.
>
> Currently available options are:
>
> cat /sys/block/md0/md/bitmap_type
> none [bitmap]
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> Documentation/admin-guide/md.rst | 73 ++++++++++++++----------
> drivers/md/md.c | 96 ++++++++++++++++++++++++++++++--
> drivers/md/md.h | 2 +
> 3 files changed, 135 insertions(+), 36 deletions(-)
>
[ .. ]
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 311e52d5173d..4eb0c6effd5b 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -672,13 +672,18 @@ static void active_io_release(struct percpu_ref *ref)
>
> static void no_op(struct percpu_ref *r) {}
>
> -static void mddev_set_bitmap_ops(struct mddev *mddev, enum md_submodule_id id)
> +static bool mddev_set_bitmap_ops(struct mddev *mddev)
> {
> xa_lock(&md_submodule);
> - mddev->bitmap_ops = xa_load(&md_submodule, id);
> + mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
> xa_unlock(&md_submodule);
> - if (!mddev->bitmap_ops)
> - pr_warn_once("md: can't find bitmap id %d\n", id);
> +
> + if (!mddev->bitmap_ops) {
> + pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
> + return false;
> + }
> +
> + return true;
> }
>
> static void mddev_clear_bitmap_ops(struct mddev *mddev)
> @@ -688,8 +693,10 @@ static void mddev_clear_bitmap_ops(struct mddev *mddev)
>
> int mddev_init(struct mddev *mddev)
> {
> - /* TODO: support more versions */
> - mddev_set_bitmap_ops(mddev, ID_BITMAP);
> + mddev->bitmap_id = ID_BITMAP;
> +
> + if (!mddev_set_bitmap_ops(mddev))
> + return -EINVAL;
>
> if (percpu_ref_init(&mddev->active_io, active_io_release,
> PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> @@ -4155,6 +4162,82 @@ new_level_store(struct mddev *mddev, const char *buf, size_t len)
> static struct md_sysfs_entry md_new_level =
> __ATTR(new_level, 0664, new_level_show, new_level_store);
>
> +static ssize_t
> +bitmap_type_show(struct mddev *mddev, char *page)
> +{
> + struct md_submodule_head *head;
> + unsigned long i;
> + ssize_t len = 0;
> +
> + if (mddev->bitmap_id == ID_BITMAP_NONE)
> + len += sprintf(page + len, "[none] ");
> + else
> + len += sprintf(page + len, "none ");
> +
> + xa_lock(&md_submodule);
> + xa_for_each(&md_submodule, i, head) {
> + if (head->type != MD_BITMAP)
> + continue;
> +
> + if (mddev->bitmap_id == head->id)
> + len += sprintf(page + len, "[%s] ", head->name);
> + else
> + len += sprintf(page + len, "%s ", head->name);
> + }
> + xa_unlock(&md_submodule);
> +
> + len += sprintf(page + len, "\n");
> + return len;
> +}
> +
> +static ssize_t
> +bitmap_type_store(struct mddev *mddev, const char *buf, size_t len)
> +{
> + struct md_submodule_head *head;
> + enum md_submodule_id id;
> + unsigned long i;
> + int err;
> +
> + if (mddev->bitmap_ops)
> + return -EBUSY;
> +
Why isn't this protected by md_submodule lock?
The lock is taken when updating ->bitmap_ops, so I would
have expected it to be taken when checking it ...
> + err = kstrtoint(buf, 10, &id);
> + if (!err) {
> + if (id == ID_BITMAP_NONE) {
> + mddev->bitmap_id = id;
> + return len;
> + }
> +
> + xa_lock(&md_submodule);
> + head = xa_load(&md_submodule, id);
> + xa_unlock(&md_submodule);
> +
> + if (head && head->type == MD_BITMAP) {
> + mddev->bitmap_id = id;
> + return len;
> + }
> + }
> +
> + if (cmd_match(buf, "none")) {
> + mddev->bitmap_id = ID_BITMAP_NONE;
> + return len;
> + }
> +
That is odd coding. The 'if (!err)' condition above might
fall through to here, but then we already now that it cannot
match 'none'.
Please invert the logic, first check for 'none', and only
call kstroint if the match failed.
> + xa_lock(&md_submodule);
> + xa_for_each(&md_submodule, i, head) {
> + if (head->type == MD_BITMAP && cmd_match(buf, head->name)) {
> + mddev->bitmap_id = head->id;
> + xa_unlock(&md_submodule);
> + return len;
> + }
> + }
> + xa_unlock(&md_submodule);
> + return -ENOENT;
> +}
> +
> +static struct md_sysfs_entry md_bitmap_type =
> +__ATTR(bitmap_type, 0664, bitmap_type_show, bitmap_type_store);
> +
> static ssize_t
> layout_show(struct mddev *mddev, char *page)
> {
> @@ -5719,6 +5802,7 @@ __ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
> static struct attribute *md_default_attrs[] = {
> &md_level.attr,
> &md_new_level.attr,
> + &md_bitmap_type.attr,
> &md_layout.attr,
> &md_raid_disks.attr,
> &md_uuid.attr,
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 13e3f9ce1b79..bf34c0a36551 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -40,6 +40,7 @@ enum md_submodule_id {
> ID_CLUSTER,
> ID_BITMAP,
> ID_LLBITMAP, /* TODO */
> + ID_BITMAP_NONE,
> };
>
> struct md_submodule_head {
> @@ -565,6 +566,7 @@ struct mddev {
> struct percpu_ref writes_pending;
> int sync_checkers; /* # of threads checking writes_pending */
>
> + enum md_submodule_id bitmap_id;
> void *bitmap; /* the bitmap for the device */
> struct bitmap_operations *bitmap_ops;
> struct {
Cheers,
Hannes
--
Dr. Hannes Reinecke Kernel Storage Architect
hare@suse.de +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
Hi,
在 2025/05/27 14:10, Hannes Reinecke 写道:
> On 5/24/25 08:13, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> The api will be used by mdadm to set bitmap_ops while creating new array
>> or assemble array, prepare to add a new bitmap.
>>
>> Currently available options are:
>>
>> cat /sys/block/md0/md/bitmap_type
>> none [bitmap]
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>> Documentation/admin-guide/md.rst | 73 ++++++++++++++----------
>> drivers/md/md.c | 96 ++++++++++++++++++++++++++++++--
>> drivers/md/md.h | 2 +
>> 3 files changed, 135 insertions(+), 36 deletions(-)
>>
> [ .. ]
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 311e52d5173d..4eb0c6effd5b 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -672,13 +672,18 @@ static void active_io_release(struct percpu_ref
>> *ref)
>> static void no_op(struct percpu_ref *r) {}
>> -static void mddev_set_bitmap_ops(struct mddev *mddev, enum
>> md_submodule_id id)
>> +static bool mddev_set_bitmap_ops(struct mddev *mddev)
>> {
>> xa_lock(&md_submodule);
>> - mddev->bitmap_ops = xa_load(&md_submodule, id);
>> + mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>> xa_unlock(&md_submodule);
>> - if (!mddev->bitmap_ops)
>> - pr_warn_once("md: can't find bitmap id %d\n", id);
>> +
>> + if (!mddev->bitmap_ops) {
>> + pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
>> + return false;
>> + }
>> +
>> + return true;
>> }
>> static void mddev_clear_bitmap_ops(struct mddev *mddev)
>> @@ -688,8 +693,10 @@ static void mddev_clear_bitmap_ops(struct mddev
>> *mddev)
>> int mddev_init(struct mddev *mddev)
>> {
>> - /* TODO: support more versions */
>> - mddev_set_bitmap_ops(mddev, ID_BITMAP);
>> + mddev->bitmap_id = ID_BITMAP;
>> +
>> + if (!mddev_set_bitmap_ops(mddev))
>> + return -EINVAL;
>> if (percpu_ref_init(&mddev->active_io, active_io_release,
>> PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
>> @@ -4155,6 +4162,82 @@ new_level_store(struct mddev *mddev, const char
>> *buf, size_t len)
>> static struct md_sysfs_entry md_new_level =
>> __ATTR(new_level, 0664, new_level_show, new_level_store);
>> +static ssize_t
>> +bitmap_type_show(struct mddev *mddev, char *page)
>> +{
>> + struct md_submodule_head *head;
>> + unsigned long i;
>> + ssize_t len = 0;
>> +
>> + if (mddev->bitmap_id == ID_BITMAP_NONE)
>> + len += sprintf(page + len, "[none] ");
>> + else
>> + len += sprintf(page + len, "none ");
>> +
>> + xa_lock(&md_submodule);
>> + xa_for_each(&md_submodule, i, head) {
>> + if (head->type != MD_BITMAP)
>> + continue;
>> +
>> + if (mddev->bitmap_id == head->id)
>> + len += sprintf(page + len, "[%s] ", head->name);
>> + else
>> + len += sprintf(page + len, "%s ", head->name);
>> + }
>> + xa_unlock(&md_submodule);
>> +
>> + len += sprintf(page + len, "\n");
>> + return len;
>> +}
>> +
>> +static ssize_t
>> +bitmap_type_store(struct mddev *mddev, const char *buf, size_t len)
>> +{
>> + struct md_submodule_head *head;
>> + enum md_submodule_id id;
>> + unsigned long i;
>> + int err;
>> +
>> + if (mddev->bitmap_ops)
>> + return -EBUSY;
>> +
> Why isn't this protected by md_submodule lock?
> The lock is taken when updating ->bitmap_ops, so I would
> have expected it to be taken when checking it ...
The design is that when bitmap is created, user can no longer set
bitmap_id, and it's right without the protecting there will be race
window.
>
>> + err = kstrtoint(buf, 10, &id);
>> + if (!err) {
>> + if (id == ID_BITMAP_NONE) {
>> + mddev->bitmap_id = id;
>> + return len;
>> + }
>> +
>> + xa_lock(&md_submodule);
>> + head = xa_load(&md_submodule, id);
>> + xa_unlock(&md_submodule);
>> +
>> + if (head && head->type == MD_BITMAP) {
>> + mddev->bitmap_id = id;
>> + return len;
>> + }
>> + }
>> +
>> + if (cmd_match(buf, "none")) {
>> + mddev->bitmap_id = ID_BITMAP_NONE;
>> + return len;
>> + }
>> +
> That is odd coding. The 'if (!err)' condition above might
> fall through to here, but then we already now that it cannot
> match 'none'.
The first kstrtoint() is trying to convert the input string to int id,
looks like I missed return -EINVAL if the id can't be found in
md_submodule.
> Please invert the logic, first check for 'none', and only
> call kstroint if the match failed.
Sure, this sounds better.
Thanks for the review!
Kuai
>
>> + xa_lock(&md_submodule);
>> + xa_for_each(&md_submodule, i, head) {
>> + if (head->type == MD_BITMAP && cmd_match(buf, head->name)) {
>> + mddev->bitmap_id = head->id;
>> + xa_unlock(&md_submodule);
>> + return len;
>> + }
>> + }
>> + xa_unlock(&md_submodule);
>> + return -ENOENT;
>> +}
>> +
>> +static struct md_sysfs_entry md_bitmap_type =
>> +__ATTR(bitmap_type, 0664, bitmap_type_show, bitmap_type_store);
>> +
>> static ssize_t
>> layout_show(struct mddev *mddev, char *page)
>> {
>> @@ -5719,6 +5802,7 @@ __ATTR(serialize_policy, S_IRUGO | S_IWUSR,
>> serialize_policy_show,
>> static struct attribute *md_default_attrs[] = {
>> &md_level.attr,
>> &md_new_level.attr,
>> + &md_bitmap_type.attr,
>> &md_layout.attr,
>> &md_raid_disks.attr,
>> &md_uuid.attr,
>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>> index 13e3f9ce1b79..bf34c0a36551 100644
>> --- a/drivers/md/md.h
>> +++ b/drivers/md/md.h
>> @@ -40,6 +40,7 @@ enum md_submodule_id {
>> ID_CLUSTER,
>> ID_BITMAP,
>> ID_LLBITMAP, /* TODO */
>> + ID_BITMAP_NONE,
>> };
>> struct md_submodule_head {
>> @@ -565,6 +566,7 @@ struct mddev {
>> struct percpu_ref writes_pending;
>> int sync_checkers; /* # of threads checking
>> writes_pending */
>> + enum md_submodule_id bitmap_id;
>> void *bitmap; /* the bitmap for the device */
>> struct bitmap_operations *bitmap_ops;
>> struct {
>
> Cheers,
>
> Hannes
On Sat, May 24, 2025 at 02:13:03PM +0800, Yu Kuai wrote:
> + consistency_policy
.. these doc changes look unrelated, or am I missing something?
> -static void mddev_set_bitmap_ops(struct mddev *mddev, enum md_submodule_id id)
> +static bool mddev_set_bitmap_ops(struct mddev *mddev)
> {
> xa_lock(&md_submodule);
> - mddev->bitmap_ops = xa_load(&md_submodule, id);
> + mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
> xa_unlock(&md_submodule);
> - if (!mddev->bitmap_ops)
> - pr_warn_once("md: can't find bitmap id %d\n", id);
> +
> + if (!mddev->bitmap_ops) {
> + pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
> + return false;
> + }
> +
> + return true;
This also looks unrelated and like another prep patch?
Hi,
在 2025/05/26 14:32, Christoph Hellwig 写道:
> On Sat, May 24, 2025 at 02:13:03PM +0800, Yu Kuai wrote:
>> + consistency_policy
>
> .. these doc changes look unrelated, or am I missing something?
The position are moved to the front of the bitmap fields, because now
bitmap/xxx is not always here.
Before:
All md devices contain:
level
...
bitmap/xxx
bitmap/xxx
consistency_policy
uuid
After:
All md devices contain:
level
...
consistency_policy
uuid
bitmap_type
none xxx
bitmap xxx
If bitmap_type is bitmap, then the md device will also contain:
bitmap/xxx
bitmap/xxx
>
>> -static void mddev_set_bitmap_ops(struct mddev *mddev, enum md_submodule_id id)
>> +static bool mddev_set_bitmap_ops(struct mddev *mddev)
>> {
>> xa_lock(&md_submodule);
>> - mddev->bitmap_ops = xa_load(&md_submodule, id);
>> + mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
>> xa_unlock(&md_submodule);
>> - if (!mddev->bitmap_ops)
>> - pr_warn_once("md: can't find bitmap id %d\n", id);
>> +
>> + if (!mddev->bitmap_ops) {
>> + pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
>> + return false;
>> + }
>> +
>> + return true;
>
> This also looks unrelated and like another prep patch?
The new api will set mddev->bitmap_id, and the above change switch to
use mddev->bitmap_id to register bitmap_ops, perhaps I can factor the
change to a new prep patch, like:
md: add a new field mddev->bitmap_id
Before:
mddev_set_bitmap_ops(mddev, ID_BITMAP);
After:
mddev->bitmap_id = ID_BITMAP;
if (!mddev_set_bitmap_ops(mddev))
return -EINVAL;
Thanks,
Kuai
>
> .
>
On Mon, May 26, 2025 at 03:45:39PM +0800, Yu Kuai wrote: > Hi, > > 在 2025/05/26 14:32, Christoph Hellwig 写道: >> On Sat, May 24, 2025 at 02:13:03PM +0800, Yu Kuai wrote: >>> + consistency_policy >> >> .. these doc changes look unrelated, or am I missing something? > > The position are moved to the front of the bitmap fields, because now > bitmap/xxx is not always here. Ah, ok.
On Sat, May 24, 2025 at 2:18 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> The api will be used by mdadm to set bitmap_ops while creating new array
Hi Kuai
Maybe you want to say "set bitmap type" here? And can you explain more
here, why does it need this sys file while creating a new array? The
reason I ask is that it doesn't use a sys file when creating an array
with bitmap.
And if it really needs this, can this be gotten by superblock?
Best Regards
Xiao
> or assemble array, prepare to add a new bitmap.
>
> Currently available options are:
>
> cat /sys/block/md0/md/bitmap_type
> none [bitmap]
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> Documentation/admin-guide/md.rst | 73 ++++++++++++++----------
> drivers/md/md.c | 96 ++++++++++++++++++++++++++++++--
> drivers/md/md.h | 2 +
> 3 files changed, 135 insertions(+), 36 deletions(-)
>
> diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst
> index 4ff2cc291d18..356d2a344f08 100644
> --- a/Documentation/admin-guide/md.rst
> +++ b/Documentation/admin-guide/md.rst
> @@ -347,6 +347,49 @@ All md devices contain:
> active-idle
> like active, but no writes have been seen for a while (safe_mode_delay).
>
> + consistency_policy
> + This indicates how the array maintains consistency in case of unexpected
> + shutdown. It can be:
> +
> + none
> + Array has no redundancy information, e.g. raid0, linear.
> +
> + resync
> + Full resync is performed and all redundancy is regenerated when the
> + array is started after unclean shutdown.
> +
> + bitmap
> + Resync assisted by a write-intent bitmap.
> +
> + journal
> + For raid4/5/6, journal device is used to log transactions and replay
> + after unclean shutdown.
> +
> + ppl
> + For raid5 only, Partial Parity Log is used to close the write hole and
> + eliminate resync.
> +
> + The accepted values when writing to this file are ``ppl`` and ``resync``,
> + used to enable and disable PPL.
> +
> + uuid
> + This indicates the UUID of the array in the following format:
> + xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
> +
> + bitmap_type
> + [RW] When read, this file will display the current and available
> + bitmap for this array. The currently active bitmap will be enclosed
> + in [] brackets. Writing an bitmap name or ID to this file will switch
> + control of this array to that new bitmap. Note that writing a new
> + bitmap for created array is forbidden.
> +
> + none
> + No bitmap
> + bitmap
> + The default internal bitmap
> +
> +If bitmap_type is bitmap, then the md device will also contain:
> +
> bitmap/location
> This indicates where the write-intent bitmap for the array is
> stored.
> @@ -401,36 +444,6 @@ All md devices contain:
> once the array becomes non-degraded, and this fact has been
> recorded in the metadata.
>
> - consistency_policy
> - This indicates how the array maintains consistency in case of unexpected
> - shutdown. It can be:
> -
> - none
> - Array has no redundancy information, e.g. raid0, linear.
> -
> - resync
> - Full resync is performed and all redundancy is regenerated when the
> - array is started after unclean shutdown.
> -
> - bitmap
> - Resync assisted by a write-intent bitmap.
> -
> - journal
> - For raid4/5/6, journal device is used to log transactions and replay
> - after unclean shutdown.
> -
> - ppl
> - For raid5 only, Partial Parity Log is used to close the write hole and
> - eliminate resync.
> -
> - The accepted values when writing to this file are ``ppl`` and ``resync``,
> - used to enable and disable PPL.
> -
> - uuid
> - This indicates the UUID of the array in the following format:
> - xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
> -
> -
> As component devices are added to an md array, they appear in the ``md``
> directory as new directories named::
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 311e52d5173d..4eb0c6effd5b 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -672,13 +672,18 @@ static void active_io_release(struct percpu_ref *ref)
>
> static void no_op(struct percpu_ref *r) {}
>
> -static void mddev_set_bitmap_ops(struct mddev *mddev, enum md_submodule_id id)
> +static bool mddev_set_bitmap_ops(struct mddev *mddev)
> {
> xa_lock(&md_submodule);
> - mddev->bitmap_ops = xa_load(&md_submodule, id);
> + mddev->bitmap_ops = xa_load(&md_submodule, mddev->bitmap_id);
> xa_unlock(&md_submodule);
> - if (!mddev->bitmap_ops)
> - pr_warn_once("md: can't find bitmap id %d\n", id);
> +
> + if (!mddev->bitmap_ops) {
> + pr_warn_once("md: can't find bitmap id %d\n", mddev->bitmap_id);
> + return false;
> + }
> +
> + return true;
> }
>
> static void mddev_clear_bitmap_ops(struct mddev *mddev)
> @@ -688,8 +693,10 @@ static void mddev_clear_bitmap_ops(struct mddev *mddev)
>
> int mddev_init(struct mddev *mddev)
> {
> - /* TODO: support more versions */
> - mddev_set_bitmap_ops(mddev, ID_BITMAP);
> + mddev->bitmap_id = ID_BITMAP;
> +
> + if (!mddev_set_bitmap_ops(mddev))
> + return -EINVAL;
>
> if (percpu_ref_init(&mddev->active_io, active_io_release,
> PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> @@ -4155,6 +4162,82 @@ new_level_store(struct mddev *mddev, const char *buf, size_t len)
> static struct md_sysfs_entry md_new_level =
> __ATTR(new_level, 0664, new_level_show, new_level_store);
>
> +static ssize_t
> +bitmap_type_show(struct mddev *mddev, char *page)
> +{
> + struct md_submodule_head *head;
> + unsigned long i;
> + ssize_t len = 0;
> +
> + if (mddev->bitmap_id == ID_BITMAP_NONE)
> + len += sprintf(page + len, "[none] ");
> + else
> + len += sprintf(page + len, "none ");
> +
> + xa_lock(&md_submodule);
> + xa_for_each(&md_submodule, i, head) {
> + if (head->type != MD_BITMAP)
> + continue;
> +
> + if (mddev->bitmap_id == head->id)
> + len += sprintf(page + len, "[%s] ", head->name);
> + else
> + len += sprintf(page + len, "%s ", head->name);
> + }
> + xa_unlock(&md_submodule);
> +
> + len += sprintf(page + len, "\n");
> + return len;
> +}
> +
> +static ssize_t
> +bitmap_type_store(struct mddev *mddev, const char *buf, size_t len)
> +{
> + struct md_submodule_head *head;
> + enum md_submodule_id id;
> + unsigned long i;
> + int err;
> +
> + if (mddev->bitmap_ops)
> + return -EBUSY;
> +
> + err = kstrtoint(buf, 10, &id);
> + if (!err) {
> + if (id == ID_BITMAP_NONE) {
> + mddev->bitmap_id = id;
> + return len;
> + }
> +
> + xa_lock(&md_submodule);
> + head = xa_load(&md_submodule, id);
> + xa_unlock(&md_submodule);
> +
> + if (head && head->type == MD_BITMAP) {
> + mddev->bitmap_id = id;
> + return len;
> + }
> + }
> +
> + if (cmd_match(buf, "none")) {
> + mddev->bitmap_id = ID_BITMAP_NONE;
> + return len;
> + }
> +
> + xa_lock(&md_submodule);
> + xa_for_each(&md_submodule, i, head) {
> + if (head->type == MD_BITMAP && cmd_match(buf, head->name)) {
> + mddev->bitmap_id = head->id;
> + xa_unlock(&md_submodule);
> + return len;
> + }
> + }
> + xa_unlock(&md_submodule);
> + return -ENOENT;
> +}
> +
> +static struct md_sysfs_entry md_bitmap_type =
> +__ATTR(bitmap_type, 0664, bitmap_type_show, bitmap_type_store);
> +
> static ssize_t
> layout_show(struct mddev *mddev, char *page)
> {
> @@ -5719,6 +5802,7 @@ __ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
> static struct attribute *md_default_attrs[] = {
> &md_level.attr,
> &md_new_level.attr,
> + &md_bitmap_type.attr,
> &md_layout.attr,
> &md_raid_disks.attr,
> &md_uuid.attr,
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 13e3f9ce1b79..bf34c0a36551 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -40,6 +40,7 @@ enum md_submodule_id {
> ID_CLUSTER,
> ID_BITMAP,
> ID_LLBITMAP, /* TODO */
> + ID_BITMAP_NONE,
> };
>
> struct md_submodule_head {
> @@ -565,6 +566,7 @@ struct mddev {
> struct percpu_ref writes_pending;
> int sync_checkers; /* # of threads checking writes_pending */
>
> + enum md_submodule_id bitmap_id;
> void *bitmap; /* the bitmap for the device */
> struct bitmap_operations *bitmap_ops;
> struct {
> --
> 2.39.2
>
Hi, 在 2025/05/26 0:32, Xiao Ni 写道: >> The api will be used by mdadm to set bitmap_ops while creating new array > Hi Kuai > > Maybe you want to say "set bitmap type" here? And can you explain more > here, why does it need this sys file while creating a new array? The > reason I ask is that it doesn't use a sys file when creating an array > with bitmap. I do mean mddev->bitmap_ops here, this is the same as mddev->pers and the md/level api. The mdadm patch will write the new helper before running array. > > And if it really needs this, can this be gotten by superblock? Theoretically, I can, however, the bitmap superblock is read by bitmap_ops->create method, and we need to set the bitmap_ops first. And changing the framwork will be much complex. Thanks, Kuai
On Mon, May 26, 2025 at 9:14 AM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > Hi, > > 在 2025/05/26 0:32, Xiao Ni 写道: > >> The api will be used by mdadm to set bitmap_ops while creating new array > > Hi Kuai > > > > Maybe you want to say "set bitmap type" here? And can you explain more > > here, why does it need this sys file while creating a new array? The > > reason I ask is that it doesn't use a sys file when creating an array > > with bitmap. > > I do mean mddev->bitmap_ops here, this is the same as mddev->pers and > the md/level api. The mdadm patch will write the new helper before > running array. + if (s->btype == BitmapLockless && + sysfs_set_str(&info, NULL, "bitmap_type", "llbitmap") < 0) + goto abort_locked; The three lines of code are in the Create function. From an intuitive perspective, it's used to set bitmap type to llbitmap rather than bitmap ops. And in this patch, it adds the bitmap_type sysfs api to set mddev->bitmap_id. After adding some debug logs, I understand you. It's better to describe here more. Because the sysfs file api is used to set bitmap type. Then it can be used to choose the bitmap ops when creating array in md_create_bitmap > > > > And if it really needs this, can this be gotten by superblock? > > Theoretically, I can, however, the bitmap superblock is read by > bitmap_ops->create method, and we need to set the bitmap_ops > first. And changing the framwork will be much complex. After adding some debug logs, I understand you. Now the default bitmap is "bitmap", so it can set bitmap ops in md_run->md_bitmap_create. If it wants to use llbitmap, it needs to set bitmap type first. Then it can set bitmap ops in md_run->md_bitmap_create. And it's better to explain why it's a better choice to use bitmap_type sys rather than reading from superblock. So in future, developers can understand the design easily. Regards Xiao > > Thanks, > Kuai > >
Hi, 在 2025/05/26 13:11, Xiao Ni 写道: > On Mon, May 26, 2025 at 9:14 AM Yu Kuai <yukuai1@huaweicloud.com> wrote: >> >> Hi, >> >> 在 2025/05/26 0:32, Xiao Ni 写道: >>>> The api will be used by mdadm to set bitmap_ops while creating new array >>> Hi Kuai >>> >>> Maybe you want to say "set bitmap type" here? And can you explain more >>> here, why does it need this sys file while creating a new array? The >>> reason I ask is that it doesn't use a sys file when creating an array >>> with bitmap. >> >> I do mean mddev->bitmap_ops here, this is the same as mddev->pers and >> the md/level api. The mdadm patch will write the new helper before >> running array. > > + if (s->btype == BitmapLockless && > + sysfs_set_str(&info, NULL, "bitmap_type", "llbitmap") < 0) > + goto abort_locked; > > The three lines of code are in the Create function. From an intuitive > perspective, it's used to set bitmap type to llbitmap rather than > bitmap ops. And in this patch, it adds the bitmap_type sysfs api to > set mddev->bitmap_id. After adding some debug logs, I understand you. > It's better to describe here more. Because the sysfs file api is used > to set bitmap type. Then it can be used to choose the bitmap ops when > creating array in md_create_bitmap > Yes, sorry about the misleading, we're setting mddev->bitmap_id by sysfs, and mddev->bitmap_ops by mddev->bitmap_id later in kernel(the next patch). Thanks, Kuai
© 2016 - 2025 Red Hat, Inc.