Since commit 9a567843f7ce ("md: allow last device to be forcibly
removed from RAID1/RAID10."), RAID1/10 arrays can lose all rdevs.

Before that commit, losing the array's last rdev, and thus reaching
the end of raid{1,10}_error without an early return, never occurred.
Both situations can occur in the current implementation.

As a result, when mddev->fail_last_dev is set, a spurious pr_crit
message can be printed.

This patch prevents the "Operation continuing" message from being
printed when the array is no longer operational.
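To illustrate the condition being guarded, here is a minimal userspace
sketch; the struct and helpers are simplified stand-ins for
illustration, not the kernel implementation:

#include <stdio.h>

/* Simplified stand-ins for the kernel state involved. */
struct array_state {
	int raid_disks;	/* conf->raid_disks */
	int degraded;	/* mddev->degraded */
};

/* Before: the message pair is printed unconditionally. */
static void log_failure_old(const struct array_state *a, const char *dev)
{
	printf("md/raid1:md0: Disk failure on %s, disabling device.\n", dev);
	printf("md/raid1:md0: Operation continuing on %d devices.\n",
	       a->raid_disks - a->degraded);
}

/* After: the pair is printed only while an rdev is still operational. */
static void log_failure_new(const struct array_state *a, const char *dev)
{
	if (a->raid_disks - a->degraded > 0)
		log_failure_old(a, dev);
}

int main(void)
{
	/* State after fail_last_dev forced the last rdev Faulty. */
	struct array_state a = { .raid_disks = 2, .degraded = 2 };

	log_failure_old(&a, "loop1");	/* spurious "0 devices" message */
	log_failure_new(&a, "loop1");	/* silent, as intended */
	return 0;
}

The reproduction below shows the spurious message on an unmodified
kernel:
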
root@fedora:~# mdadm --create --verbose /dev/md0 --level=1 \
--raid-devices=2 /dev/loop0 /dev/loop1
mdadm: Note: this array has metadata at the start and
may not be suitable as a boot device. If you plan to
store '/boot' on this device please ensure that
your boot-loader understands md/v1.x metadata, or use
--metadata=0.90
mdadm: size set to 1046528K
Continue creating array? y
mdadm: Defaulting to version 1.2 metadata
mdadm: array /dev/md0 started.
root@fedora:~# echo 1 > /sys/block/md0/md/fail_last_dev
root@fedora:~# mdadm --fail /dev/md0 loop0
mdadm: set loop0 faulty in /dev/md0
root@fedora:~# mdadm --fail /dev/md0 loop1
mdadm: set device faulty failed for loop1: Device or resource busy
root@fedora:~# dmesg | tail -n 4
[ 1314.359674] md/raid1:md0: Disk failure on loop0, disabling device.
md/raid1:md0: Operation continuing on 1 devices.
[ 1315.506633] md/raid1:md0: Disk failure on loop1, disabling device.
md/raid1:md0: Operation continuing on 0 devices.
root@fedora:~#

Fixes: 9a567843f7ce ("md: allow last device to be forcibly removed from RAID1/RAID10.")
Signed-off-by: Kenta Akagi <k@mgml.me>
---
drivers/md/raid1.c | 9 +++++----
drivers/md/raid10.c | 9 +++++----
2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index febe2849a71a..b3c845855841 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1803,6 +1803,11 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
update_lastdev(conf);
}
set_bit(Faulty, &rdev->flags);
+ if ((conf->raid_disks - mddev->degraded) > 0)
+ pr_crit("md/raid1:%s: Disk failure on %pg, disabling device.\n"
+ "md/raid1:%s: Operation continuing on %d devices.\n",
+ mdname(mddev), rdev->bdev,
+ mdname(mddev), conf->raid_disks - mddev->degraded);
spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
@@ -1810,10 +1815,6 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
- pr_crit("md/raid1:%s: Disk failure on %pg, disabling device.\n"
- "md/raid1:%s: Operation continuing on %d devices.\n",
- mdname(mddev), rdev->bdev,
- mdname(mddev), conf->raid_disks - mddev->degraded);
}

static void print_conf(struct r1conf *conf)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index be5fd77da3e1..4f3ef43ebd2a 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2059,11 +2059,12 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
set_bit(Faulty, &rdev->flags);
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_DEVS) | BIT(MD_SB_CHANGE_PENDING));
+ if (enough(conf, -1))
+ pr_crit("md/raid10:%s: Disk failure on %pg, disabling device.\n"
+ "md/raid10:%s: Operation continuing on %d devices.\n",
+ mdname(mddev), rdev->bdev,
+ mdname(mddev), conf->geo.raid_disks - mddev->degraded);
spin_unlock_irqrestore(&conf->device_lock, flags);
- pr_crit("md/raid10:%s: Disk failure on %pg, disabling device.\n"
- "md/raid10:%s: Operation continuing on %d devices.\n",
- mdname(mddev), rdev->bdev,
- mdname(mddev), conf->geo.raid_disks - mddev->degraded);
}

static void print_conf(struct r10conf *conf)
--
2.50.1
Dear Kenta,

Thank you for your patch, and well-written commit message and test.
Reading just the summary, I didn’t know what it’s about. Should you
resend, maybe:

> md/raid1,raid10: Fix logging `Operation continuing on 0 devices.`

Am 15.09.25 um 05:42 schrieb Kenta Akagi:
> Since commit 9a567843f7ce ("md: allow last device to be forcibly
> removed from RAID1/RAID10."), RAID1/10 arrays can now lose all rdevs.

[…]

> root@fedora:~# dmesg | tail -n 4
> [ 1314.359674] md/raid1:md0: Disk failure on loop0, disabling device.
> md/raid1:md0: Operation continuing on 1 devices.
> [ 1315.506633] md/raid1:md0: Disk failure on loop1, disabling device.
> md/raid1:md0: Operation continuing on 0 devices.

Shouldn’t the first line of the critical log still be printed, but your
patch would remove it?
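For example, something like this (just a sketch of what I am asking
about, using only the identifiers from your patch; untested):

	pr_crit("md/raid1:%s: Disk failure on %pg, disabling device.\n",
		mdname(mddev), rdev->bdev);
	if (conf->raid_disks - mddev->degraded > 0)
		pr_crit("md/raid1:%s: Operation continuing on %d devices.\n",
			mdname(mddev),
			conf->raid_disks - mddev->degraded);

That way the disk-failure notice itself is never lost.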
[…]

Kind regards,

Paul
Hi,

Thank you for reviewing.

On 2025/09/15 16:19, Paul Menzel wrote:
> Thank you for your patch, and well-written commit message and test.
> Reading just the summary, I didn’t know what it’s about. Should you
> resend, maybe:
>
>> md/raid1,raid10: Fix logging `Operation continuing on 0 devices.`

Understood. I'll fix the commit message and title as you advised.

[…]

>> [ 1314.359674] md/raid1:md0: Disk failure on loop0, disabling device.
>> md/raid1:md0: Operation continuing on 1 devices.
>> [ 1315.506633] md/raid1:md0: Disk failure on loop1, disabling device.
>> md/raid1:md0: Operation continuing on 0 devices.
>
> Shouldn’t the first line of the critical log still be printed, but your
> patch would remove it?

"Operation continuing on 1 devices." will remain after this patch;
"Operation continuing on 0 devices." will be removed.

I thought that since patch 8 in this series introduces the message
"Cannot continue operation", the message "Operation continuing on 0
devices." would no longer be necessary. However, the conditions under
which the two messages are output differ slightly.

"Cannot continue operation" is output when md_error is called on the
last rdev and MD_BROKEN is set. "Operation continuing on 0 devices."
is output when, after md_error is called on the last rdev and
MD_BROKEN is set, the rdev becomes Faulty because fail_last_dev is
set. In both cases the array can no longer operate normally, but the
state of the rdevs, and whether the array can still be read, differ.

Would it be better to only adjust the message itself? E.g. "Operation
cannot continue, there are 0 devices." Or, since setting fail_last_dev
is presumably done only by power users, perhaps it is acceptable to
leave this odd "continuing on 0 devices" message as is.
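To make the difference concrete, here is a rough userspace model of
the two paths (heavily simplified; the struct, helper, and flag names
are stand-ins for illustration, not the kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Rough stand-ins for the kernel state discussed above. */
struct md_model {
	int  operational_rdevs;
	bool fail_last_dev;
	bool broken;		/* models MD_BROKEN */
};

static void md_error_model(struct md_model *md)
{
	if (md->operational_rdevs == 1) {
		md->broken = true;
		/* Patch 8 of this series logs at this point: */
		printf("md0: Cannot continue operation\n");
		if (!md->fail_last_dev)
			return;	/* the last rdev is not marked Faulty */
	}
	/* With fail_last_dev set, even the last rdev becomes Faulty;
	 * this is the path that used to log "continuing on 0 devices". */
	md->operational_rdevs--;
	printf("md0: Operation continuing on %d devices\n",
	       md->operational_rdevs);
}

int main(void)
{
	struct md_model md = { .operational_rdevs = 2,
			       .fail_last_dev = true };

	md_error_model(&md);	/* "continuing on 1 devices" */
	md_error_model(&md);	/* both messages fire on the last rdev */
	return 0;
}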
Thanks,
Akagi