md_error is mainly called when a bio fails, so it can run in parallel.
Each personality’s error_handler locks with device_lock, so concurrent
calls are safe.
However, RAID1 and RAID10 require changes for Failfast bio error handling,
which needs a special helper for md_error. For that helper to work, the
regular md_error must also be serialized.
The helper function md_bio_failure_error for failfast will be introduced
in a subsequent commit.
This commit serializes md_error for all RAID personalities. While
unnecessary for RAID levels other than 1 and 10, it has no performance
impact as it is a cold path.
Signed-off-by: Kenta Akagi <k@mgml.me>
---
drivers/md/md.c | 10 +++++++++-
drivers/md/md.h | 4 ++++
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 268410b66b83..5607578a6db9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -705,6 +705,7 @@ int mddev_init(struct mddev *mddev)
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->sync_seq, 0);
spin_lock_init(&mddev->lock);
+ spin_lock_init(&mddev->error_handle_lock);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
@@ -8262,7 +8263,7 @@ void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp)
}
EXPORT_SYMBOL(md_unregister_thread);
-void md_error(struct mddev *mddev, struct md_rdev *rdev)
+void _md_error(struct mddev *mddev, struct md_rdev *rdev)
{
if (!rdev || test_bit(Faulty, &rdev->flags))
return;
@@ -8287,6 +8288,13 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
queue_work(md_misc_wq, &mddev->event_work);
md_new_event();
}
+
+void md_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+ spin_lock(&mddev->error_handle_lock);
+ _md_error(mddev, rdev);
+ spin_unlock(&mddev->error_handle_lock);
+}
EXPORT_SYMBOL(md_error);
/* seq_file implementation /proc/mdstat */
diff --git a/drivers/md/md.h b/drivers/md/md.h
index ec598f9a8381..5177cb609e4b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -619,6 +619,9 @@ struct mddev {
/* The sequence number for sync thread */
atomic_t sync_seq;
+ /* Lock for serializing md_error */
+ spinlock_t error_handle_lock;
+
bool has_superblocks:1;
bool fail_last_dev:1;
bool serialize_policy:1;
@@ -901,6 +904,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
+void _md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
--
2.50.1
Hi,
在 2025/09/15 11:42, Kenta Akagi 写道:
> md_error is mainly called when a bio fails, so it can run in parallel.
> Each personality’s error_handler locks with device_lock, so concurrent
> calls are safe.
>
> However, RAID1 and RAID10 require changes for Failfast bio error handling,
> which needs a special helper for md_error. For that helper to work, the
> regular md_error must also be serialized.
>
> The helper function md_bio_failure_error for failfast will be introduced
> in a subsequent commit.
>
> This commit serializes md_error for all RAID personalities. While
> unnecessary for RAID levels other than 1 and 10, it has no performance
> impact as it is a cold path.
>
> Signed-off-by: Kenta Akagi <k@mgml.me>
> ---
> drivers/md/md.c | 10 +++++++++-
> drivers/md/md.h | 4 ++++
> 2 files changed, 13 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 268410b66b83..5607578a6db9 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -705,6 +705,7 @@ int mddev_init(struct mddev *mddev)
> atomic_set(&mddev->openers, 0);
> atomic_set(&mddev->sync_seq, 0);
> spin_lock_init(&mddev->lock);
> + spin_lock_init(&mddev->error_handle_lock);
Instead of introducing a new lock, can we use device_lock directly?
it's held inside pers->error_handler() now, just move it forward
to md_error().
Thanks,
Kuai
> init_waitqueue_head(&mddev->sb_wait);
> init_waitqueue_head(&mddev->recovery_wait);
> mddev->reshape_position = MaxSector;
> @@ -8262,7 +8263,7 @@ void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp)
> }
> EXPORT_SYMBOL(md_unregister_thread);
>
> -void md_error(struct mddev *mddev, struct md_rdev *rdev)
> +void _md_error(struct mddev *mddev, struct md_rdev *rdev)
> {
> if (!rdev || test_bit(Faulty, &rdev->flags))
> return;
> @@ -8287,6 +8288,13 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
> queue_work(md_misc_wq, &mddev->event_work);
> md_new_event();
> }
> +
> +void md_error(struct mddev *mddev, struct md_rdev *rdev)
> +{
> + spin_lock(&mddev->error_handle_lock);
> + _md_error(mddev, rdev);
> + spin_unlock(&mddev->error_handle_lock);
> +}
> EXPORT_SYMBOL(md_error);
>
> /* seq_file implementation /proc/mdstat */
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index ec598f9a8381..5177cb609e4b 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -619,6 +619,9 @@ struct mddev {
> /* The sequence number for sync thread */
> atomic_t sync_seq;
>
> + /* Lock for serializing md_error */
> + spinlock_t error_handle_lock;
> +
> bool has_superblocks:1;
> bool fail_last_dev:1;
> bool serialize_policy:1;
> @@ -901,6 +904,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi);
> extern void md_write_inc(struct mddev *mddev, struct bio *bi);
> extern void md_write_end(struct mddev *mddev);
> extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
> +void _md_error(struct mddev *mddev, struct md_rdev *rdev);
> extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
> extern void md_finish_reshape(struct mddev *mddev);
> void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
>
On 2025/09/18 10:04, Yu Kuai wrote:
> Hi,
>
> 在 2025/09/15 11:42, Kenta Akagi 写道:
>> md_error is mainly called when a bio fails, so it can run in parallel.
>> Each personality’s error_handler locks with device_lock, so concurrent
>> calls are safe.
>>
>> However, RAID1 and RAID10 require changes for Failfast bio error handling,
>> which needs a special helper for md_error. For that helper to work, the
>> regular md_error must also be serialized.
>>
>> The helper function md_bio_failure_error for failfast will be introduced
>> in a subsequent commit.
>>
>> This commit serializes md_error for all RAID personalities. While
>> unnecessary for RAID levels other than 1 and 10, it has no performance
>> impact as it is a cold path.
>>
>> Signed-off-by: Kenta Akagi <k@mgml.me>
>> ---
>> drivers/md/md.c | 10 +++++++++-
>> drivers/md/md.h | 4 ++++
>> 2 files changed, 13 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 268410b66b83..5607578a6db9 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -705,6 +705,7 @@ int mddev_init(struct mddev *mddev)
>> atomic_set(&mddev->openers, 0);
>> atomic_set(&mddev->sync_seq, 0);
>> spin_lock_init(&mddev->lock);
>> + spin_lock_init(&mddev->error_handle_lock);
>
> Instead of introducing a new lock, can we use device_lock directly?
> it's held inside pers->error_handler() now, just move it forward
> to md_error().
It seems possible. In all personalities, both the caller and the callee
of md_error() appear to have no dependency on device_lock.
I will move device_lock to mddev and use it.
Thanks,
Akagi
>
> Thanks,
> Kuai
>
>> init_waitqueue_head(&mddev->sb_wait);
>> init_waitqueue_head(&mddev->recovery_wait);
>> mddev->reshape_position = MaxSector;
>> @@ -8262,7 +8263,7 @@ void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp)
>> }
>> EXPORT_SYMBOL(md_unregister_thread);
>> -void md_error(struct mddev *mddev, struct md_rdev *rdev)
>> +void _md_error(struct mddev *mddev, struct md_rdev *rdev)
>> {
>> if (!rdev || test_bit(Faulty, &rdev->flags))
>> return;
>> @@ -8287,6 +8288,13 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
>> queue_work(md_misc_wq, &mddev->event_work);
>> md_new_event();
>> }
>> +
>> +void md_error(struct mddev *mddev, struct md_rdev *rdev)
>> +{
>> + spin_lock(&mddev->error_handle_lock);
>> + _md_error(mddev, rdev);
>> + spin_unlock(&mddev->error_handle_lock);
>> +}
>> EXPORT_SYMBOL(md_error);
>> /* seq_file implementation /proc/mdstat */
>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>> index ec598f9a8381..5177cb609e4b 100644
>> --- a/drivers/md/md.h
>> +++ b/drivers/md/md.h
>> @@ -619,6 +619,9 @@ struct mddev {
>> /* The sequence number for sync thread */
>> atomic_t sync_seq;
>> + /* Lock for serializing md_error */
>> + spinlock_t error_handle_lock;
>> +
>> bool has_superblocks:1;
>> bool fail_last_dev:1;
>> bool serialize_policy:1;
>> @@ -901,6 +904,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi);
>> extern void md_write_inc(struct mddev *mddev, struct bio *bi);
>> extern void md_write_end(struct mddev *mddev);
>> extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
>> +void _md_error(struct mddev *mddev, struct md_rdev *rdev);
>> extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
>> extern void md_finish_reshape(struct mddev *mddev);
>> void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
>>
>
>
© 2016 - 2026 Red Hat, Inc.