md_error is mainly called when a bio fails, so it can run in parallel.
Each personality's error_handler takes device_lock, so concurrent
calls are safe.
However, RAID1 and RAID10 require changes for failfast bio error
handling, which needs a special helper for md_error. For that helper to
work correctly, the regular md_error path must also be serialized.
The helper function, md_bio_failure_error, for failfast will be
introduced in a subsequent commit.
This commit serializes md_error for all RAID personalities. While
unnecessary for RAID levels other than 1 and 10, it has no performance
impact as it is a cold path.
Signed-off-by: Kenta Akagi <k@mgml.me>
---
drivers/md/md.c | 10 +++++++++-
drivers/md/md.h | 4 ++++
2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 268410b66b83..5607578a6db9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -705,6 +705,7 @@ int mddev_init(struct mddev *mddev)
atomic_set(&mddev->openers, 0);
atomic_set(&mddev->sync_seq, 0);
spin_lock_init(&mddev->lock);
+ spin_lock_init(&mddev->error_handle_lock);
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
@@ -8262,7 +8263,7 @@ void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp)
}
EXPORT_SYMBOL(md_unregister_thread);
-void md_error(struct mddev *mddev, struct md_rdev *rdev)
+void _md_error(struct mddev *mddev, struct md_rdev *rdev)
{
if (!rdev || test_bit(Faulty, &rdev->flags))
return;
@@ -8287,6 +8288,13 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
queue_work(md_misc_wq, &mddev->event_work);
md_new_event();
}
+
+void md_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+ spin_lock(&mddev->error_handle_lock);
+ _md_error(mddev, rdev);
+ spin_unlock(&mddev->error_handle_lock);
+}
EXPORT_SYMBOL(md_error);
/* seq_file implementation /proc/mdstat */
diff --git a/drivers/md/md.h b/drivers/md/md.h
index ec598f9a8381..5177cb609e4b 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -619,6 +619,9 @@ struct mddev {
/* The sequence number for sync thread */
atomic_t sync_seq;
+ /* Lock for serializing md_error */
+ spinlock_t error_handle_lock;
+
bool has_superblocks:1;
bool fail_last_dev:1;
bool serialize_policy:1;
@@ -901,6 +904,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
extern void md_write_end(struct mddev *mddev);
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
+void _md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_error(struct mddev *mddev, struct md_rdev *rdev);
extern void md_finish_reshape(struct mddev *mddev);
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
--
2.50.1
Hi, 在 2025/09/15 11:42, Kenta Akagi 写道: > md_error is mainly called when a bio fails, so it can run in parallel. > Each personality’s error_handler locks with device_lock, so concurrent > calls are safe. > > However, RAID1 and RAID10 require changes for Failfast bio error handling, > which needs a special helper for md_error. For that helper to work, the > regular md_error must also be serialized. > > The helper function md_bio_failure_error for failfast will be introduced > in a subsequent commit. > > This commit serializes md_error for all RAID personalities. While > unnecessary for RAID levels other than 1 and 10, it has no performance > impact as it is a cold path. > > Signed-off-by: Kenta Akagi <k@mgml.me> > --- > drivers/md/md.c | 10 +++++++++- > drivers/md/md.h | 4 ++++ > 2 files changed, 13 insertions(+), 1 deletion(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 268410b66b83..5607578a6db9 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -705,6 +705,7 @@ int mddev_init(struct mddev *mddev) > atomic_set(&mddev->openers, 0); > atomic_set(&mddev->sync_seq, 0); > spin_lock_init(&mddev->lock); > + spin_lock_init(&mddev->error_handle_lock); Instead of introduing a new lock, can we use device_lock directly? it's held inside pers->error_handler() now, just move it forward to md_error(). 
Thanks, Kuai > init_waitqueue_head(&mddev->sb_wait); > init_waitqueue_head(&mddev->recovery_wait); > mddev->reshape_position = MaxSector; > @@ -8262,7 +8263,7 @@ void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp) > } > EXPORT_SYMBOL(md_unregister_thread); > > -void md_error(struct mddev *mddev, struct md_rdev *rdev) > +void _md_error(struct mddev *mddev, struct md_rdev *rdev) > { > if (!rdev || test_bit(Faulty, &rdev->flags)) > return; > @@ -8287,6 +8288,13 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev) > queue_work(md_misc_wq, &mddev->event_work); > md_new_event(); > } > + > +void md_error(struct mddev *mddev, struct md_rdev *rdev) > +{ > + spin_lock(&mddev->error_handle_lock); > + _md_error(mddev, rdev); > + spin_unlock(&mddev->error_handle_lock); > +} > EXPORT_SYMBOL(md_error); > > /* seq_file implementation /proc/mdstat */ > diff --git a/drivers/md/md.h b/drivers/md/md.h > index ec598f9a8381..5177cb609e4b 100644 > --- a/drivers/md/md.h > +++ b/drivers/md/md.h > @@ -619,6 +619,9 @@ struct mddev { > /* The sequence number for sync thread */ > atomic_t sync_seq; > > + /* Lock for serializing md_error */ > + spinlock_t error_handle_lock; > + > bool has_superblocks:1; > bool fail_last_dev:1; > bool serialize_policy:1; > @@ -901,6 +904,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi); > extern void md_write_inc(struct mddev *mddev, struct bio *bi); > extern void md_write_end(struct mddev *mddev); > extern void md_done_sync(struct mddev *mddev, int blocks, int ok); > +void _md_error(struct mddev *mddev, struct md_rdev *rdev); > extern void md_error(struct mddev *mddev, struct md_rdev *rdev); > extern void md_finish_reshape(struct mddev *mddev); > void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, >
On 2025/09/18 10:04, Yu Kuai wrote: > Hi, > > 在 2025/09/15 11:42, Kenta Akagi 写道: >> md_error is mainly called when a bio fails, so it can run in parallel. >> Each personality’s error_handler locks with device_lock, so concurrent >> calls are safe. >> >> However, RAID1 and RAID10 require changes for Failfast bio error handling, >> which needs a special helper for md_error. For that helper to work, the >> regular md_error must also be serialized. >> >> The helper function md_bio_failure_error for failfast will be introduced >> in a subsequent commit. >> >> This commit serializes md_error for all RAID personalities. While >> unnecessary for RAID levels other than 1 and 10, it has no performance >> impact as it is a cold path. >> >> Signed-off-by: Kenta Akagi <k@mgml.me> >> --- >> drivers/md/md.c | 10 +++++++++- >> drivers/md/md.h | 4 ++++ >> 2 files changed, 13 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/md/md.c b/drivers/md/md.c >> index 268410b66b83..5607578a6db9 100644 >> --- a/drivers/md/md.c >> +++ b/drivers/md/md.c >> @@ -705,6 +705,7 @@ int mddev_init(struct mddev *mddev) >> atomic_set(&mddev->openers, 0); >> atomic_set(&mddev->sync_seq, 0); >> spin_lock_init(&mddev->lock); >> + spin_lock_init(&mddev->error_handle_lock); > > Instead of introduing a new lock, can we use device_lock directly? > it's held inside pers->error_handler() now, just move it forward > to md_error(). It seems possible. In all personalities, both the caller and the callee of md_error() appear to have no dependency on device_lock. I will move device_lock to mddev and use it. 
Thanks, Akagi > > Thanks, > Kuai > >> init_waitqueue_head(&mddev->sb_wait); >> init_waitqueue_head(&mddev->recovery_wait); >> mddev->reshape_position = MaxSector; >> @@ -8262,7 +8263,7 @@ void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp) >> } >> EXPORT_SYMBOL(md_unregister_thread); >> -void md_error(struct mddev *mddev, struct md_rdev *rdev) >> +void _md_error(struct mddev *mddev, struct md_rdev *rdev) >> { >> if (!rdev || test_bit(Faulty, &rdev->flags)) >> return; >> @@ -8287,6 +8288,13 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev) >> queue_work(md_misc_wq, &mddev->event_work); >> md_new_event(); >> } >> + >> +void md_error(struct mddev *mddev, struct md_rdev *rdev) >> +{ >> + spin_lock(&mddev->error_handle_lock); >> + _md_error(mddev, rdev); >> + spin_unlock(&mddev->error_handle_lock); >> +} >> EXPORT_SYMBOL(md_error); >> /* seq_file implementation /proc/mdstat */ >> diff --git a/drivers/md/md.h b/drivers/md/md.h >> index ec598f9a8381..5177cb609e4b 100644 >> --- a/drivers/md/md.h >> +++ b/drivers/md/md.h >> @@ -619,6 +619,9 @@ struct mddev { >> /* The sequence number for sync thread */ >> atomic_t sync_seq; >> + /* Lock for serializing md_error */ >> + spinlock_t error_handle_lock; >> + >> bool has_superblocks:1; >> bool fail_last_dev:1; >> bool serialize_policy:1; >> @@ -901,6 +904,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi); >> extern void md_write_inc(struct mddev *mddev, struct bio *bi); >> extern void md_write_end(struct mddev *mddev); >> extern void md_done_sync(struct mddev *mddev, int blocks, int ok); >> +void _md_error(struct mddev *mddev, struct md_rdev *rdev); >> extern void md_error(struct mddev *mddev, struct md_rdev *rdev); >> extern void md_finish_reshape(struct mddev *mddev); >> void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, >> > >
© 2016 - 2025 Red Hat, Inc.