From: Yu Kuai <yukuai3@huawei.com>
Currently, wait_barrier() holds 'resync_lock' to read 'conf->barrier',
and I/O can't be dispatched until 'barrier' is dropped.

Since the 'barrier' is rarely raised, convert 'resync_lock' to a seqlock
so that taking the lock can be avoided in the fast path.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
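
Not part of the patch, just background for reviewers: a minimal, generic
sketch of the seqlock pattern this conversion relies on (the names below
are illustrative, not from this patch):

	seqlock_t lock;		/* initialised with seqlock_init(&lock) */
	unsigned int seq;

	/* reader: lockless; retries if it raced with a writer */
	do {
		seq = read_seqbegin(&lock);
		/* read shared state, e.g. the barrier counter */
	} while (read_seqretry(&lock, seq));

	/* writer: excludes other writers and forces readers to retry */
	write_seqlock_irq(&lock);
	/* update shared state */
	write_sequnlock_irq(&lock);
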
drivers/md/raid10.c | 85 +++++++++++++++++++++++++++++----------------
drivers/md/raid10.h | 2 +-
2 files changed, 57 insertions(+), 30 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 377d4641bb54..6c2396fe75a0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
#include "raid1-10.c"
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+ do { \
+ write_sequnlock_irq(&(conf)->resync_lock); \
+ cmd; \
+ } while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+ wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+ cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+ wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
/*
* for resync bio, r10bio pointer can be retrieved from the per-bio
* 'struct resync_pages'.
@@ -936,30 +951,29 @@ static void flush_pending_writes(struct r10conf *conf)
static void raise_barrier(struct r10conf *conf, int force)
{
- spin_lock_irq(&conf->resync_lock);
+ write_seqlock_irq(&conf->resync_lock);
BUG_ON(force && !conf->barrier);
/* Wait until no block IO is waiting (unless 'force') */
- wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
- conf->resync_lock);
+ wait_event_barrier(conf, force || !conf->nr_waiting);
/* block any new IO from starting */
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);
/* Now wait for all pending IO to complete */
- wait_event_lock_irq(conf->wait_barrier,
- !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
- conf->resync_lock);
+ wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+ conf->barrier < RESYNC_DEPTH);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}
static void lower_barrier(struct r10conf *conf)
{
unsigned long flags;
- spin_lock_irqsave(&conf->resync_lock, flags);
- conf->barrier--;
- spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+ write_seqlock_irqsave(&conf->resync_lock, flags);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
+ write_sequnlock_irqrestore(&conf->resync_lock, flags);
wake_up_barrier(conf);
}
@@ -992,11 +1006,29 @@ static bool stop_waiting_barrier(struct r10conf *conf)
return false;
}
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+ unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+ if (READ_ONCE(conf->barrier))
+ return false;
+
+ atomic_inc(&conf->nr_pending);
+ if (!read_seqretry(&conf->resync_lock, seq))
+ return true;
+
+ atomic_dec(&conf->nr_pending);
+ return false;
+}
+
static bool wait_barrier(struct r10conf *conf, bool nowait)
{
bool ret = true;
- spin_lock_irq(&conf->resync_lock);
+ if (wait_barrier_nolock(conf))
+ return true;
+
+ write_seqlock_irq(&conf->resync_lock);
if (conf->barrier) {
/* Return false when nowait flag is set */
if (nowait) {
@@ -1004,9 +1036,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
} else {
conf->nr_waiting++;
raid10_log(conf->mddev, "wait barrier");
- wait_event_lock_irq(conf->wait_barrier,
- stop_waiting_barrier(conf),
- conf->resync_lock);
+ wait_event_barrier(conf, stop_waiting_barrier(conf));
conf->nr_waiting--;
}
if (!conf->nr_waiting)
@@ -1015,7 +1045,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
/* Only increment nr_pending when we wait */
if (ret)
atomic_inc(&conf->nr_pending);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
return ret;
}
@@ -1040,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
* must match the number of pending IOs (nr_pending) before
* we continue.
*/
- spin_lock_irq(&conf->resync_lock);
+ write_seqlock_irq(&conf->resync_lock);
conf->array_freeze_pending++;
- conf->barrier++;
+ WRITE_ONCE(conf->barrier, conf->barrier + 1);
conf->nr_waiting++;
- wait_event_lock_irq_cmd(conf->wait_barrier,
- atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
- conf->resync_lock,
- flush_pending_writes(conf));
-
+ wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+ conf->nr_queued + extra, flush_pending_writes(conf));
conf->array_freeze_pending--;
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}
static void unfreeze_array(struct r10conf *conf)
{
/* reverse the effect of the freeze */
- spin_lock_irq(&conf->resync_lock);
- conf->barrier--;
+ write_seqlock_irq(&conf->resync_lock);
+ WRITE_ONCE(conf->barrier, conf->barrier - 1);
conf->nr_waiting--;
wake_up_barrier(conf);
- spin_unlock_irq(&conf->resync_lock);
+ write_sequnlock_irq(&conf->resync_lock);
}
static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -4046,7 +4073,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
INIT_LIST_HEAD(&conf->retry_list);
INIT_LIST_HEAD(&conf->bio_end_io_list);
- spin_lock_init(&conf->resync_lock);
+ seqlock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
atomic_set(&conf->nr_pending, 0);
@@ -4365,7 +4392,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
rdev->new_raid_disk = rdev->raid_disk * 2;
rdev->sectors = size;
}
- conf->barrier = 1;
+ WRITE_ONCE(conf->barrier, 1);
}
return conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 5c0804d8bb1f..8c072ce0bc54 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -76,7 +76,7 @@ struct r10conf {
/* queue pending writes and submit them on unplug */
struct bio_list pending_bio_list;
- spinlock_t resync_lock;
+ seqlock_t resync_lock;
atomic_t nr_pending;
int nr_waiting;
int nr_queued;
--
2.31.1
Hi,
On 2022/09/14 9:49, Yu Kuai wrote:
[...]
> @@ -992,11 +1006,29 @@ static bool stop_waiting_barrier(struct r10conf *conf)
> return false;
> }
>
> +static bool wait_barrier_nolock(struct r10conf *conf)
> +{
> + unsigned int seq = read_seqbegin(&conf->resync_lock);
> +
> + if (READ_ONCE(conf->barrier))
> + return false;
> +
> + atomic_inc(&conf->nr_pending);
> + if (!read_seqretry(&conf->resync_lock, seq))
> + return true;
> +
> + atomic_dec(&conf->nr_pending);
During pressure testing, I found that this is problematic: raise_barrier()
can be sleeping, waiting for nr_pending to reach zero, and the increase and
decrease here will cause raise_barrier() to hang, because when nr_pending
is decreased to 0 here, no wakeup is issued for conf->wait_barrier.

I'll send a new version to fix this.
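
For reference, a rough and untested sketch of the kind of fix I have in
mind (the final version may differ):

static bool wait_barrier_nolock(struct r10conf *conf)
{
	unsigned int seq = read_seqbegin(&conf->resync_lock);

	if (READ_ONCE(conf->barrier))
		return false;

	atomic_inc(&conf->nr_pending);
	if (!read_seqretry(&conf->resync_lock, seq))
		return true;

	/*
	 * Raced with a writer: undo the increment, and wake anyone
	 * (e.g. raise_barrier()) sleeping until nr_pending drops to 0.
	 */
	if (atomic_dec_and_test(&conf->nr_pending))
		wake_up_barrier(conf);

	return false;
}
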
Thanks,
Kuai
> + return false;
> +}
[...]