[PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock

Yu Kuai posted 5 patches 3 years, 6 months ago
[PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock
Posted by Yu Kuai 3 years, 6 months ago
From: Yu Kuai <yukuai3@huawei.com>

Currently, wait_barrier() always takes 'resync_lock' just to read
'conf->barrier', and IO can't be dispatched until the barrier is dropped.

Since the barrier is rarely raised, convert 'resync_lock' to a seqlock so
that taking the lock can be avoided in the fast path.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/raid10.c | 87 ++++++++++++++++++++++++++++++---------------
 drivers/md/raid10.h |  2 +-
 2 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9a28abd19709..2daa7d57034c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
 
 #include "raid1-10.c"
 
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+	do { \
+		write_sequnlock_irq(&(conf)->resync_lock); \
+		cmd; \
+	} while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+		       cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+	wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
 /*
  * for resync bio, r10bio pointer can be retrieved from the per-bio
  * 'struct resync_pages'.
@@ -936,30 +951,29 @@ static void flush_pending_writes(struct r10conf *conf)
 
 static void raise_barrier(struct r10conf *conf, int force)
 {
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
 	BUG_ON(force && !conf->barrier);
 
 	/* Wait until no block IO is waiting (unless 'force') */
-	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-			    conf->resync_lock);
+	wait_event_barrier(conf, force || !conf->nr_waiting);
 
 	/* block any new IO from starting */
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 
 	/* Now wait for all pending IO to complete */
-	wait_event_lock_irq(conf->wait_barrier,
-			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock);
+	wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+				 conf->barrier < RESYNC_DEPTH);
 
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void lower_barrier(struct r10conf *conf)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->barrier--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+	write_seqlock_irqsave(&conf->resync_lock, flags);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
+	write_sequnlock_irqrestore(&conf->resync_lock, flags);
 	wake_up(&conf->wait_barrier);
 }
 
@@ -990,11 +1004,31 @@ static bool stop_waiting_barrier(struct r10conf *conf)
 	return false;
 }
 
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+	unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+	if (READ_ONCE(conf->barrier))
+		return false;
+
+	atomic_inc(&conf->nr_pending);
+	if (!read_seqretry(&conf->resync_lock, seq))
+		return true;
+
+	if (atomic_dec_and_test(&conf->nr_pending))
+		wake_up_barrier(conf);
+
+	return false;
+}
+
 static bool wait_barrier(struct r10conf *conf, bool nowait)
 {
 	bool ret = true;
 
-	spin_lock_irq(&conf->resync_lock);
+	if (wait_barrier_nolock(conf))
+		return true;
+
+	write_seqlock_irq(&conf->resync_lock);
 	if (conf->barrier) {
 		/* Return false when nowait flag is set */
 		if (nowait) {
@@ -1002,9 +1036,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
 		} else {
 			conf->nr_waiting++;
 			raid10_log(conf->mddev, "wait barrier");
-			wait_event_lock_irq(conf->wait_barrier,
-					    stop_waiting_barrier(conf),
-					    conf->resync_lock);
+			wait_event_barrier(conf, stop_waiting_barrier(conf));
 			conf->nr_waiting--;
 		}
 		if (!conf->nr_waiting)
@@ -1013,7 +1045,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
 	/* Only increment nr_pending when we wait */
 	if (ret)
 		atomic_inc(&conf->nr_pending);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 	return ret;
 }
 
@@ -1038,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
 	 * must match the number of pending IOs (nr_pending) before
 	 * we continue.
 	 */
-	spin_lock_irq(&conf->resync_lock);
+	write_seqlock_irq(&conf->resync_lock);
 	conf->array_freeze_pending++;
-	conf->barrier++;
+	WRITE_ONCE(conf->barrier, conf->barrier + 1);
 	conf->nr_waiting++;
-	wait_event_lock_irq_cmd(conf->wait_barrier,
-				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
-				conf->resync_lock,
-				flush_pending_writes(conf));
-
+	wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+			conf->nr_queued + extra, flush_pending_writes(conf));
 	conf->array_freeze_pending--;
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void unfreeze_array(struct r10conf *conf)
 {
 	/* reverse the effect of the freeze */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
+	write_seqlock_irq(&conf->resync_lock);
+	WRITE_ONCE(conf->barrier, conf->barrier - 1);
 	conf->nr_waiting--;
 	wake_up(&conf->wait_barrier);
-	spin_unlock_irq(&conf->resync_lock);
+	write_sequnlock_irq(&conf->resync_lock);
 }
 
 static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -4044,7 +4073,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 	INIT_LIST_HEAD(&conf->bio_end_io_list);
 
-	spin_lock_init(&conf->resync_lock);
+	seqlock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
 	atomic_set(&conf->nr_pending, 0);
 
@@ -4363,7 +4392,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
 				rdev->new_raid_disk = rdev->raid_disk * 2;
 				rdev->sectors = size;
 			}
-		conf->barrier = 1;
+		WRITE_ONCE(conf->barrier, 1);
 	}
 
 	return conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 5c0804d8bb1f..8c072ce0bc54 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -76,7 +76,7 @@ struct r10conf {
 	/* queue pending writes and submit them on unplug */
 	struct bio_list		pending_bio_list;
 
-	spinlock_t		resync_lock;
+	seqlock_t		resync_lock;
 	atomic_t		nr_pending;
 	int			nr_waiting;
 	int			nr_queued;
-- 
2.31.1
Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock
Posted by Logan Gunthorpe 3 years, 6 months ago

On 2022-09-16 05:34, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Currently, wait_barrier() will hold 'resync_lock' to read 'conf->barrier',
> and io can't be dispatched until 'barrier' is dropped.
> 
> Since holding the 'barrier' is not common, convert 'resync_lock' to use
> seqlock so that holding lock can be avoided in fast path.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>

Reviewed-and-Tested-by: Logan Gunthorpe <logang@deltatee.com>

So far, I've run this series 3 times through the mdadm test suite and
haven't detected any issues.

Thanks,

Logan
Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock
Posted by Guoqing Jiang 3 years, 6 months ago

On 9/16/22 7:34 PM, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
>
> Currently, wait_barrier() will hold 'resync_lock' to read 'conf->barrier',
> and io can't be dispatched until 'barrier' is dropped.
>
> Since holding the 'barrier' is not common, convert 'resync_lock' to use
> seqlock so that holding lock can be avoided in fast path.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>   drivers/md/raid10.c | 87 ++++++++++++++++++++++++++++++---------------
>   drivers/md/raid10.h |  2 +-
>   2 files changed, 59 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 9a28abd19709..2daa7d57034c 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
>   
>   #include "raid1-10.c"
>   
> +#define NULL_CMD
> +#define cmd_before(conf, cmd) \
> +	do { \
> +		write_sequnlock_irq(&(conf)->resync_lock); \
> +		cmd; \
> +	} while (0)
> +#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)

The two are not well paired, given that only cmd_before takes a 'cmd'.

> +
> +#define wait_event_barrier_cmd(conf, cond, cmd) \
> +	wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
> +		       cmd_after(conf))
> +
> +#define wait_event_barrier(conf, cond) \
> +	wait_event_barrier_cmd(conf, cond, NULL_CMD)

What is the issue without defining NULL_CMD?

Thanks,
Guoqing
Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock
Posted by Yu Kuai 3 years, 6 months ago
Hi,

在 2022/09/18 19:36, Guoqing Jiang 写道:
> 
> 
> On 9/16/22 7:34 PM, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently, wait_barrier() will hold 'resync_lock' to read 
>> 'conf->barrier',
>> and io can't be dispatched until 'barrier' is dropped.
>>
>> Since holding the 'barrier' is not common, convert 'resync_lock' to use
>> seqlock so that holding lock can be avoided in fast path.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/raid10.c | 87 ++++++++++++++++++++++++++++++---------------
>>   drivers/md/raid10.h |  2 +-
>>   2 files changed, 59 insertions(+), 30 deletions(-)
>>
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index 9a28abd19709..2daa7d57034c 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
>>   #include "raid1-10.c"
>> +#define NULL_CMD
>> +#define cmd_before(conf, cmd) \
>> +    do { \
>> +        write_sequnlock_irq(&(conf)->resync_lock); \
>> +        cmd; \
>> +    } while (0)
>> +#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
> 
> The two is not paired well given only cmd_before needs the 'cmd'.

Maybe I should just remove cmd_after?
> 
>> +
>> +#define wait_event_barrier_cmd(conf, cond, cmd) \
>> +    wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
>> +               cmd_after(conf))
>> +
>> +#define wait_event_barrier(conf, cond) \
>> +    wait_event_barrier_cmd(conf, cond, NULL_CMD)
> 
> What is the issue without define NULL_CMD?
> 

Checkpatch will complain about this:

ERROR: space prohibited before that close parenthesis ')'
#38: FILE: drivers/md/raid10.c:94:
+       wait_event_barrier_cmd(conf, cond, )

total: 1 errors, 0 warnings, 169 lines checked

Thanks,
Kuai
> Thanks,
> Guoqing
> .
> 

Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock
Posted by Guoqing Jiang 3 years, 6 months ago

On 9/19/22 9:08 AM, Yu Kuai wrote:
> Hi,
>
> 在 2022/09/18 19:36, Guoqing Jiang 写道:
>>
>>
>> On 9/16/22 7:34 PM, Yu Kuai wrote:
>>> From: Yu Kuai <yukuai3@huawei.com>
>>>
>>> Currently, wait_barrier() will hold 'resync_lock' to read 
>>> 'conf->barrier',
>>> and io can't be dispatched until 'barrier' is dropped.
>>>
>>> Since holding the 'barrier' is not common, convert 'resync_lock' to use
>>> seqlock so that holding lock can be avoided in fast path.
>>>
>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>> ---
>>>   drivers/md/raid10.c | 87 
>>> ++++++++++++++++++++++++++++++---------------
>>>   drivers/md/raid10.h |  2 +-
>>>   2 files changed, 59 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>>> index 9a28abd19709..2daa7d57034c 100644
>>> --- a/drivers/md/raid10.c
>>> +++ b/drivers/md/raid10.c
>>> @@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
>>>   #include "raid1-10.c"
>>> +#define NULL_CMD
>>> +#define cmd_before(conf, cmd) \
>>> +    do { \
>>> +        write_sequnlock_irq(&(conf)->resync_lock); \
>>> +        cmd; \
>>> +    } while (0)
>>> +#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
>>
>> The two is not paired well given only cmd_before needs the 'cmd'.
>
> May be should I just remove cmd_after?

I'd remove it, but that's just my personal preference.

>>
>>> +
>>> +#define wait_event_barrier_cmd(conf, cond, cmd) \
>>> +    wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, 
>>> cmd), \
>>> +               cmd_after(conf))
>>> +
>>> +#define wait_event_barrier(conf, cond) \
>>> +    wait_event_barrier_cmd(conf, cond, NULL_CMD)
>>
>> What is the issue without define NULL_CMD?
>>
>
> Checkpatch will complain this:
>
> ERROR: space prohibited before that close parenthesis ')'
> #38: FILE: drivers/md/raid10.c:94:
> +       wait_event_barrier_cmd(conf, cond, )
>
> total: 1 errors, 0 warnings, 169 lines checked

Hmm, freeze_array uses it differently, so there are two commands before
sleeping and one after; perhaps this is the best way for now.

Thanks,
Guoqing
Re: [PATCH v3 5/5] md/raid10: convert resync_lock to use seqlock
Posted by Song Liu 3 years, 6 months ago
On Mon, Sep 19, 2022 at 3:28 AM Guoqing Jiang <guoqing.jiang@linux.dev> wrote:
>
>
>
> On 9/19/22 9:08 AM, Yu Kuai wrote:
> > Hi,
> >
> > 在 2022/09/18 19:36, Guoqing Jiang 写道:
> >>
> >>
> >> On 9/16/22 7:34 PM, Yu Kuai wrote:
> >>> From: Yu Kuai <yukuai3@huawei.com>
> >>>
> >>> Currently, wait_barrier() will hold 'resync_lock' to read
> >>> 'conf->barrier',
> >>> and io can't be dispatched until 'barrier' is dropped.
> >>>
> >>> Since holding the 'barrier' is not common, convert 'resync_lock' to use
> >>> seqlock so that holding lock can be avoided in fast path.
> >>>
> >>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> >>> ---
> >>>   drivers/md/raid10.c | 87
> >>> ++++++++++++++++++++++++++++++---------------
> >>>   drivers/md/raid10.h |  2 +-
> >>>   2 files changed, 59 insertions(+), 30 deletions(-)
> >>>
> >>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> >>> index 9a28abd19709..2daa7d57034c 100644
> >>> --- a/drivers/md/raid10.c
> >>> +++ b/drivers/md/raid10.c
> >>> @@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
> >>>   #include "raid1-10.c"
> >>> +#define NULL_CMD
> >>> +#define cmd_before(conf, cmd) \
> >>> +    do { \
> >>> +        write_sequnlock_irq(&(conf)->resync_lock); \
> >>> +        cmd; \
> >>> +    } while (0)
> >>> +#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
> >>
> >> The two is not paired well given only cmd_before needs the 'cmd'.
> >
> > May be should I just remove cmd_after?
>
> I'd remove it but just my personal flavor.
>
> >>
> >>> +
> >>> +#define wait_event_barrier_cmd(conf, cond, cmd) \
> >>> +    wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf,
> >>> cmd), \
> >>> +               cmd_after(conf))
> >>> +
> >>> +#define wait_event_barrier(conf, cond) \
> >>> +    wait_event_barrier_cmd(conf, cond, NULL_CMD)
> >>
> >> What is the issue without define NULL_CMD?
> >>
> >
> > Checkpatch will complain this:
> >
> > ERROR: space prohibited before that close parenthesis ')'
> > #38: FILE: drivers/md/raid10.c:94:
> > +       wait_event_barrier_cmd(conf, cond, )
> >
> > total: 1 errors, 0 warnings, 169 lines checked
>
> Hmm, freeze_array has a different usage for it, so two cmds before sleep
> and one cmd after sleep, perhaps it is the best way for now.

The current version looks good to me. Please let me know if there are any
more concerns.

Applied the set to md-next. Thanks!

Song