[v1] md: fix and refactor io accounting and 'active_io'

[PATCH -next 3/8] raid5: fix missing io accounting in raid5_align_endio()

Posted by Yu Kuai 2 years, 7 months ago

From: Yu Kuai <yukuai3@huawei.com>

Io will only be accounted as done from raid5_align_endio() if the io
succeed, and io inflight counter will be leaked if such io failed.

Fix this problem by switching to use md_account_bio() for io accounting.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 drivers/md/raid5.c | 29 ++++++++---------------------
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index cef0b400b2ee..4cdb35e54251 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
  */
 static void raid5_align_endio(struct bio *bi)
 {
-	struct md_io_clone *md_io_clone = bi->bi_private;
-	struct bio *raid_bi = md_io_clone->orig_bio;
-	struct mddev *mddev;
-	struct r5conf *conf;
-	struct md_rdev *rdev;
+	struct bio *raid_bi = bi->bi_private;
+	struct md_rdev *rdev = (void *)raid_bi->bi_next;
+	struct mddev *mddev = rdev->mddev;
+	struct r5conf *conf = mddev->private;
 	blk_status_t error = bi->bi_status;
-	unsigned long start_time = md_io_clone->start_time;
 
 	bio_put(bi);
-
-	rdev = (void*)raid_bi->bi_next;
 	raid_bi->bi_next = NULL;
-	mddev = rdev->mddev;
-	conf = mddev->private;
-
 	rdev_dec_pending(rdev, conf->mddev);
 
 	if (!error) {
-		if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
-			bio_end_io_acct(raid_bi, start_time);
 		bio_endio(raid_bi);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
 			wake_up(&conf->wait_for_quiescent);
@@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 	struct md_rdev *rdev;
 	sector_t sector, end_sector, first_bad;
 	int bad_sectors, dd_idx;
-	struct md_io_clone *md_io_clone;
 	bool did_inc;
 
 	if (!in_chunk_boundary(mddev, raid_bio)) {
@@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
 		return 0;
 	}
 
-	align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
-				    &mddev->io_clone_set);
-	md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone);
+	md_account_bio(mddev, &raid_bio);
 	raid_bio->bi_next = (void *)rdev;
-	if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
-		md_io_clone->start_time = bio_start_io_acct(raid_bio);
-	md_io_clone->orig_bio = raid_bio;
 
+	align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
+				    &mddev->bio_set);
 	align_bio->bi_end_io = raid5_align_endio;
-	align_bio->bi_private = md_io_clone;
+	align_bio->bi_private = raid_bio;
 	align_bio->bi_iter.bi_sector = sector;
 
 	/* No reshape active, so we can trust rdev->data_offset */
-- 
2.39.2

Re: [PATCH -next 3/8] raid5: fix missing io accounting in raid5_align_endio()

Posted by Paul Menzel 2 years, 7 months ago

Dear Yu,


Thank you for your patch.

Am 19.06.23 um 22:48 schrieb Yu Kuai:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Io will only be accounted as done from raid5_align_endio() if the io
> succeed, and io inflight counter will be leaked if such io failed.

succeed*s* or succeed*ed*?

> Fix this problem by switching to use md_account_bio() for io accounting.

How can this be tested?

> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>   drivers/md/raid5.c | 29 ++++++++---------------------
>   1 file changed, 8 insertions(+), 21 deletions(-)
> 
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index cef0b400b2ee..4cdb35e54251 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
>    */
>   static void raid5_align_endio(struct bio *bi)
>   {
> -	struct md_io_clone *md_io_clone = bi->bi_private;
> -	struct bio *raid_bi = md_io_clone->orig_bio;
> -	struct mddev *mddev;
> -	struct r5conf *conf;
> -	struct md_rdev *rdev;
> +	struct bio *raid_bi = bi->bi_private;
> +	struct md_rdev *rdev = (void *)raid_bi->bi_next;
> +	struct mddev *mddev = rdev->mddev;
> +	struct r5conf *conf = mddev->private;
>   	blk_status_t error = bi->bi_status;
> -	unsigned long start_time = md_io_clone->start_time;
>   
>   	bio_put(bi);
> -
> -	rdev = (void*)raid_bi->bi_next;
>   	raid_bi->bi_next = NULL;
> -	mddev = rdev->mddev;
> -	conf = mddev->private;
> -

This looks like unnecessary refactoring. No idea what the preferred 
style for the subsystem is though. If it is wanted, maybe make it a 
separate commit?

>   	rdev_dec_pending(rdev, conf->mddev);
>   
>   	if (!error) {
> -		if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
> -			bio_end_io_acct(raid_bi, start_time);
>   		bio_endio(raid_bi);
>   		if (atomic_dec_and_test(&conf->active_aligned_reads))
>   			wake_up(&conf->wait_for_quiescent);
> @@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
>   	struct md_rdev *rdev;
>   	sector_t sector, end_sector, first_bad;
>   	int bad_sectors, dd_idx;
> -	struct md_io_clone *md_io_clone;
>   	bool did_inc;
>   
>   	if (!in_chunk_boundary(mddev, raid_bio)) {
> @@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
>   		return 0;
>   	}
>   
> -	align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> -				    &mddev->io_clone_set);
> -	md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone);
> +	md_account_bio(mddev, &raid_bio);
>   	raid_bio->bi_next = (void *)rdev;
> -	if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
> -		md_io_clone->start_time = bio_start_io_acct(raid_bio);
> -	md_io_clone->orig_bio = raid_bio;
>   
> +	align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> +				    &mddev->bio_set);
>   	align_bio->bi_end_io = raid5_align_endio;
> -	align_bio->bi_private = md_io_clone;
> +	align_bio->bi_private = raid_bio;
>   	align_bio->bi_iter.bi_sector = sector;
>   
>   	/* No reshape active, so we can trust rdev->data_offset */


Kind regards,

Paul

Re: [PATCH -next 3/8] raid5: fix missing io accounting in raid5_align_endio()

Posted by Yu Kuai 2 years, 7 months ago

Hi,

在 2023/06/20 17:57, Paul Menzel 写道:
> Dear Yu,
> 
> 
> Thank you for your patch.
> 
> Am 19.06.23 um 22:48 schrieb Yu Kuai:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Io will only be accounted as done from raid5_align_endio() if the io
>> succeed, and io inflight counter will be leaked if such io failed.
> 
> succeed*s* or succeed*ed*?

I'll update this.

> 
>> Fix this problem by switching to use md_account_bio() for io accounting.
> 
> How can this be tested?
> 
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   drivers/md/raid5.c | 29 ++++++++---------------------
>>   1 file changed, 8 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>> index cef0b400b2ee..4cdb35e54251 100644
>> --- a/drivers/md/raid5.c
>> +++ b/drivers/md/raid5.c
>> @@ -5468,26 +5468,17 @@ static struct bio 
>> *remove_bio_from_retry(struct r5conf *conf,
>>    */
>>   static void raid5_align_endio(struct bio *bi)
>>   {
>> -    struct md_io_clone *md_io_clone = bi->bi_private;
>> -    struct bio *raid_bi = md_io_clone->orig_bio;
>> -    struct mddev *mddev;
>> -    struct r5conf *conf;
>> -    struct md_rdev *rdev;
>> +    struct bio *raid_bi = bi->bi_private;
>> +    struct md_rdev *rdev = (void *)raid_bi->bi_next;
>> +    struct mddev *mddev = rdev->mddev;
>> +    struct r5conf *conf = mddev->private;
>>       blk_status_t error = bi->bi_status;
>> -    unsigned long start_time = md_io_clone->start_time;
>>       bio_put(bi);
>> -
>> -    rdev = (void*)raid_bi->bi_next;
>>       raid_bi->bi_next = NULL;
>> -    mddev = rdev->mddev;
>> -    conf = mddev->private;
>> -
> 
> This looks like unnecessary refactoring. No idea what the preferred 
> style for the subsystem is though. If it is wanted, maybe make it a 
> separate commit?

You mean that I initialize 'rdev' and 'mddev' at declaration?
I think the code is cleaner this way, and the change is too small to
warrant a separate patch... I will keep this for now.  😉

Thanks,
Kuai

> 
>>       rdev_dec_pending(rdev, conf->mddev);
>>       if (!error) {
>> -        if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
>> -            bio_end_io_acct(raid_bi, start_time);
>>           bio_endio(raid_bi);
>>           if (atomic_dec_and_test(&conf->active_aligned_reads))
>>               wake_up(&conf->wait_for_quiescent);
>> @@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev 
>> *mddev, struct bio *raid_bio)
>>       struct md_rdev *rdev;
>>       sector_t sector, end_sector, first_bad;
>>       int bad_sectors, dd_idx;
>> -    struct md_io_clone *md_io_clone;
>>       bool did_inc;
>>       if (!in_chunk_boundary(mddev, raid_bio)) {
>> @@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev 
>> *mddev, struct bio *raid_bio)
>>           return 0;
>>       }
>> -    align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
>> -                    &mddev->io_clone_set);
>> -    md_io_clone = container_of(align_bio, struct md_io_clone, 
>> bio_clone);
>> +    md_account_bio(mddev, &raid_bio);
>>       raid_bio->bi_next = (void *)rdev;
>> -    if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
>> -        md_io_clone->start_time = bio_start_io_acct(raid_bio);
>> -    md_io_clone->orig_bio = raid_bio;
>> +    align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
>> +                    &mddev->bio_set);
>>       align_bio->bi_end_io = raid5_align_endio;
>> -    align_bio->bi_private = md_io_clone;
>> +    align_bio->bi_private = raid_bio;
>>       align_bio->bi_iter.bi_sector = sector;
>>       /* No reshape active, so we can trust rdev->data_offset */
> 
> 
> Kind regards,
> 
> Paul
> 
> .
>

Re: [PATCH -next 3/8] raid5: fix missing io accounting in raid5_align_endio()

Posted by Xiao Ni 2 years, 7 months ago

On Mon, Jun 19, 2023 at 8:50 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> Io will only be accounted as done from raid5_align_endio() if the io
> succeed, and io inflight counter will be leaked if such io failed.
>
> Fix this problem by switching to use md_account_bio() for io accounting.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  drivers/md/raid5.c | 29 ++++++++---------------------
>  1 file changed, 8 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index cef0b400b2ee..4cdb35e54251 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
>   */
>  static void raid5_align_endio(struct bio *bi)
>  {
> -       struct md_io_clone *md_io_clone = bi->bi_private;
> -       struct bio *raid_bi = md_io_clone->orig_bio;
> -       struct mddev *mddev;
> -       struct r5conf *conf;
> -       struct md_rdev *rdev;
> +       struct bio *raid_bi = bi->bi_private;
> +       struct md_rdev *rdev = (void *)raid_bi->bi_next;
> +       struct mddev *mddev = rdev->mddev;
> +       struct r5conf *conf = mddev->private;
>         blk_status_t error = bi->bi_status;
> -       unsigned long start_time = md_io_clone->start_time;
>
>         bio_put(bi);
> -
> -       rdev = (void*)raid_bi->bi_next;
>         raid_bi->bi_next = NULL;
> -       mddev = rdev->mddev;
> -       conf = mddev->private;
> -
>         rdev_dec_pending(rdev, conf->mddev);
>
>         if (!error) {
> -               if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
> -                       bio_end_io_acct(raid_bi, start_time);
>                 bio_endio(raid_bi);
>                 if (atomic_dec_and_test(&conf->active_aligned_reads))
>                         wake_up(&conf->wait_for_quiescent);
> @@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
>         struct md_rdev *rdev;
>         sector_t sector, end_sector, first_bad;
>         int bad_sectors, dd_idx;
> -       struct md_io_clone *md_io_clone;
>         bool did_inc;
>
>         if (!in_chunk_boundary(mddev, raid_bio)) {
> @@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
>                 return 0;
>         }
>
> -       align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> -                                   &mddev->io_clone_set);
> -       md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone);
> +       md_account_bio(mddev, &raid_bio);
>         raid_bio->bi_next = (void *)rdev;
> -       if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
> -               md_io_clone->start_time = bio_start_io_acct(raid_bio);
> -       md_io_clone->orig_bio = raid_bio;
>
> +       align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> +                                   &mddev->bio_set);
>         align_bio->bi_end_io = raid5_align_endio;
> -       align_bio->bi_private = md_io_clone;
> +       align_bio->bi_private = raid_bio;
>         align_bio->bi_iter.bi_sector = sector;
>
>         /* No reshape active, so we can trust rdev->data_offset */
> --
> 2.39.2
>

Reviewed-by: Xiao Ni <xni@redhat.com>

[PATCH -next 1/8] md: move initialization and destruction of 'io_acct_set' to md.c
[PATCH -next 2/8] md: also clone new io if io accounting is disabled
[PATCH -next 3/8] raid5: fix missing io accounting in raid5_align_endio()
[PATCH -next 4/8] md/raid1: switch to use md_account_bio() for io accounting
[PATCH -next 5/8] md/raid10: switch to use md_account_bio() for io accounting
[PATCH -next 6/8] md/md-multipath: enable io accounting
[PATCH -next 7/8] md/md-linear: enable io accounting
[PATCH -next 8/8] md/md-faulty: enable io accounting