From: Yu Kuai <yukuai3@huawei.com>
Io will only be accounted as done from raid5_align_endio() if the io
succeed, and io inflight counter will be leaked if such io failed.
Fix this problem by switching to use md_account_bio() for io accounting.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/md/raid5.c | 29 ++++++++---------------------
1 file changed, 8 insertions(+), 21 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index cef0b400b2ee..4cdb35e54251 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
*/
static void raid5_align_endio(struct bio *bi)
{
- struct md_io_clone *md_io_clone = bi->bi_private;
- struct bio *raid_bi = md_io_clone->orig_bio;
- struct mddev *mddev;
- struct r5conf *conf;
- struct md_rdev *rdev;
+ struct bio *raid_bi = bi->bi_private;
+ struct md_rdev *rdev = (void *)raid_bi->bi_next;
+ struct mddev *mddev = rdev->mddev;
+ struct r5conf *conf = mddev->private;
blk_status_t error = bi->bi_status;
- unsigned long start_time = md_io_clone->start_time;
bio_put(bi);
-
- rdev = (void*)raid_bi->bi_next;
raid_bi->bi_next = NULL;
- mddev = rdev->mddev;
- conf = mddev->private;
-
rdev_dec_pending(rdev, conf->mddev);
if (!error) {
- if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
- bio_end_io_acct(raid_bi, start_time);
bio_endio(raid_bi);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_quiescent);
@@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
struct md_rdev *rdev;
sector_t sector, end_sector, first_bad;
int bad_sectors, dd_idx;
- struct md_io_clone *md_io_clone;
bool did_inc;
if (!in_chunk_boundary(mddev, raid_bio)) {
@@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
return 0;
}
- align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
- &mddev->io_clone_set);
- md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone);
+ md_account_bio(mddev, &raid_bio);
raid_bio->bi_next = (void *)rdev;
- if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
- md_io_clone->start_time = bio_start_io_acct(raid_bio);
- md_io_clone->orig_bio = raid_bio;
+ align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
+ &mddev->bio_set);
align_bio->bi_end_io = raid5_align_endio;
- align_bio->bi_private = md_io_clone;
+ align_bio->bi_private = raid_bio;
align_bio->bi_iter.bi_sector = sector;
/* No reshape active, so we can trust rdev->data_offset */
--
2.39.2
Dear Yu,
Thank you for your patch.
Am 19.06.23 um 22:48 schrieb Yu Kuai:
> From: Yu Kuai <yukuai3@huawei.com>
>
> Io will only be accounted as done from raid5_align_endio() if the io
> succeed, and io inflight counter will be leaked if such io failed.
succeed*s* or succeed*ed*?
> Fix this problem by switching to use md_account_bio() for io accounting.
How can this be tested?
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> drivers/md/raid5.c | 29 ++++++++---------------------
> 1 file changed, 8 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index cef0b400b2ee..4cdb35e54251 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
> */
> static void raid5_align_endio(struct bio *bi)
> {
> - struct md_io_clone *md_io_clone = bi->bi_private;
> - struct bio *raid_bi = md_io_clone->orig_bio;
> - struct mddev *mddev;
> - struct r5conf *conf;
> - struct md_rdev *rdev;
> + struct bio *raid_bi = bi->bi_private;
> + struct md_rdev *rdev = (void *)raid_bi->bi_next;
> + struct mddev *mddev = rdev->mddev;
> + struct r5conf *conf = mddev->private;
> blk_status_t error = bi->bi_status;
> - unsigned long start_time = md_io_clone->start_time;
>
> bio_put(bi);
> -
> - rdev = (void*)raid_bi->bi_next;
> raid_bi->bi_next = NULL;
> - mddev = rdev->mddev;
> - conf = mddev->private;
> -
This looks like unnecessary refactoring. No idea what the preferred
style for the subsystem is though. If it is wanted, maybe make it a
separate commit?
> rdev_dec_pending(rdev, conf->mddev);
>
> if (!error) {
> - if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
> - bio_end_io_acct(raid_bi, start_time);
> bio_endio(raid_bi);
> if (atomic_dec_and_test(&conf->active_aligned_reads))
> wake_up(&conf->wait_for_quiescent);
> @@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
> struct md_rdev *rdev;
> sector_t sector, end_sector, first_bad;
> int bad_sectors, dd_idx;
> - struct md_io_clone *md_io_clone;
> bool did_inc;
>
> if (!in_chunk_boundary(mddev, raid_bio)) {
> @@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
> return 0;
> }
>
> - align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> - &mddev->io_clone_set);
> - md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone);
> + md_account_bio(mddev, &raid_bio);
> raid_bio->bi_next = (void *)rdev;
> - if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
> - md_io_clone->start_time = bio_start_io_acct(raid_bio);
> - md_io_clone->orig_bio = raid_bio;
>
> + align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> + &mddev->bio_set);
> align_bio->bi_end_io = raid5_align_endio;
> - align_bio->bi_private = md_io_clone;
> + align_bio->bi_private = raid_bio;
> align_bio->bi_iter.bi_sector = sector;
>
> /* No reshape active, so we can trust rdev->data_offset */
Kind regards,
Paul
Hi,
在 2023/06/20 17:57, Paul Menzel 写道:
> Dear Yu,
>
>
> Thank you for your patch.
>
> Am 19.06.23 um 22:48 schrieb Yu Kuai:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Io will only be accounted as done from raid5_align_endio() if the io
>> succeed, and io inflight counter will be leaked if such io failed.
>
> succeed*s* or succeed*ed*?
I'll up date this.
>
>> Fix this problem by switching to use md_account_bio() for io accounting.
>
> How can this be tested?
>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>> drivers/md/raid5.c | 29 ++++++++---------------------
>> 1 file changed, 8 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
>> index cef0b400b2ee..4cdb35e54251 100644
>> --- a/drivers/md/raid5.c
>> +++ b/drivers/md/raid5.c
>> @@ -5468,26 +5468,17 @@ static struct bio
>> *remove_bio_from_retry(struct r5conf *conf,
>> */
>> static void raid5_align_endio(struct bio *bi)
>> {
>> - struct md_io_clone *md_io_clone = bi->bi_private;
>> - struct bio *raid_bi = md_io_clone->orig_bio;
>> - struct mddev *mddev;
>> - struct r5conf *conf;
>> - struct md_rdev *rdev;
>> + struct bio *raid_bi = bi->bi_private;
>> + struct md_rdev *rdev = (void *)raid_bi->bi_next;
>> + struct mddev *mddev = rdev->mddev;
>> + struct r5conf *conf = mddev->private;
>> blk_status_t error = bi->bi_status;
>> - unsigned long start_time = md_io_clone->start_time;
>> bio_put(bi);
>> -
>> - rdev = (void*)raid_bi->bi_next;
>> raid_bi->bi_next = NULL;
>> - mddev = rdev->mddev;
>> - conf = mddev->private;
>> -
>
> This looks like unnecessary refactoring. No idea what the preferred
> style for the subsystem is though. If it is wanted, maybe make it a
> separate commit?
You mean that I initialize 'rdev' and 'mdev' while declaration?
I think code is cleaner this way, and this is too tiny to make a patch
for this... I will keep this for now. 😉
Thanks,
Kuai
>
>> rdev_dec_pending(rdev, conf->mddev);
>> if (!error) {
>> - if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
>> - bio_end_io_acct(raid_bi, start_time);
>> bio_endio(raid_bi);
>> if (atomic_dec_and_test(&conf->active_aligned_reads))
>> wake_up(&conf->wait_for_quiescent);
>> @@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev
>> *mddev, struct bio *raid_bio)
>> struct md_rdev *rdev;
>> sector_t sector, end_sector, first_bad;
>> int bad_sectors, dd_idx;
>> - struct md_io_clone *md_io_clone;
>> bool did_inc;
>> if (!in_chunk_boundary(mddev, raid_bio)) {
>> @@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev
>> *mddev, struct bio *raid_bio)
>> return 0;
>> }
>> - align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
>> - &mddev->io_clone_set);
>> - md_io_clone = container_of(align_bio, struct md_io_clone,
>> bio_clone);
>> + md_account_bio(mddev, &raid_bio);
>> raid_bio->bi_next = (void *)rdev;
>> - if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
>> - md_io_clone->start_time = bio_start_io_acct(raid_bio);
>> - md_io_clone->orig_bio = raid_bio;
>> + align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
>> + &mddev->bio_set);
>> align_bio->bi_end_io = raid5_align_endio;
>> - align_bio->bi_private = md_io_clone;
>> + align_bio->bi_private = raid_bio;
>> align_bio->bi_iter.bi_sector = sector;
>> /* No reshape active, so we can trust rdev->data_offset */
>
>
> Kind regards,
>
> Paul
>
> .
>
On Mon, Jun 19, 2023 at 8:50 PM Yu Kuai <yukuai1@huaweicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@huawei.com>
>
> Io will only be accounted as done from raid5_align_endio() if the io
> succeed, and io inflight counter will be leaked if such io failed.
>
> Fix this problem by switching to use md_account_bio() for io accounting.
>
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
> drivers/md/raid5.c | 29 ++++++++---------------------
> 1 file changed, 8 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index cef0b400b2ee..4cdb35e54251 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -5468,26 +5468,17 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
> */
> static void raid5_align_endio(struct bio *bi)
> {
> - struct md_io_clone *md_io_clone = bi->bi_private;
> - struct bio *raid_bi = md_io_clone->orig_bio;
> - struct mddev *mddev;
> - struct r5conf *conf;
> - struct md_rdev *rdev;
> + struct bio *raid_bi = bi->bi_private;
> + struct md_rdev *rdev = (void *)raid_bi->bi_next;
> + struct mddev *mddev = rdev->mddev;
> + struct r5conf *conf = mddev->private;
> blk_status_t error = bi->bi_status;
> - unsigned long start_time = md_io_clone->start_time;
>
> bio_put(bi);
> -
> - rdev = (void*)raid_bi->bi_next;
> raid_bi->bi_next = NULL;
> - mddev = rdev->mddev;
> - conf = mddev->private;
> -
> rdev_dec_pending(rdev, conf->mddev);
>
> if (!error) {
> - if (blk_queue_io_stat(raid_bi->bi_bdev->bd_disk->queue))
> - bio_end_io_acct(raid_bi, start_time);
> bio_endio(raid_bi);
> if (atomic_dec_and_test(&conf->active_aligned_reads))
> wake_up(&conf->wait_for_quiescent);
> @@ -5506,7 +5497,6 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
> struct md_rdev *rdev;
> sector_t sector, end_sector, first_bad;
> int bad_sectors, dd_idx;
> - struct md_io_clone *md_io_clone;
> bool did_inc;
>
> if (!in_chunk_boundary(mddev, raid_bio)) {
> @@ -5543,16 +5533,13 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
> return 0;
> }
>
> - align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> - &mddev->io_clone_set);
> - md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone);
> + md_account_bio(mddev, &raid_bio);
> raid_bio->bi_next = (void *)rdev;
> - if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
> - md_io_clone->start_time = bio_start_io_acct(raid_bio);
> - md_io_clone->orig_bio = raid_bio;
>
> + align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> + &mddev->bio_set);
> align_bio->bi_end_io = raid5_align_endio;
> - align_bio->bi_private = md_io_clone;
> + align_bio->bi_private = raid_bio;
> align_bio->bi_iter.bi_sector = sector;
>
> /* No reshape active, so we can trust rdev->data_offset */
> --
> 2.39.2
>
Reviewed-by: Xiao Ni <xni@redhat.com>
© 2016 - 2026 Red Hat, Inc.