block/blk-core.c | 16 ++++++---------- drivers/md/dm.c | 6 +++--- drivers/nvme/host/multipath.c | 8 ++++---- include/linux/blkdev.h | 5 ++--- 4 files changed, 15 insertions(+), 20 deletions(-)
From: Yu Kuai <yukuai3@huawei.com>
While using iostat for raid, I observed very strange 'await'
occasionally, and turns out it's due to that 'ios' and 'sectors' is
counted in bdev_start_io_acct(), while 'nsecs' is counted in
bdev_end_io_acct(). I'm not sure why they are ccounted like that
but I think this behaviour is obviously wrong because user will get
wrong disk stats.
Fix the problem by counting 'ios' and 'sectors' when io is done, like
what rq-based device does.
Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
block/blk-core.c | 16 ++++++----------
drivers/md/dm.c | 6 +++---
drivers/nvme/host/multipath.c | 8 ++++----
include/linux/blkdev.h | 5 ++---
4 files changed, 15 insertions(+), 20 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 82b5b2c53f1e..fe1d320f5f07 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
}
}
-unsigned long bdev_start_io_acct(struct block_device *bdev,
- unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time)
{
- const int sgrp = op_stat_group(op);
-
part_stat_lock();
update_io_ticks(bdev, start_time, false);
- part_stat_inc(bdev, ios[sgrp]);
- part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
*/
unsigned long bio_start_io_acct(struct bio *bio)
{
- return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
- bio_op(bio), jiffies);
+ return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
}
EXPORT_SYMBOL_GPL(bio_start_io_acct);
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
- unsigned long start_time)
+ unsigned int sectors, unsigned long start_time)
{
const int sgrp = op_stat_group(op);
unsigned long now = READ_ONCE(jiffies);
@@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
part_stat_lock();
update_io_ticks(bdev, now, true);
+ part_stat_inc(bdev, ios[sgrp]);
+ part_stat_add(bdev, sectors[sgrp], sectors);
part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
part_stat_unlock();
@@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
struct block_device *orig_bdev)
{
- bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
}
EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eace45a18d45..f5cc330bb549 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
sectors = io->sectors;
if (!end)
- bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
- start_time);
+ bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
else
- bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
+ bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
+ start_time);
if (static_branch_unlikely(&stats_enabled) &&
unlikely(dm_stats_used(&md->stats))) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index fc39d01e7b63..9171452e2f6d 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
return;
nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
- nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
- blk_rq_bytes(rq) >> SECTOR_SHIFT,
- req_op(rq), jiffies);
+ nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
+ jiffies);
}
EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
@@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
return;
bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
- nvme_req(rq)->start_time);
+ blk_rq_bytes(rq) >> SECTOR_SHIFT,
+ nvme_req(rq)->start_time);
}
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d1aee08f8c18..941304f17492 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
wake_up_process(waiter);
}
-unsigned long bdev_start_io_acct(struct block_device *bdev,
- unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
unsigned long start_time);
void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
- unsigned long start_time);
+ unsigned int sectors, unsigned long start_time);
unsigned long bio_start_io_acct(struct bio *bio);
void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
--
2.31.1
On Thu, 23 Feb 2023 17:12:26 +0800, Yu Kuai wrote: > While using iostat for raid, I observed very strange 'await' > occasionally, and turns out it's due to that 'ios' and 'sectors' is > counted in bdev_start_io_acct(), while 'nsecs' is counted in > bdev_end_io_acct(). I'm not sure why they are ccounted like that > but I think this behaviour is obviously wrong because user will get > wrong disk stats. > > [...] Applied, thanks! [1/1] block: count 'ios' and 'sectors' when io is done for bio-based device commit: 5f27571382ca42daa3e3d40d1b252bf18c2b61d2 Best regards, -- Jens Axboe
Looks good: Reviewed-by: Christoph Hellwig <hch@lst.de>
Hi, friendly ping ... Thanks, Kuai 在 2023/02/23 17:12, Yu Kuai 写道: > From: Yu Kuai <yukuai3@huawei.com> > > While using iostat for raid, I observed very strange 'await' > occasionally, and turns out it's due to that 'ios' and 'sectors' is > counted in bdev_start_io_acct(), while 'nsecs' is counted in > bdev_end_io_acct(). I'm not sure why they are ccounted like that > but I think this behaviour is obviously wrong because user will get > wrong disk stats. > > Fix the problem by counting 'ios' and 'sectors' when io is done, like > what rq-based device does. > > Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function") > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > block/blk-core.c | 16 ++++++---------- > drivers/md/dm.c | 6 +++--- > drivers/nvme/host/multipath.c | 8 ++++---- > include/linux/blkdev.h | 5 ++--- > 4 files changed, 15 insertions(+), 20 deletions(-) > > diff --git a/block/blk-core.c b/block/blk-core.c > index 82b5b2c53f1e..fe1d320f5f07 100644 > --- a/block/blk-core.c > +++ b/block/blk-core.c > @@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end) > } > } > > -unsigned long bdev_start_io_acct(struct block_device *bdev, > - unsigned int sectors, enum req_op op, > +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, > unsigned long start_time) > { > - const int sgrp = op_stat_group(op); > - > part_stat_lock(); > update_io_ticks(bdev, start_time, false); > - part_stat_inc(bdev, ios[sgrp]); > - part_stat_add(bdev, sectors[sgrp], sectors); > part_stat_local_inc(bdev, in_flight[op_is_write(op)]); > part_stat_unlock(); > > @@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct); > */ > unsigned long bio_start_io_acct(struct bio *bio) > { > - return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), > - bio_op(bio), jiffies); > + return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies); > } > EXPORT_SYMBOL_GPL(bio_start_io_acct); > > void bdev_end_io_acct(struct block_device *bdev, enum req_op op, > - unsigned long start_time) > + unsigned int sectors, unsigned long start_time) > { > const int sgrp = op_stat_group(op); > unsigned long now = READ_ONCE(jiffies); > @@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op, > > part_stat_lock(); > update_io_ticks(bdev, now, true); > + part_stat_inc(bdev, ios[sgrp]); > + part_stat_add(bdev, sectors[sgrp], sectors); > part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); > part_stat_local_dec(bdev, in_flight[op_is_write(op)]); > part_stat_unlock(); > @@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct); > void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, > struct block_device *orig_bdev) > { > - bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); > + bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time); > } > EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); > > diff --git a/drivers/md/dm.c b/drivers/md/dm.c > index eace45a18d45..f5cc330bb549 100644 > --- a/drivers/md/dm.c > +++ b/drivers/md/dm.c > @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end) > sectors = io->sectors; > > if (!end) > - bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio), > - start_time); > + bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time); > else > - bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time); > + bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors, > + start_time); > > if (static_branch_unlikely(&stats_enabled) && > unlikely(dm_stats_used(&md->stats))) { > diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c > index fc39d01e7b63..9171452e2f6d 100644 > --- a/drivers/nvme/host/multipath.c > +++ b/drivers/nvme/host/multipath.c > @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq) > return; > > nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; > - nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, > - blk_rq_bytes(rq) >> SECTOR_SHIFT, > - req_op(rq), jiffies); > + nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq), > + jiffies); > } > EXPORT_SYMBOL_GPL(nvme_mpath_start_request); > > @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq) > if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) > return; > bdev_end_io_acct(ns->head->disk->part0, req_op(rq), > - nvme_req(rq)->start_time); > + blk_rq_bytes(rq) >> SECTOR_SHIFT, > + nvme_req(rq)->start_time); > } > > void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index d1aee08f8c18..941304f17492 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter) > wake_up_process(waiter); > } > > -unsigned long bdev_start_io_acct(struct block_device *bdev, > - unsigned int sectors, enum req_op op, > +unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op, > unsigned long start_time); > void bdev_end_io_acct(struct block_device *bdev, enum req_op op, > - unsigned long start_time); > + unsigned int sectors, unsigned long start_time); > > unsigned long bio_start_io_acct(struct bio *bio); > void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, >
Hi, 在 2023/02/28 9:01, Yu Kuai 写道: > Hi, > > friendly ping ... > > Thanks, > Kuai > > 在 2023/02/23 17:12, Yu Kuai 写道: >> From: Yu Kuai <yukuai3@huawei.com> >> >> While using iostat for raid, I observed very strange 'await' >> occasionally, and turns out it's due to that 'ios' and 'sectors' is >> counted in bdev_start_io_acct(), while 'nsecs' is counted in >> bdev_end_io_acct(). I'm not sure why they are ccounted like that >> but I think this behaviour is obviously wrong because user will get >> wrong disk stats. >> >> Fix the problem by counting 'ios' and 'sectors' when io is done, like >> what rq-based device does. Can anyone help to review this change? Or is there any reason to count 'ios' and 'sectors' when io is started? Thanks, Kuai >> >> Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help >> function") >> Signed-off-by: Yu Kuai <yukuai3@huawei.com> >> --- >> block/blk-core.c | 16 ++++++---------- >> drivers/md/dm.c | 6 +++--- >> drivers/nvme/host/multipath.c | 8 ++++---- >> include/linux/blkdev.h | 5 ++--- >> 4 files changed, 15 insertions(+), 20 deletions(-) >> >> diff --git a/block/blk-core.c b/block/blk-core.c >> index 82b5b2c53f1e..fe1d320f5f07 100644 >> --- a/block/blk-core.c >> +++ b/block/blk-core.c >> @@ -953,16 +953,11 @@ void update_io_ticks(struct block_device *part, >> unsigned long now, bool end) >> } >> } >> -unsigned long bdev_start_io_acct(struct block_device *bdev, >> - unsigned int sectors, enum req_op op, >> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum >> req_op op, >> unsigned long start_time) >> { >> - const int sgrp = op_stat_group(op); >> - >> part_stat_lock(); >> update_io_ticks(bdev, start_time, false); >> - part_stat_inc(bdev, ios[sgrp]); >> - part_stat_add(bdev, sectors[sgrp], sectors); >> part_stat_local_inc(bdev, in_flight[op_is_write(op)]); >> part_stat_unlock(); >> @@ -978,13 +973,12 @@ EXPORT_SYMBOL(bdev_start_io_acct); >> */ >> unsigned long bio_start_io_acct(struct bio *bio) >> { >> - return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio), >> - bio_op(bio), jiffies); >> + return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies); >> } >> EXPORT_SYMBOL_GPL(bio_start_io_acct); >> void bdev_end_io_acct(struct block_device *bdev, enum req_op op, >> - unsigned long start_time) >> + unsigned int sectors, unsigned long start_time) >> { >> const int sgrp = op_stat_group(op); >> unsigned long now = READ_ONCE(jiffies); >> @@ -992,6 +986,8 @@ void bdev_end_io_acct(struct block_device *bdev, >> enum req_op op, >> part_stat_lock(); >> update_io_ticks(bdev, now, true); >> + part_stat_inc(bdev, ios[sgrp]); >> + part_stat_add(bdev, sectors[sgrp], sectors); >> part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration)); >> part_stat_local_dec(bdev, in_flight[op_is_write(op)]); >> part_stat_unlock(); >> @@ -1001,7 +997,7 @@ EXPORT_SYMBOL(bdev_end_io_acct); >> void bio_end_io_acct_remapped(struct bio *bio, unsigned long >> start_time, >> struct block_device *orig_bdev) >> { >> - bdev_end_io_acct(orig_bdev, bio_op(bio), start_time); >> + bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), >> start_time); >> } >> EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped); >> diff --git a/drivers/md/dm.c b/drivers/md/dm.c >> index eace45a18d45..f5cc330bb549 100644 >> --- a/drivers/md/dm.c >> +++ b/drivers/md/dm.c >> @@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end) >> sectors = io->sectors; >> if (!end) >> - bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio), >> - start_time); >> + bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time); >> else >> - bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time); >> + bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors, >> + start_time); >> if (static_branch_unlikely(&stats_enabled) && >> unlikely(dm_stats_used(&md->stats))) { >> diff --git a/drivers/nvme/host/multipath.c >> b/drivers/nvme/host/multipath.c >> index fc39d01e7b63..9171452e2f6d 100644 >> --- a/drivers/nvme/host/multipath.c >> +++ b/drivers/nvme/host/multipath.c >> @@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq) >> return; >> nvme_req(rq)->flags |= NVME_MPATH_IO_STATS; >> - nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, >> - blk_rq_bytes(rq) >> SECTOR_SHIFT, >> - req_op(rq), jiffies); >> + nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, >> req_op(rq), >> + jiffies); >> } >> EXPORT_SYMBOL_GPL(nvme_mpath_start_request); >> @@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq) >> if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS)) >> return; >> bdev_end_io_acct(ns->head->disk->part0, req_op(rq), >> - nvme_req(rq)->start_time); >> + blk_rq_bytes(rq) >> SECTOR_SHIFT, >> + nvme_req(rq)->start_time); >> } >> void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) >> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h >> index d1aee08f8c18..941304f17492 100644 >> --- a/include/linux/blkdev.h >> +++ b/include/linux/blkdev.h >> @@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct >> task_struct *waiter) >> wake_up_process(waiter); >> } >> -unsigned long bdev_start_io_acct(struct block_device *bdev, >> - unsigned int sectors, enum req_op op, >> +unsigned long bdev_start_io_acct(struct block_device *bdev, enum >> req_op op, >> unsigned long start_time); >> void bdev_end_io_acct(struct block_device *bdev, enum req_op op, >> - unsigned long start_time); >> + unsigned int sectors, unsigned long start_time); >> unsigned long bio_start_io_acct(struct bio *bio); >> void bio_end_io_acct_remapped(struct bio *bio, unsigned long >> start_time, >> > > . >
© 2016 - 2025 Red Hat, Inc.