block/blk-iocost.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-)
Current kernel (d2980d8d826554fa6981d621e569a453787472f8) crashes
when blk_iocost_init() is called for the `nvme1` disk.
BUG: kernel NULL pointer dereference, address: 0000000000000050
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
blk_iocost_init (include/asm-generic/qspinlock.h:128
include/linux/spinlock.h:203
include/linux/spinlock_api_smp.h:158
include/linux/spinlock.h:400
block/blk-iocost.c:2884)
ioc_qos_write (block/blk-iocost.c:3198)
? kretprobe_perf_func (kernel/trace/trace_kprobe.c:1566)
? kernfs_fop_write_iter (include/linux/slab.h:584 fs/kernfs/file.c:311)
? __kmem_cache_alloc_node (mm/slab.h:? mm/slub.c:3452 mm/slub.c:3491)
? _copy_from_iter (arch/x86/include/asm/uaccess_64.h:46
arch/x86/include/asm/uaccess_64.h:52
lib/iov_iter.c:183 lib/iov_iter.c:628)
? kretprobe_dispatcher (kernel/trace/trace_kprobe.c:1693)
cgroup_file_write (kernel/cgroup/cgroup.c:4061)
kernfs_fop_write_iter (fs/kernfs/file.c:334)
vfs_write (include/linux/fs.h:1849 fs/read_write.c:491
fs/read_write.c:584)
ksys_write (fs/read_write.c:637)
do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
This happens because ioc_refresh_params() is being called without
a properly initialized ioc->rqos; that initialization only happens later.
ioc_refresh_params() -> ioc_autop_idx() tries to access
ioc->rqos.disk->queue but ioc->rqos.disk is NULL, causing the BUG above.
Move the ioc_refresh_params() call to after rqos is populated
(rq_qos_add).
Fixes: ce57b558604e ("blk-rq-qos: make rq_qos_add and rq_qos_del more useful")
Signed-off-by: Breno Leitao <leitao@debian.org>
---
block/blk-iocost.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index ff534e9d92dc..6cced8a76e9c 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2878,11 +2878,6 @@ static int blk_iocost_init(struct gendisk *disk)
atomic64_set(&ioc->cur_period, 0);
atomic_set(&ioc->hweight_gen, 0);
- spin_lock_irq(&ioc->lock);
- ioc->autop_idx = AUTOP_INVALID;
- ioc_refresh_params(ioc, true);
- spin_unlock_irq(&ioc->lock);
-
/*
* rqos must be added before activation to allow ioc_pd_init() to
* lookup the ioc from q. This means that the rqos methods may get
@@ -2893,6 +2888,11 @@ static int blk_iocost_init(struct gendisk *disk)
if (ret)
goto err_free_ioc;
+ spin_lock_irq(&ioc->lock);
+ ioc->autop_idx = AUTOP_INVALID;
+ ioc_refresh_params(ioc, true);
+ spin_unlock_irq(&ioc->lock);
+
ret = blkcg_activate_policy(disk, &blkcg_policy_iocost);
if (ret)
goto err_del_qos;
--
2.30.2
On Fri, 24 Feb 2023 08:07:14 -0800, Breno Leitao wrote: > Current kernel (d2980d8d826554fa6981d621e569a453787472f8) crashes > when blk_iocost_init for `nvme1` disk. > > BUG: kernel NULL pointer dereference, address: 0000000000000050 > #PF: supervisor read access in kernel mode > #PF: error_code(0x0000) - not-present page > > [...] Applied, thanks! [1/1] blk-iocost: initialize rqos before accessing it commit: efbb51a0aae5fcecda266ac254146e36fff41e16 Best regards, -- Jens Axboe
Hello, Breno. On Fri, Feb 24, 2023 at 08:07:14AM -0800, Breno Leitao wrote: > diff --git a/block/blk-iocost.c b/block/blk-iocost.c > index ff534e9d92dc..6cced8a76e9c 100644 > --- a/block/blk-iocost.c > +++ b/block/blk-iocost.c > @@ -2878,11 +2878,6 @@ static int blk_iocost_init(struct gendisk *disk) > atomic64_set(&ioc->cur_period, 0); > atomic_set(&ioc->hweight_gen, 0); > > - spin_lock_irq(&ioc->lock); > - ioc->autop_idx = AUTOP_INVALID; > - ioc_refresh_params(ioc, true); > - spin_unlock_irq(&ioc->lock); > - > /* > * rqos must be added before activation to allow ioc_pd_init() to > * lookup the ioc from q. This means that the rqos methods may get > @@ -2893,6 +2888,11 @@ static int blk_iocost_init(struct gendisk *disk) > if (ret) > goto err_free_ioc; > > + spin_lock_irq(&ioc->lock); > + ioc->autop_idx = AUTOP_INVALID; > + ioc_refresh_params(ioc, true); > + spin_unlock_irq(&ioc->lock); > + I'm a bit worried about registering the rqos before ioc_refresh_params() as that initializes all the internal parameters and letting IOs flow through without initializing them can lead to subtle issues. Can you please instead explicitly pass @q into ioc_refresh_params() (and explain why we need it passed explicitly in the function comment)? Thanks. -- tejun
On 2/26/23 9:55 AM, Tejun Heo wrote: > Hello, Breno. > > On Fri, Feb 24, 2023 at 08:07:14AM -0800, Breno Leitao wrote: >> diff --git a/block/blk-iocost.c b/block/blk-iocost.c >> index ff534e9d92dc..6cced8a76e9c 100644 >> --- a/block/blk-iocost.c >> +++ b/block/blk-iocost.c >> @@ -2878,11 +2878,6 @@ static int blk_iocost_init(struct gendisk *disk) >> atomic64_set(&ioc->cur_period, 0); >> atomic_set(&ioc->hweight_gen, 0); >> >> - spin_lock_irq(&ioc->lock); >> - ioc->autop_idx = AUTOP_INVALID; >> - ioc_refresh_params(ioc, true); >> - spin_unlock_irq(&ioc->lock); >> - >> /* >> * rqos must be added before activation to allow ioc_pd_init() to >> * lookup the ioc from q. This means that the rqos methods may get >> @@ -2893,6 +2888,11 @@ static int blk_iocost_init(struct gendisk *disk) >> if (ret) >> goto err_free_ioc; >> >> + spin_lock_irq(&ioc->lock); >> + ioc->autop_idx = AUTOP_INVALID; >> + ioc_refresh_params(ioc, true); >> + spin_unlock_irq(&ioc->lock); >> + > > I'm a bit worried about registering the rqos before ioc_refresh_params() as > that initializes all the internal parameters and letting IOs flow through > without initializing them can lead to subtle issues. Can you please instead > explicitly pass @q into ioc_refresh_params() (and explain why we need it > passed explicitly in the function comment)? Sorry missed this, I'll drop it for now. -- Jens Axboe
On Fri, Feb 24, 2023 at 08:07:14AM -0800, Breno Leitao <leitao@debian.org> wrote: > --- > block/blk-iocost.c | 10 +++++----- > 1 file changed, 5 insertions(+), 5 deletions(-) Well done. Reviewed-by: Michal Koutný <mkoutny@suse.com> [...] > blk_iocost_init (include/asm-generic/qspinlock.h:128 > include/linux/spinlock.h:203 > include/linux/spinlock_api_smp.h:158 > include/linux/spinlock.h:400 > block/blk-iocost.c:2884) > ioc_qos_write (block/blk-iocost.c:3198) > ? kretprobe_perf_func (kernel/trace/trace_kprobe.c:1566) > ? kernfs_fop_write_iter (include/linux/slab.h:584 fs/kernfs/file.c:311) > ? __kmem_cache_alloc_node (mm/slab.h:? mm/slub.c:3452 mm/slub.c:3491) > ? _copy_from_iter (arch/x86/include/asm/uaccess_64.h:46 > arch/x86/include/asm/uaccess_64.h:52 > lib/iov_iter.c:183 lib/iov_iter.c:628) > ? kretprobe_dispatcher (kernel/trace/trace_kprobe.c:1693) > cgroup_file_write (kernel/cgroup/cgroup.c:4061) > kernfs_fop_write_iter (fs/kernfs/file.c:334) > vfs_write (include/linux/fs.h:1849 fs/read_write.c:491 > fs/read_write.c:584) > ksys_write (fs/read_write.c:637) > do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) > entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) BTW, out of curiosity what tool did you use to list stack with line numbers? Thanks, Michal
Hello Michal, On 2/24/23 18:51, Michal Koutný wrote: >> blk_iocost_init (include/asm-generic/qspinlock.h:128 >> include/linux/spinlock.h:203 >> include/linux/spinlock_api_smp.h:158 >> include/linux/spinlock.h:400 >> block/blk-iocost.c:2884) >> ioc_qos_write (block/blk-iocost.c:3198) >> ? kretprobe_perf_func (kernel/trace/trace_kprobe.c:1566) >> ? kernfs_fop_write_iter (include/linux/slab.h:584 fs/kernfs/file.c:311) >> ? __kmem_cache_alloc_node (mm/slab.h:? mm/slub.c:3452 mm/slub.c:3491) >> ? _copy_from_iter (arch/x86/include/asm/uaccess_64.h:46 >> arch/x86/include/asm/uaccess_64.h:52 >> lib/iov_iter.c:183 lib/iov_iter.c:628) >> ? kretprobe_dispatcher (kernel/trace/trace_kprobe.c:1693) >> cgroup_file_write (kernel/cgroup/cgroup.c:4061) >> kernfs_fop_write_iter (fs/kernfs/file.c:334) >> vfs_write (include/linux/fs.h:1849 fs/read_write.c:491 >> fs/read_write.c:584) >> ksys_write (fs/read_write.c:637) >> do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80) >> entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120) > > BTW, out of curiosity what tool did you use to list stack with line > numbers? I use the decode_stacktrace.sh from the kernel's scripts directory. You basically pipe the stack to it, and call it passing the vmlinux file. It is incredibly handy. https://elixir.bootlin.com/linux/latest/source/scripts/decode_stacktrace.sh Thanks for the review, Breno
© 2016 - 2025 Red Hat, Inc.