block/genhd.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) mode change 100644 => 100755 block/genhd.c
A potential unsafe locking scenario presents itself when
mutex_lock(&disk->open_mutex) is called with reader's lock held on
update_nr_hwq_lock:
CPU0 CPU1
---- ----
rlock(&set->update_nr_hwq_lock)
lock(&nbd->config_lock);
lock(&set->update_nr_hwq_lock);
lock(&disk->open_mutex)
When the gendisk is added back concurrently, a writer's lock is
attempted to be held on update_nr_hwq_lock while holding other locks in
the call-path, becoming a potential source of deadlock(s).
Scope read-critical section to blk_unregister_queue, which is the only
function that interacts with switching elevator and requires
synchronization with update_nr_hwq_lock.
Reported-by: syzbot+2e9e529ac0b319316453@syzkaller.appspotmail.com
Signed-off-by: Ujwal Kundur <ujwal.kundur@gmail.com>
---
block/genhd.c | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
mode change 100644 => 100755 block/genhd.c
diff --git a/block/genhd.c b/block/genhd.c
old mode 100644
new mode 100755
index c26733f6324b..b56f09f5699b
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -696,6 +696,7 @@ static void __del_gendisk(struct gendisk *disk)
struct block_device *part;
unsigned long idx;
bool start_drain;
+ struct blk_mq_tag_set *set = q->tag_set;
might_sleep();
@@ -740,7 +741,9 @@ static void __del_gendisk(struct gendisk *disk)
bdi_unregister(disk->bdi);
}
+ down_read(&set->update_nr_hwq_lock);
blk_unregister_queue(disk);
+ up_read(&set->update_nr_hwq_lock);
kobject_put(disk->part0->bd_holder_dir);
kobject_put(disk->slave_dir);
@@ -808,20 +811,15 @@ static void disable_elv_switch(struct request_queue *q)
*/
void del_gendisk(struct gendisk *disk)
{
- struct blk_mq_tag_set *set;
unsigned int memflags;
if (!queue_is_mq(disk->queue)) {
__del_gendisk(disk);
} else {
- set = disk->queue->tag_set;
-
disable_elv_switch(disk->queue);
memflags = memalloc_noio_save();
- down_read(&set->update_nr_hwq_lock);
__del_gendisk(disk);
- up_read(&set->update_nr_hwq_lock);
memalloc_noio_restore(memflags);
}
}
--
2.30.2
Hi, 在 2025/08/03 21:41, Ujwal Kundur 写道: > A potential unsafe locking scenario presents itself when > mutex_lock(&disk->open_mutex) is called with reader's lock held on > update_nr_hwq_lock: > CPU0 CPU1 > ---- ---- > rlock(&set->update_nr_hwq_lock) > lock(&nbd->config_lock); > lock(&set->update_nr_hwq_lock); > lock(&disk->open_mutex) > This problem is already fixed inside nbd by: 8b428f42f3ed ("nbd: fix lockdep deadlock warning") Thanks, Kuai > When the gendisk is added back concurrently, a writer's lock is > attempted to be held on update_nr_hwq_lock while holding other locks in > the call-path, becoming a potential source of deadlock(s). > > Scope read-critical section to blk_unregister_queue, which is the only > function that interacts with switching elevator and requires > synchronization with update_nr_hwq_lock. > > Reported-by: syzbot+2e9e529ac0b319316453@syzkaller.appspotmail.com > Signed-off-by: Ujwal Kundur <ujwal.kundur@gmail.com> > --- > block/genhd.c | 8 +++----- > 1 file changed, 3 insertions(+), 5 deletions(-) > mode change 100644 => 100755 block/genhd.c > > diff --git a/block/genhd.c b/block/genhd.c > old mode 100644 > new mode 100755 > index c26733f6324b..b56f09f5699b > --- a/block/genhd.c > +++ b/block/genhd.c > @@ -696,6 +696,7 @@ static void __del_gendisk(struct gendisk *disk) > struct block_device *part; > unsigned long idx; > bool start_drain; > + struct blk_mq_tag_set *set = q->tag_set; > > might_sleep(); > > @@ -740,7 +741,9 @@ static void __del_gendisk(struct gendisk *disk) > bdi_unregister(disk->bdi); > } > > + down_read(&set->update_nr_hwq_lock); > blk_unregister_queue(disk); > + up_read(&set->update_nr_hwq_lock); > > kobject_put(disk->part0->bd_holder_dir); > kobject_put(disk->slave_dir); > @@ -808,20 +811,15 @@ static void disable_elv_switch(struct request_queue *q) > */ > void del_gendisk(struct gendisk *disk) > { > - struct blk_mq_tag_set *set; > unsigned int memflags; > > if (!queue_is_mq(disk->queue)) { > __del_gendisk(disk); > } else { > - set = disk->queue->tag_set; > - > disable_elv_switch(disk->queue); > > memflags = memalloc_noio_save(); > - down_read(&set->update_nr_hwq_lock); > __del_gendisk(disk); > - up_read(&set->update_nr_hwq_lock); > memalloc_noio_restore(memflags); > } > } >
On Mon, 4 Aug 2025 15:51:48 +0800 Yu Kuai wrote: > �� 2025/08/03 21:41, Ujwal Kundur �: > > A potential unsafe locking scenario presents itself when > > mutex_lock(&disk->open_mutex) is called with reader's lock held on > > update_nr_hwq_lock: > > CPU0 CPU1 > > ---- ---- > > rlock(&set->update_nr_hwq_lock) > > lock(&nbd->config_lock); > > lock(&set->update_nr_hwq_lock); > > lock(&disk->open_mutex) > > > This problem is already fixed inside nbd by: > 8b428f42f3ed ("nbd: fix lockdep deadlock warning") > Deadlock still exists [1]. [1] https://lore.kernel.org/lkml/6891742c.050a0220.7f033.001a.GAE@google.com/
在 2025/08/05 19:59, Hillf Danton 写道: > On Mon, 4 Aug 2025 15:51:48 +0800 Yu Kuai wrote: >> �� 2025/08/03 21:41, Ujwal Kundur д��: >>> A potential unsafe locking scenario presents itself when >>> mutex_lock(&disk->open_mutex) is called with reader's lock held on >>> update_nr_hwq_lock: >>> CPU0 CPU1 >>> ---- ---- >>> rlock(&set->update_nr_hwq_lock) >>> lock(&nbd->config_lock); >>> lock(&set->update_nr_hwq_lock); >>> lock(&disk->open_mutex) >>> >> This problem is already fixed inside nbd by: >> 8b428f42f3ed ("nbd: fix lockdep deadlock warning") >> > Deadlock still exists [1]. > This deadlock is a different problem, not what you claimed to fix in this patch. Thanks, Kuai > [1] https://lore.kernel.org/lkml/6891742c.050a0220.7f033.001a.GAE@google.com/ > . >
> This problem is already fixed inside nbd by: > 8b428f42f3ed ("nbd: fix lockdep deadlock warning") Thanks for letting me know. Would this patch still make sense as it narrows the scope of the read-critical section? It reduces the chances of holding other locks while trying to update the number of hw queues (and thus avoids more lockdep warnings for this particular lock in the future). Thanks, Ujwal
Hello, kernel test robot noticed "BUG:kernel_NULL_pointer_dereference,address" on: commit: 00ff643264f9f5c300898a591fa4b83a62c43950 ("[PATCH] block: prevent deadlock in del_gendisk()") url: https://github.com/intel-lab-lkp/linux/commits/Ujwal-Kundur/block-prevent-deadlock-in-del_gendisk/20250803-214237 base: https://git.kernel.org/cgit/linux/kernel/git/axboe/linux-block.git for-next patch link: https://lore.kernel.org/all/20250803134114.2707-1-ujwal.kundur@gmail.com/ patch subject: [PATCH] block: prevent deadlock in del_gendisk() in testcase: phoronix-test-suite version: with following parameters: test: svt-av1-2.11.1 option_a: 2 option_b: Bosphorus 1080p cpufreq_governor: performance config: x86_64-rhel-9.4 compiler: gcc-12 test machine: 96 threads 2 sockets Intel(R) Xeon(R) Gold 6252 CPU @ 2.10GHz (Cascade Lake) with 512G memory (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202508081000.cb134e91-lkp@intel.com [ 24.331036][ T1884] BUG: kernel NULL pointer dereference, address: 00000000000000a8 [ 24.339231][ T1884] #PF: supervisor write access in kernel mode [ 24.345493][ T1884] #PF: error_code(0x0002) - not-present page [ 24.351631][ T1884] PGD 0 P4D 0 [ 24.355133][ T1884] Oops: Oops: 0002 [#1] SMP NOPTI [ 24.360264][ T1884] CPU: 26 UID: 0 PID: 1884 Comm: dockerd Tainted: G S 6.16.0-04436-g00ff643264f9 #1 VOLUNTARY [ 24.372434][ T1884] Tainted: [S]=CPU_OUT_OF_SPEC [ 24.377290][ T1884] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019 [ 24.388664][ T1884] RIP: 0010:down_read (arch/x86/include/asm/atomic64_64.h:79 include/linux/atomic/atomic-arch-fallback.h:2723 include/linux/atomic/atomic-long.h:163 include/linux/atomic/atomic-instrumented.h:3298 kernel/locking/rwsem.c:250 kernel/locking/rwsem.c:1245 kernel/locking/rwsem.c:1259 kernel/locking/rwsem.c:1524) [ 24.393632][ T1884] Code: 00 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 53 48 89 fb e8 ae c0 ff ff be 00 01 00 00 <f0> 48 0f c1 33 48 81 c6 00 01 00 00 78 3f 48 b8 07 00 00 00 00 00 All code ======== 0: 00 00 add %al,(%rax) 2: 00 90 90 90 90 90 add %dl,-0x6f6f6f70(%rax) 8: 90 nop 9: 90 nop a: 90 nop b: 90 nop c: 90 nop d: 90 nop e: 90 nop f: 90 nop 10: 90 nop 11: 90 nop 12: 90 nop 13: f3 0f 1e fa endbr64 17: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) 1c: 53 push %rbx 1d: 48 89 fb mov %rdi,%rbx 20: e8 ae c0 ff ff call 0xffffffffffffc0d3 25: be 00 01 00 00 mov $0x100,%esi 2a:* f0 48 0f c1 33 lock xadd %rsi,(%rbx) <-- trapping instruction 2f: 48 81 c6 00 01 00 00 add $0x100,%rsi 36: 78 3f js 0x77 38: 48 rex.W 39: b8 07 00 00 00 mov $0x7,%eax ... Code starting with the faulting instruction =========================================== 0: f0 48 0f c1 33 lock xadd %rsi,(%rbx) 5: 48 81 c6 00 01 00 00 add $0x100,%rsi c: 78 3f js 0x4d e: 48 rex.W f: b8 07 00 00 00 mov $0x7,%eax ... [ 24.413635][ T1884] RSP: 0018:ffffc900211f3918 EFLAGS: 00010246 [ 24.419821][ T1884] RAX: 0000000000000000 RBX: 00000000000000a8 RCX: 00000000000001f0 [ 24.427912][ T1884] RDX: 00000000000001ef RSI: 0000000000000100 RDI: 00000000000000a8 [ 24.436003][ T1884] RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 [ 24.444092][ T1884] R10: 0000000000000000 R11: 0000000000000000 R12: 00000000000000a8 [ 24.452181][ T1884] R13: ffff88c1841e87a0 R14: ffff88c08b28ad68 R15: ffff88c0ce5b2200 [ 24.460273][ T1884] FS: 00007ff8a3fff6c0(0000) GS:ffff88f027468000(0000) knlGS:0000000000000000 [ 24.469318][ T1884] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 24.476016][ T1884] CR2: 00000000000000a8 CR3: 000000010e760004 CR4: 00000000007726f0 [ 24.484104][ T1884] PKRU: 55555554 [ 24.487751][ T1884] Call Trace: [ 24.491131][ T1884] <TASK> [ 24.494147][ T1884] __del_gendisk (block/genhd.c:745) [ 24.498810][ T1884] cleanup_mapped_device (drivers/md/dm.c:2243) dm_mod [ 24.504959][ T1884] __dm_destroy (include/linux/list.h:373 drivers/md/dm.c:2397 drivers/md/dm.c:2730) dm_mod [ 24.510321][ T1884] ? __pfx_dev_remove (drivers/md/dm-ioctl.c:978) dm_mod [ 24.516029][ T1884] dev_remove (drivers/md/dm-ioctl.c:1029) dm_mod [ 24.521215][ T1884] ctl_ioctl (drivers/md/dm-ioctl.c:2101) dm_mod [ 24.526310][ T1884] dm_ctl_ioctl (drivers/md/dm-ioctl.c:2123) dm_mod [ 24.531399][ T1884] __x64_sys_ioctl (fs/ioctl.c:51 fs/ioctl.c:598 fs/ioctl.c:584 fs/ioctl.c:584) [ 24.536040][ T1884] ? change_protection_range (mm/mprotect.c:498 mm/mprotect.c:526) [ 24.541726][ T1884] do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) [ 24.546279][ T1884] ? get_page_from_freelist (mm/page_alloc.c:1083 mm/page_alloc.c:1702 mm/page_alloc.c:1712 mm/page_alloc.c:3669) [ 24.551869][ T1884] ? __alloc_frozen_pages_noprof (mm/page_alloc.c:4959) [ 24.557894][ T1884] ? mod_memcg_lruvec_state (mm/memcontrol.c:576 mm/memcontrol.c:564 mm/memcontrol.c:754) [ 24.563495][ T1884] ? __lruvec_stat_mod_folio (mm/memcontrol.c:800) [ 24.569006][ T1884] ? __folio_mod_stat (mm/rmap.c:1414) [ 24.573905][ T1884] ? folio_add_new_anon_rmap (arch/x86/include/asm/bitops.h:206 arch/x86/include/asm/bitops.h:238 include/asm-generic/bitops/instrumented-non-atomic.h:142 include/linux/page-flags.h:867 include/linux/page-flags.h:888 include/linux/mm.h:992 mm/rmap.c:1609) [ 24.579493][ T1884] ? do_anonymous_page (mm/memory.c:5153) [ 24.584646][ T1884] ? __handle_mm_fault (mm/memory.c:6230) [ 24.589805][ T1884] ? do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94) [ 24.594520][ T1884] ? count_memcg_events (mm/memcontrol.c:576 mm/memcontrol.c:564 mm/memcontrol.c:848) [ 24.599749][ T1884] ? handle_mm_fault (include/linux/memcontrol.h:978 include/linux/memcontrol.h:985 mm/memory.c:6264 mm/memory.c:6425) [ 24.604635][ T1884] ? do_user_addr_fault (arch/x86/mm/fault.c:1337) [ 24.609863][ T1884] ? clear_bhb_loop (arch/x86/entry/entry_64.S:1548) [ 24.614572][ T1884] ? clear_bhb_loop (arch/x86/entry/entry_64.S:1548) [ 24.619279][ T1884] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) [ 24.625200][ T1884] RIP: 0033:0x7ff9476c3d1b [ 24.629648][ T1884] Code: 00 48 89 44 24 18 31 c0 48 8d 44 24 60 c7 04 24 10 00 00 00 48 89 44 24 08 48 8d 44 24 20 48 89 44 24 10 b8 10 00 00 00 0f 05 <89> c2 3d 00 f0 ff ff 77 1c 48 8b 44 24 18 64 48 2b 04 25 28 00 00 All code ======== 0: 00 48 89 add %cl,-0x77(%rax) 3: 44 24 18 rex.R and $0x18,%al 6: 31 c0 xor %eax,%eax 8: 48 8d 44 24 60 lea 0x60(%rsp),%rax d: c7 04 24 10 00 00 00 movl $0x10,(%rsp) 14: 48 89 44 24 08 mov %rax,0x8(%rsp) 19: 48 8d 44 24 20 lea 0x20(%rsp),%rax 1e: 48 89 44 24 10 mov %rax,0x10(%rsp) 23: b8 10 00 00 00 mov $0x10,%eax 28: 0f 05 syscall 2a:* 89 c2 mov %eax,%edx <-- trapping instruction 2c: 3d 00 f0 ff ff cmp $0xfffff000,%eax 31: 77 1c ja 0x4f 33: 48 8b 44 24 18 mov 0x18(%rsp),%rax 38: 64 fs 39: 48 rex.W 3a: 2b .byte 0x2b 3b: 04 25 add $0x25,%al 3d: 28 00 sub %al,(%rax) ... Code starting with the faulting instruction =========================================== 0: 89 c2 mov %eax,%edx 2: 3d 00 f0 ff ff cmp $0xfffff000,%eax 7: 77 1c ja 0x25 9: 48 8b 44 24 18 mov 0x18(%rsp),%rax e: 64 fs f: 48 rex.W 10: 2b .byte 0x2b 11: 04 25 add $0x25,%al 13: 28 00 sub %al,(%rax) The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20250808/202508081000.cb134e91-lkp@intel.com -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.