[PATCH v2] block: fix kobject double initialization in add_disk

Zheng Qixing posted 1 patch 1 month, 3 weeks ago
block/blk-sysfs.c | 12 +++++-------
block/blk.h       |  1 +
block/genhd.c     |  2 ++
3 files changed, 8 insertions(+), 7 deletions(-)
[PATCH v2] block: fix kobject double initialization in add_disk
Posted by Zheng Qixing 1 month, 3 weeks ago
From: Zheng Qixing <zhengqixing@huawei.com>

Device-mapper can call add_disk() multiple times for the same gendisk
due to its two-phase creation process (dm create + dm load). This leads
to kobject double initialization errors when the underlying iSCSI devices
become temporarily unavailable and then reappear.

However, if the first add_disk() call fails and is retried, the queue_kobj
gets initialized twice, causing:

kobject: kobject (ffff88810c27bb90): tried to init an initialized object,
something is seriously wrong.
 Call Trace:
  <TASK>
  dump_stack_lvl+0x5b/0x80
  kobject_init.cold+0x43/0x51
  blk_register_queue+0x46/0x280
  add_disk_fwnode+0xb5/0x280
  dm_setup_md_queue+0x194/0x1c0
  table_load+0x297/0x2d0
  ctl_ioctl+0x2a2/0x480
  dm_ctl_ioctl+0xe/0x20
  __x64_sys_ioctl+0xc7/0x110
  do_syscall_64+0x72/0x390
  entry_SYSCALL_64_after_hwframe+0x76/0x7e

Fix this by separating kobject initialization from sysfs registration:
 - Initialize queue_kobj early during gendisk allocation
 - add_disk() only adds the already-initialized kobject to sysfs
 - del_gendisk() removes from sysfs but doesn't destroy the kobject
 - Final cleanup happens when the disk is released

Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs")
Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
Closes: https://lore.kernel.org/all/83591d0b-2467-433c-bce0-5581298eb161@huawei.com/
Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
---
 block/blk-sysfs.c | 12 +++++-------
 block/blk.h       |  1 +
 block/genhd.c     |  2 ++
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 396cded255ea..c5cf79a20842 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -847,7 +847,7 @@ static void blk_queue_release(struct kobject *kobj)
 	/* nothing to do here, all data is associated with the parent gendisk */
 }
 
-static const struct kobj_type blk_queue_ktype = {
+const struct kobj_type blk_queue_ktype = {
 	.default_groups = blk_queue_attr_groups,
 	.sysfs_ops	= &queue_sysfs_ops,
 	.release	= blk_queue_release,
@@ -875,15 +875,14 @@ int blk_register_queue(struct gendisk *disk)
 	struct request_queue *q = disk->queue;
 	int ret;
 
-	kobject_init(&disk->queue_kobj, &blk_queue_ktype);
 	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
 	if (ret < 0)
-		goto out_put_queue_kobj;
+		return ret;
 
 	if (queue_is_mq(q)) {
 		ret = blk_mq_sysfs_register(disk);
 		if (ret)
-			goto out_put_queue_kobj;
+			goto out_del_queue_kobj;
 	}
 	mutex_lock(&q->sysfs_lock);
 
@@ -934,8 +933,8 @@ int blk_register_queue(struct gendisk *disk)
 	mutex_unlock(&q->sysfs_lock);
 	if (queue_is_mq(q))
 		blk_mq_sysfs_unregister(disk);
-out_put_queue_kobj:
-	kobject_put(&disk->queue_kobj);
+out_del_queue_kobj:
+	kobject_del(&disk->queue_kobj);
 	return ret;
 }
 
@@ -986,5 +985,4 @@ void blk_unregister_queue(struct gendisk *disk)
 		elevator_set_none(q);
 
 	blk_debugfs_remove(disk);
-	kobject_put(&disk->queue_kobj);
 }
diff --git a/block/blk.h b/block/blk.h
index 0a2eccf28ca4..46f566f9b126 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -29,6 +29,7 @@ struct elevator_tags;
 /* Max future timer expiry for timeouts */
 #define BLK_MAX_TIMEOUT		(5 * HZ)
 
+extern const struct kobj_type blk_queue_ktype;
 extern struct dentry *blk_debugfs_root;
 
 struct blk_flush_queue {
diff --git a/block/genhd.c b/block/genhd.c
index c26733f6324b..9bbc38d12792 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1303,6 +1303,7 @@ static void disk_release(struct device *dev)
 	disk_free_zone_resources(disk);
 	xa_destroy(&disk->part_tbl);
 
+	kobject_put(&disk->queue_kobj);
 	disk->queue->disk = NULL;
 	blk_put_queue(disk->queue);
 
@@ -1486,6 +1487,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 	INIT_LIST_HEAD(&disk->slave_bdevs);
 #endif
 	mutex_init(&disk->rqos_state_mutex);
+	kobject_init(&disk->queue_kobj, &blk_queue_ktype);
 	return disk;
 
 out_erase_part0:
-- 
2.39.2
Re: [PATCH v2] block: fix kobject double initialization in add_disk
Posted by Jens Axboe 1 month, 3 weeks ago
On Fri, 08 Aug 2025 13:36:09 +0800, Zheng Qixing wrote:
> Device-mapper can call add_disk() multiple times for the same gendisk
> due to its two-phase creation process (dm create + dm load). This leads
> to kobject double initialization errors when the underlying iSCSI devices
> become temporarily unavailable and then reappear.
> 
> However, if the first add_disk() call fails and is retried, the queue_kobj
> gets initialized twice, causing:
> 
> [...]

Applied, thanks!

[1/1] block: fix kobject double initialization in add_disk
      commit: 343dc5423bfe876c12bb80c56f5e44286e442a07

Best regards,
-- 
Jens Axboe
Re: [PATCH v2] block: fix kobject double initialization in add_disk
Posted by Ming Lei 1 month, 3 weeks ago
On Fri, Aug 08, 2025 at 01:36:09PM +0800, Zheng Qixing wrote:
> From: Zheng Qixing <zhengqixing@huawei.com>
> 
> Device-mapper can call add_disk() multiple times for the same gendisk
> due to its two-phase creation process (dm create + dm load). This leads
> to kobject double initialization errors when the underlying iSCSI devices
> become temporarily unavailable and then reappear.
> 
> However, if the first add_disk() call fails and is retried, the queue_kobj
> gets initialized twice, causing:
> 
> kobject: kobject (ffff88810c27bb90): tried to init an initialized object,
> something is seriously wrong.
>  Call Trace:
>   <TASK>
>   dump_stack_lvl+0x5b/0x80
>   kobject_init.cold+0x43/0x51
>   blk_register_queue+0x46/0x280
>   add_disk_fwnode+0xb5/0x280
>   dm_setup_md_queue+0x194/0x1c0
>   table_load+0x297/0x2d0
>   ctl_ioctl+0x2a2/0x480
>   dm_ctl_ioctl+0xe/0x20
>   __x64_sys_ioctl+0xc7/0x110
>   do_syscall_64+0x72/0x390
>   entry_SYSCALL_64_after_hwframe+0x76/0x7e
> 
> Fix this by separating kobject initialization from sysfs registration:
>  - Initialize queue_kobj early during gendisk allocation
>  - add_disk() only adds the already-initialized kobject to sysfs
>  - del_gendisk() removes from sysfs but doesn't destroy the kobject
>  - Final cleanup happens when the disk is released
> 
> Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs")
> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
> Closes: https://lore.kernel.org/all/83591d0b-2467-433c-bce0-5581298eb161@huawei.com/
> Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>

Reviewed-by: Ming Lei <ming.lei@redhat.com>

Thanks,
Ming
Re: [PATCH v2] block: fix kobject double initialization in add_disk
Posted by Christoph Hellwig 1 month, 3 weeks ago
On Fri, Aug 08, 2025 at 01:36:09PM +0800, Zheng Qixing wrote:
> From: Zheng Qixing <zhengqixing@huawei.com>
> 
> Device-mapper can call add_disk() multiple times for the same gendisk
> due to its two-phase creation process (dm create + dm load).

We'll need to fix that, instead of adding complex workarounds in the
block layer for something that should not happen.
Re: [PATCH v2] block: fix kobject double initialization in add_disk
Posted by Yu Kuai 1 month, 3 weeks ago
Hi,

在 2025/08/10 23:08, Christoph Hellwig 写道:
> On Fri, Aug 08, 2025 at 01:36:09PM +0800, Zheng Qixing wrote:
>> From: Zheng Qixing <zhengqixing@huawei.com>
>>
>> Device-mapper can call add_disk() multiple times for the same gendisk
>> due to its two-phase creation process (dm create + dm load).
> 
> We'll need to fix that, instead of adding complex workarounds in the
> block layer for something that should not happen.
> 
AFAIK, a thorough will require complex refactor in the device mapper,
I feel it's better to fix the regression the easy way first, and it's
better for backport. Or do you have any suggestions how to fix this?

Thanks,
Kuai

> 
> .
> 

Re: [PATCH v2] block: fix kobject double initialization in add_disk
Posted by Yu Kuai 1 month, 3 weeks ago
+CC device mappper, since this is a regerssion in this driver.

在 2025/08/08 13:36, Zheng Qixing 写道:
> From: Zheng Qixing <zhengqixing@huawei.com>
> 
> Device-mapper can call add_disk() multiple times for the same gendisk
> due to its two-phase creation process (dm create + dm load). This leads
> to kobject double initialization errors when the underlying iSCSI devices
> become temporarily unavailable and then reappear.
> 
> However, if the first add_disk() call fails and is retried, the queue_kobj
> gets initialized twice, causing:
> 
> kobject: kobject (ffff88810c27bb90): tried to init an initialized object,
> something is seriously wrong.
>   Call Trace:
>    <TASK>
>    dump_stack_lvl+0x5b/0x80
>    kobject_init.cold+0x43/0x51
>    blk_register_queue+0x46/0x280
>    add_disk_fwnode+0xb5/0x280
>    dm_setup_md_queue+0x194/0x1c0
>    table_load+0x297/0x2d0
>    ctl_ioctl+0x2a2/0x480
>    dm_ctl_ioctl+0xe/0x20
>    __x64_sys_ioctl+0xc7/0x110
>    do_syscall_64+0x72/0x390
>    entry_SYSCALL_64_after_hwframe+0x76/0x7e
> 
> Fix this by separating kobject initialization from sysfs registration:
>   - Initialize queue_kobj early during gendisk allocation
>   - add_disk() only adds the already-initialized kobject to sysfs
>   - del_gendisk() removes from sysfs but doesn't destroy the kobject
>   - Final cleanup happens when the disk is released
> 
> Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs")
> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
> Closes: https://lore.kernel.org/all/83591d0b-2467-433c-bce0-5581298eb161@huawei.com/
> Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
> ---
>   block/blk-sysfs.c | 12 +++++-------
>   block/blk.h       |  1 +
>   block/genhd.c     |  2 ++
>   3 files changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 396cded255ea..c5cf79a20842 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -847,7 +847,7 @@ static void blk_queue_release(struct kobject *kobj)
>   	/* nothing to do here, all data is associated with the parent gendisk */
>   }
>   
> -static const struct kobj_type blk_queue_ktype = {
> +const struct kobj_type blk_queue_ktype = {
>   	.default_groups = blk_queue_attr_groups,
>   	.sysfs_ops	= &queue_sysfs_ops,
>   	.release	= blk_queue_release,
> @@ -875,15 +875,14 @@ int blk_register_queue(struct gendisk *disk)
>   	struct request_queue *q = disk->queue;
>   	int ret;
>   
> -	kobject_init(&disk->queue_kobj, &blk_queue_ktype);
>   	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
>   	if (ret < 0)
> -		goto out_put_queue_kobj;
> +		return ret;
>   
>   	if (queue_is_mq(q)) {
>   		ret = blk_mq_sysfs_register(disk);
>   		if (ret)
> -			goto out_put_queue_kobj;
> +			goto out_del_queue_kobj;
>   	}
>   	mutex_lock(&q->sysfs_lock);
>   
> @@ -934,8 +933,8 @@ int blk_register_queue(struct gendisk *disk)
>   	mutex_unlock(&q->sysfs_lock);
>   	if (queue_is_mq(q))
>   		blk_mq_sysfs_unregister(disk);
> -out_put_queue_kobj:
> -	kobject_put(&disk->queue_kobj);
> +out_del_queue_kobj:
> +	kobject_del(&disk->queue_kobj);
>   	return ret;
>   }
>   
> @@ -986,5 +985,4 @@ void blk_unregister_queue(struct gendisk *disk)
>   		elevator_set_none(q);
>   
>   	blk_debugfs_remove(disk);
> -	kobject_put(&disk->queue_kobj);
>   }
> diff --git a/block/blk.h b/block/blk.h
> index 0a2eccf28ca4..46f566f9b126 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -29,6 +29,7 @@ struct elevator_tags;
>   /* Max future timer expiry for timeouts */
>   #define BLK_MAX_TIMEOUT		(5 * HZ)
>   
> +extern const struct kobj_type blk_queue_ktype;
>   extern struct dentry *blk_debugfs_root;
>   
>   struct blk_flush_queue {
> diff --git a/block/genhd.c b/block/genhd.c
> index c26733f6324b..9bbc38d12792 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -1303,6 +1303,7 @@ static void disk_release(struct device *dev)
>   	disk_free_zone_resources(disk);
>   	xa_destroy(&disk->part_tbl);
>   
> +	kobject_put(&disk->queue_kobj);
>   	disk->queue->disk = NULL;
>   	blk_put_queue(disk->queue);
>   
> @@ -1486,6 +1487,7 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
>   	INIT_LIST_HEAD(&disk->slave_bdevs);
>   #endif
>   	mutex_init(&disk->rqos_state_mutex);
> +	kobject_init(&disk->queue_kobj, &blk_queue_ktype);
>   	return disk;
>   
>   out_erase_part0:
> 

LGTM
Reviewed-by: Yu Kuai <yukuai3@huawei.com>

Re: [PATCH v2] block: fix kobject double initialization in add_disk
Posted by Nilay Shroff 1 month, 3 weeks ago

On 8/8/25 11:06 AM, Zheng Qixing wrote:
> From: Zheng Qixing <zhengqixing@huawei.com>
> 
> Device-mapper can call add_disk() multiple times for the same gendisk
> due to its two-phase creation process (dm create + dm load). This leads
> to kobject double initialization errors when the underlying iSCSI devices
> become temporarily unavailable and then reappear.
> 
> However, if the first add_disk() call fails and is retried, the queue_kobj
> gets initialized twice, causing:
> 
> kobject: kobject (ffff88810c27bb90): tried to init an initialized object,
> something is seriously wrong.
>  Call Trace:
>   <TASK>
>   dump_stack_lvl+0x5b/0x80
>   kobject_init.cold+0x43/0x51
>   blk_register_queue+0x46/0x280
>   add_disk_fwnode+0xb5/0x280
>   dm_setup_md_queue+0x194/0x1c0
>   table_load+0x297/0x2d0
>   ctl_ioctl+0x2a2/0x480
>   dm_ctl_ioctl+0xe/0x20
>   __x64_sys_ioctl+0xc7/0x110
>   do_syscall_64+0x72/0x390
>   entry_SYSCALL_64_after_hwframe+0x76/0x7e
> 
> Fix this by separating kobject initialization from sysfs registration:
>  - Initialize queue_kobj early during gendisk allocation
>  - add_disk() only adds the already-initialized kobject to sysfs
>  - del_gendisk() removes from sysfs but doesn't destroy the kobject
>  - Final cleanup happens when the disk is released
> 
> Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs")
> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
> Closes: https://lore.kernel.org/all/83591d0b-2467-433c-bce0-5581298eb161@huawei.com/
> Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
> ---
>  block/blk-sysfs.c | 12 +++++-------
>  block/blk.h       |  1 +
>  block/genhd.c     |  2 ++
>  3 files changed, 8 insertions(+), 7 deletions(-)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index 396cded255ea..c5cf79a20842 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -847,7 +847,7 @@ static void blk_queue_release(struct kobject *kobj)
>  	/* nothing to do here, all data is associated with the parent gendisk */
>  }
>  
> -static const struct kobj_type blk_queue_ktype = {
> +const struct kobj_type blk_queue_ktype = {
>  	.default_groups = blk_queue_attr_groups,
>  	.sysfs_ops	= &queue_sysfs_ops,
>  	.release	= blk_queue_release,
> @@ -875,15 +875,14 @@ int blk_register_queue(struct gendisk *disk)
>  	struct request_queue *q = disk->queue;
>  	int ret;
>  
> -	kobject_init(&disk->queue_kobj, &blk_queue_ktype);
>  	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
>  	if (ret < 0)
> -		goto out_put_queue_kobj;
> +		return ret;
>  
>  	if (queue_is_mq(q)) {
>  		ret = blk_mq_sysfs_register(disk);
>  		if (ret)
> -			goto out_put_queue_kobj;
> +			goto out_del_queue_kobj;
>  	}
>  	mutex_lock(&q->sysfs_lock);
>  
> @@ -934,8 +933,8 @@ int blk_register_queue(struct gendisk *disk)
>  	mutex_unlock(&q->sysfs_lock);
>  	if (queue_is_mq(q))
>  		blk_mq_sysfs_unregister(disk);
> -out_put_queue_kobj:
> -	kobject_put(&disk->queue_kobj);
> +out_del_queue_kobj:
> +	kobject_del(&disk->queue_kobj);
>  	return ret;
>  }
>  
> @@ -986,5 +985,4 @@ void blk_unregister_queue(struct gendisk *disk)
>  		elevator_set_none(q);
>  
>  	blk_debugfs_remove(disk);
> -	kobject_put(&disk->queue_kobj);
>  }
Shouldn't we replace kobject_put() with kobject_del() here in 
blk_unregister_queue()? 

Thanks,
--Nilay
Re: [PATCH v2] block: fix kobject double initialization in add_disk
Posted by Yu Kuai 1 month, 3 weeks ago
Hi,

在 2025/08/08 15:15, Nilay Shroff 写道:
> 
> 
> On 8/8/25 11:06 AM, Zheng Qixing wrote:
>> From: Zheng Qixing <zhengqixing@huawei.com>
>>
>> Device-mapper can call add_disk() multiple times for the same gendisk
>> due to its two-phase creation process (dm create + dm load). This leads
>> to kobject double initialization errors when the underlying iSCSI devices
>> become temporarily unavailable and then reappear.
>>
>> However, if the first add_disk() call fails and is retried, the queue_kobj
>> gets initialized twice, causing:
>>
>> kobject: kobject (ffff88810c27bb90): tried to init an initialized object,
>> something is seriously wrong.
>>   Call Trace:
>>    <TASK>
>>    dump_stack_lvl+0x5b/0x80
>>    kobject_init.cold+0x43/0x51
>>    blk_register_queue+0x46/0x280
>>    add_disk_fwnode+0xb5/0x280
>>    dm_setup_md_queue+0x194/0x1c0
>>    table_load+0x297/0x2d0
>>    ctl_ioctl+0x2a2/0x480
>>    dm_ctl_ioctl+0xe/0x20
>>    __x64_sys_ioctl+0xc7/0x110
>>    do_syscall_64+0x72/0x390
>>    entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>
>> Fix this by separating kobject initialization from sysfs registration:
>>   - Initialize queue_kobj early during gendisk allocation
>>   - add_disk() only adds the already-initialized kobject to sysfs
>>   - del_gendisk() removes from sysfs but doesn't destroy the kobject
>>   - Final cleanup happens when the disk is released
>>
>> Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs")
>> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
>> Closes: https://lore.kernel.org/all/83591d0b-2467-433c-bce0-5581298eb161@huawei.com/
>> Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
>> ---
>>   block/blk-sysfs.c | 12 +++++-------
>>   block/blk.h       |  1 +
>>   block/genhd.c     |  2 ++
>>   3 files changed, 8 insertions(+), 7 deletions(-)
>>
>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>> index 396cded255ea..c5cf79a20842 100644
>> --- a/block/blk-sysfs.c
>> +++ b/block/blk-sysfs.c
>> @@ -847,7 +847,7 @@ static void blk_queue_release(struct kobject *kobj)
>>   	/* nothing to do here, all data is associated with the parent gendisk */
>>   }
>>   
>> -static const struct kobj_type blk_queue_ktype = {
>> +const struct kobj_type blk_queue_ktype = {
>>   	.default_groups = blk_queue_attr_groups,
>>   	.sysfs_ops	= &queue_sysfs_ops,
>>   	.release	= blk_queue_release,
>> @@ -875,15 +875,14 @@ int blk_register_queue(struct gendisk *disk)
>>   	struct request_queue *q = disk->queue;
>>   	int ret;
>>   
>> -	kobject_init(&disk->queue_kobj, &blk_queue_ktype);
>>   	ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
>>   	if (ret < 0)
>> -		goto out_put_queue_kobj;
>> +		return ret;
>>   
>>   	if (queue_is_mq(q)) {
>>   		ret = blk_mq_sysfs_register(disk);
>>   		if (ret)
>> -			goto out_put_queue_kobj;
>> +			goto out_del_queue_kobj;
>>   	}
>>   	mutex_lock(&q->sysfs_lock);
>>   
>> @@ -934,8 +933,8 @@ int blk_register_queue(struct gendisk *disk)
>>   	mutex_unlock(&q->sysfs_lock);
>>   	if (queue_is_mq(q))
>>   		blk_mq_sysfs_unregister(disk);
>> -out_put_queue_kobj:
>> -	kobject_put(&disk->queue_kobj);
>> +out_del_queue_kobj:
>> +	kobject_del(&disk->queue_kobj);
>>   	return ret;
>>   }
>>   
>> @@ -986,5 +985,4 @@ void blk_unregister_queue(struct gendisk *disk)
>>   		elevator_set_none(q);
>>   
>>   	blk_debugfs_remove(disk);
>> -	kobject_put(&disk->queue_kobj);
>>   }
> Shouldn't we replace kobject_put() with kobject_del() here in
> blk_unregister_queue()?

Looks like you missed that kobject_del() is called before the
kobject_put().

         /* Now that we've deleted all child objects, we can delete the 
queue. */
         kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE);
         kobject_del(&disk->queue_kobj);

         if (queue_is_mq(q))
                 elevator_set_none(q);

         blk_debugfs_remove(disk);
         kobject_put(&disk->queue_kobj);

> 
> Thanks,
> --Nilay
> .
> 

Re: [PATCH v2] block: fix kobject double initialization in add_disk
Posted by Nilay Shroff 1 month, 3 weeks ago

On 8/8/25 12:53 PM, Yu Kuai wrote:
> Hi,
> 
> 在 2025/08/08 15:15, Nilay Shroff 写道:
>>
>>
>> On 8/8/25 11:06 AM, Zheng Qixing wrote:
>>> From: Zheng Qixing <zhengqixing@huawei.com>
>>>
>>> Device-mapper can call add_disk() multiple times for the same gendisk
>>> due to its two-phase creation process (dm create + dm load). This leads
>>> to kobject double initialization errors when the underlying iSCSI devices
>>> become temporarily unavailable and then reappear.
>>>
>>> However, if the first add_disk() call fails and is retried, the queue_kobj
>>> gets initialized twice, causing:
>>>
>>> kobject: kobject (ffff88810c27bb90): tried to init an initialized object,
>>> something is seriously wrong.
>>>   Call Trace:
>>>    <TASK>
>>>    dump_stack_lvl+0x5b/0x80
>>>    kobject_init.cold+0x43/0x51
>>>    blk_register_queue+0x46/0x280
>>>    add_disk_fwnode+0xb5/0x280
>>>    dm_setup_md_queue+0x194/0x1c0
>>>    table_load+0x297/0x2d0
>>>    ctl_ioctl+0x2a2/0x480
>>>    dm_ctl_ioctl+0xe/0x20
>>>    __x64_sys_ioctl+0xc7/0x110
>>>    do_syscall_64+0x72/0x390
>>>    entry_SYSCALL_64_after_hwframe+0x76/0x7e
>>>
>>> Fix this by separating kobject initialization from sysfs registration:
>>>   - Initialize queue_kobj early during gendisk allocation
>>>   - add_disk() only adds the already-initialized kobject to sysfs
>>>   - del_gendisk() removes from sysfs but doesn't destroy the kobject
>>>   - Final cleanup happens when the disk is released
>>>
>>> Fixes: 2bd85221a625 ("block: untangle request_queue refcounting from sysfs")
>>> Reported-by: Li Lingfeng <lilingfeng3@huawei.com>
>>> Closes: https://lore.kernel.org/all/83591d0b-2467-433c-bce0-5581298eb161@huawei.com/
>>> Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
>>> ---
>>>   block/blk-sysfs.c | 12 +++++-------
>>>   block/blk.h       |  1 +
>>>   block/genhd.c     |  2 ++
>>>   3 files changed, 8 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
>>> index 396cded255ea..c5cf79a20842 100644
>>> --- a/block/blk-sysfs.c
>>> +++ b/block/blk-sysfs.c
>>> @@ -847,7 +847,7 @@ static void blk_queue_release(struct kobject *kobj)
>>>       /* nothing to do here, all data is associated with the parent gendisk */
>>>   }
>>>   -static const struct kobj_type blk_queue_ktype = {
>>> +const struct kobj_type blk_queue_ktype = {
>>>       .default_groups = blk_queue_attr_groups,
>>>       .sysfs_ops    = &queue_sysfs_ops,
>>>       .release    = blk_queue_release,
>>> @@ -875,15 +875,14 @@ int blk_register_queue(struct gendisk *disk)
>>>       struct request_queue *q = disk->queue;
>>>       int ret;
>>>   -    kobject_init(&disk->queue_kobj, &blk_queue_ktype);
>>>       ret = kobject_add(&disk->queue_kobj, &disk_to_dev(disk)->kobj, "queue");
>>>       if (ret < 0)
>>> -        goto out_put_queue_kobj;
>>> +        return ret;
>>>         if (queue_is_mq(q)) {
>>>           ret = blk_mq_sysfs_register(disk);
>>>           if (ret)
>>> -            goto out_put_queue_kobj;
>>> +            goto out_del_queue_kobj;
>>>       }
>>>       mutex_lock(&q->sysfs_lock);
>>>   @@ -934,8 +933,8 @@ int blk_register_queue(struct gendisk *disk)
>>>       mutex_unlock(&q->sysfs_lock);
>>>       if (queue_is_mq(q))
>>>           blk_mq_sysfs_unregister(disk);
>>> -out_put_queue_kobj:
>>> -    kobject_put(&disk->queue_kobj);
>>> +out_del_queue_kobj:
>>> +    kobject_del(&disk->queue_kobj);
>>>       return ret;
>>>   }
>>>   @@ -986,5 +985,4 @@ void blk_unregister_queue(struct gendisk *disk)
>>>           elevator_set_none(q);
>>>         blk_debugfs_remove(disk);
>>> -    kobject_put(&disk->queue_kobj);
>>>   }
>> Shouldn't we replace kobject_put() with kobject_del() here in
>> blk_unregister_queue()?
> 
> Looks like you missed that kobject_del() is called before the
> kobject_put().
> 
>         /* Now that we've deleted all child objects, we can delete the queue. */
>         kobject_uevent(&disk->queue_kobj, KOBJ_REMOVE);
>         kobject_del(&disk->queue_kobj);
> 
>         if (queue_is_mq(q))
>                 elevator_set_none(q);
> 
>         blk_debugfs_remove(disk);
>         kobject_put(&disk->queue_kobj);
> 
>>
Oh yes I missed to notice it since that was not part of the
patch. Thanks! 

This patch now looks good to me:
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>