[PATCH v7 2/4] md: init bioset in mddev_init

linan122@huawei.com posted 4 patches 3 months, 2 weeks ago
There is a newer version of this series
[PATCH v7 2/4] md: init bioset in mddev_init
Posted by linan122@huawei.com 3 months, 2 weeks ago
From: Li Nan <linan122@huawei.com>

IO operations may be needed before md_run(), such as updating metadata
after a sysfs write. Without an initialized bioset, this triggers a NULL
pointer dereference as below:

 BUG: kernel NULL pointer dereference, address: 0000000000000020
 Call Trace:
  md_update_sb+0x658/0xe00
  new_level_store+0xc5/0x120
  md_attr_store+0xc9/0x1e0
  sysfs_kf_write+0x6f/0xa0
  kernfs_fop_write_iter+0x141/0x2a0
  vfs_write+0x1fc/0x5a0
  ksys_write+0x79/0x180
  __x64_sys_write+0x1d/0x30
  x64_sys_call+0x2818/0x2880
  do_syscall_64+0xa9/0x580
  entry_SYSCALL_64_after_hwframe+0x4b/0x53

Reproducer
```
  mdadm -CR /dev/md0 -l1 -n2 /dev/sd[cd]
  echo inactive > /sys/block/md0/md/array_state
  echo 10 > /sys/block/md0/md/new_level
```

Fixes: d981ed841930 ("md: Add new_level sysfs interface")
Signed-off-by: Li Nan <linan122@huawei.com>
---
 drivers/md/md.c | 74 +++++++++++++++++++++++++------------------------
 1 file changed, 38 insertions(+), 36 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index f6fd55a1637b..51f0201e4906 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -730,6 +730,8 @@ static void mddev_clear_bitmap_ops(struct mddev *mddev)
 
 int mddev_init(struct mddev *mddev)
 {
+	int err = 0;
+
 	if (!IS_ENABLED(CONFIG_MD_BITMAP))
 		mddev->bitmap_id = ID_BITMAP_NONE;
 	else
@@ -741,8 +743,26 @@ int mddev_init(struct mddev *mddev)
 
 	if (percpu_ref_init(&mddev->writes_pending, no_op,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
-		percpu_ref_exit(&mddev->active_io);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto exit_acitve_io;
+	}
+
+	if (!bioset_initialized(&mddev->bio_set)) {
+		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+		if (err)
+			goto exit_writes_pending;
+	}
+	if (!bioset_initialized(&mddev->sync_set)) {
+		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+		if (err)
+			goto exit_bio_set;
+	}
+
+	if (!bioset_initialized(&mddev->io_clone_set)) {
+		err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
+				  offsetof(struct md_io_clone, bio_clone), 0);
+		if (err)
+			goto exit_sync_set;
 	}
 
 	/* We want to start with the refcount at zero */
@@ -773,11 +793,24 @@ int mddev_init(struct mddev *mddev)
 	INIT_WORK(&mddev->del_work, mddev_delayed_delete);
 
 	return 0;
+
+exit_sync_set:
+	bioset_exit(&mddev->sync_set);
+exit_bio_set:
+	bioset_exit(&mddev->bio_set);
+exit_writes_pending:
+	percpu_ref_exit(&mddev->writes_pending);
+exit_acitve_io:
+	percpu_ref_exit(&mddev->active_io);
+	return err;
 }
 EXPORT_SYMBOL_GPL(mddev_init);
 
 void mddev_destroy(struct mddev *mddev)
 {
+	bioset_exit(&mddev->bio_set);
+	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->io_clone_set);
 	percpu_ref_exit(&mddev->active_io);
 	percpu_ref_exit(&mddev->writes_pending);
 }
@@ -6393,29 +6426,9 @@ int md_run(struct mddev *mddev)
 		nowait = nowait && bdev_nowait(rdev->bdev);
 	}
 
-	if (!bioset_initialized(&mddev->bio_set)) {
-		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-		if (err)
-			return err;
-	}
-	if (!bioset_initialized(&mddev->sync_set)) {
-		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-		if (err)
-			goto exit_bio_set;
-	}
-
-	if (!bioset_initialized(&mddev->io_clone_set)) {
-		err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
-				  offsetof(struct md_io_clone, bio_clone), 0);
-		if (err)
-			goto exit_sync_set;
-	}
-
 	pers = get_pers(mddev->level, mddev->clevel);
-	if (!pers) {
-		err = -EINVAL;
-		goto abort;
-	}
+	if (!pers)
+		return -EINVAL;
 	if (mddev->level != pers->head.id) {
 		mddev->level = pers->head.id;
 		mddev->new_level = pers->head.id;
@@ -6426,8 +6439,7 @@ int md_run(struct mddev *mddev)
 	    pers->start_reshape == NULL) {
 		/* This personality cannot handle reshaping... */
 		put_pers(pers);
-		err = -EINVAL;
-		goto abort;
+		return -EINVAL;
 	}
 
 	if (pers->sync_request) {
@@ -6554,12 +6566,6 @@ int md_run(struct mddev *mddev)
 	mddev->private = NULL;
 	put_pers(pers);
 	md_bitmap_destroy(mddev);
-abort:
-	bioset_exit(&mddev->io_clone_set);
-exit_sync_set:
-	bioset_exit(&mddev->sync_set);
-exit_bio_set:
-	bioset_exit(&mddev->bio_set);
 	return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
@@ -6784,10 +6790,6 @@ static void __md_stop(struct mddev *mddev)
 	mddev->private = NULL;
 	put_pers(pers);
 	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
-	bioset_exit(&mddev->bio_set);
-	bioset_exit(&mddev->sync_set);
-	bioset_exit(&mddev->io_clone_set);
 }
 
 void md_stop(struct mddev *mddev)
-- 
2.39.2
Re: [PATCH v7 2/4] md: init bioset in mddev_init
Posted by Yu Kuai 3 months, 1 week ago
Hi,

在 2025/10/27 15:29, linan122@huawei.com 写道:
> From: Li Nan <linan122@huawei.com>
>
> IO operations may be needed before md_run(), such as updating metadata
> after writing sysfs. Without bioset, this triggers a NULL pointer
> dereference as below:
>
>   BUG: kernel NULL pointer dereference, address: 0000000000000020
>   Call Trace:
>    md_update_sb+0x658/0xe00
>    new_level_store+0xc5/0x120
>    md_attr_store+0xc9/0x1e0
>    sysfs_kf_write+0x6f/0xa0
>    kernfs_fop_write_iter+0x141/0x2a0
>    vfs_write+0x1fc/0x5a0
>    ksys_write+0x79/0x180
>    __x64_sys_write+0x1d/0x30
>    x64_sys_call+0x2818/0x2880
>    do_syscall_64+0xa9/0x580
>    entry_SYSCALL_64_after_hwframe+0x4b/0x53
>
> Reproducer
> ```
>    mdadm -CR /dev/md0 -l1 -n2 /dev/sd[cd]
>    echo inactive > /sys/block/md0/md/array_state
>    echo 10 > /sys/block/md0/md/new_level
> ```
>
> Fixes: d981ed841930 ("md: Add new_level sysfs interface")
> Signed-off-by: Li Nan <linan122@huawei.com>
> ---
>   drivers/md/md.c | 74 +++++++++++++++++++++++++------------------------
>   1 file changed, 38 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index f6fd55a1637b..51f0201e4906 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -730,6 +730,8 @@ static void mddev_clear_bitmap_ops(struct mddev *mddev)
>   
>   int mddev_init(struct mddev *mddev)
>   {
> +	int err = 0;
> +
>   	if (!IS_ENABLED(CONFIG_MD_BITMAP))
>   		mddev->bitmap_id = ID_BITMAP_NONE;
>   	else
> @@ -741,8 +743,26 @@ int mddev_init(struct mddev *mddev)
>   
>   	if (percpu_ref_init(&mddev->writes_pending, no_op,
>   			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
> -		percpu_ref_exit(&mddev->active_io);
> -		return -ENOMEM;
> +		err = -ENOMEM;
> +		goto exit_acitve_io;
> +	}
> +
> +	if (!bioset_initialized(&mddev->bio_set)) {
> +		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);

mddev_init() can only be called once per mddev, so there is no need to test
whether the bioset is initialized here.

> +		if (err)
> +			goto exit_writes_pending;
> +	}
> +	if (!bioset_initialized(&mddev->sync_set)) {

same here.

> +		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
> +		if (err)
> +			goto exit_bio_set;
> +	}
> +
> +	if (!bioset_initialized(&mddev->io_clone_set)) {

And here.

Thanks,
Kuai

> +		err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
> +				  offsetof(struct md_io_clone, bio_clone), 0);
> +		if (err)
> +			goto exit_sync_set;
>   	}
>   
>   	/* We want to start with the refcount at zero */
> @@ -773,11 +793,24 @@ int mddev_init(struct mddev *mddev)
>   	INIT_WORK(&mddev->del_work, mddev_delayed_delete);
>   
>   	return 0;
> +
> +exit_sync_set:
> +	bioset_exit(&mddev->sync_set);
> +exit_bio_set:
> +	bioset_exit(&mddev->bio_set);
> +exit_writes_pending:
> +	percpu_ref_exit(&mddev->writes_pending);
> +exit_acitve_io:
> +	percpu_ref_exit(&mddev->active_io);
> +	return err;
>   }
>   EXPORT_SYMBOL_GPL(mddev_init);
>   
>   void mddev_destroy(struct mddev *mddev)
>   {
> +	bioset_exit(&mddev->bio_set);
> +	bioset_exit(&mddev->sync_set);
> +	bioset_exit(&mddev->io_clone_set);
>   	percpu_ref_exit(&mddev->active_io);
>   	percpu_ref_exit(&mddev->writes_pending);
>   }
> @@ -6393,29 +6426,9 @@ int md_run(struct mddev *mddev)
>   		nowait = nowait && bdev_nowait(rdev->bdev);
>   	}
>   
> -	if (!bioset_initialized(&mddev->bio_set)) {
> -		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
> -		if (err)
> -			return err;
> -	}
> -	if (!bioset_initialized(&mddev->sync_set)) {
> -		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
> -		if (err)
> -			goto exit_bio_set;
> -	}
> -
> -	if (!bioset_initialized(&mddev->io_clone_set)) {
> -		err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
> -				  offsetof(struct md_io_clone, bio_clone), 0);
> -		if (err)
> -			goto exit_sync_set;
> -	}
> -
>   	pers = get_pers(mddev->level, mddev->clevel);
> -	if (!pers) {
> -		err = -EINVAL;
> -		goto abort;
> -	}
> +	if (!pers)
> +		return -EINVAL;
>   	if (mddev->level != pers->head.id) {
>   		mddev->level = pers->head.id;
>   		mddev->new_level = pers->head.id;
> @@ -6426,8 +6439,7 @@ int md_run(struct mddev *mddev)
>   	    pers->start_reshape == NULL) {
>   		/* This personality cannot handle reshaping... */
>   		put_pers(pers);
> -		err = -EINVAL;
> -		goto abort;
> +		return -EINVAL;
>   	}
>   
>   	if (pers->sync_request) {
> @@ -6554,12 +6566,6 @@ int md_run(struct mddev *mddev)
>   	mddev->private = NULL;
>   	put_pers(pers);
>   	md_bitmap_destroy(mddev);
> -abort:
> -	bioset_exit(&mddev->io_clone_set);
> -exit_sync_set:
> -	bioset_exit(&mddev->sync_set);
> -exit_bio_set:
> -	bioset_exit(&mddev->bio_set);
>   	return err;
>   }
>   EXPORT_SYMBOL_GPL(md_run);
> @@ -6784,10 +6790,6 @@ static void __md_stop(struct mddev *mddev)
>   	mddev->private = NULL;
>   	put_pers(pers);
>   	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
> -
> -	bioset_exit(&mddev->bio_set);
> -	bioset_exit(&mddev->sync_set);
> -	bioset_exit(&mddev->io_clone_set);
>   }
>   
>   void md_stop(struct mddev *mddev)
Re: [PATCH v7 2/4] md: init bioset in mddev_init
Posted by Li Nan 3 months, 1 week ago

在 2025/10/28 20:01, Yu Kuai 写道:
> Hi,
> 
> 在 2025/10/27 15:29, linan122@huawei.com 写道:
>> From: Li Nan <linan122@huawei.com>
>>
>> IO operations may be needed before md_run(), such as updating metadata
>> after writing sysfs. Without bioset, this triggers a NULL pointer
>> dereference as below:
>>
>>    BUG: kernel NULL pointer dereference, address: 0000000000000020
>>    Call Trace:
>>     md_update_sb+0x658/0xe00
>>     new_level_store+0xc5/0x120
>>     md_attr_store+0xc9/0x1e0
>>     sysfs_kf_write+0x6f/0xa0
>>     kernfs_fop_write_iter+0x141/0x2a0
>>     vfs_write+0x1fc/0x5a0
>>     ksys_write+0x79/0x180
>>     __x64_sys_write+0x1d/0x30
>>     x64_sys_call+0x2818/0x2880
>>     do_syscall_64+0xa9/0x580
>>     entry_SYSCALL_64_after_hwframe+0x4b/0x53
>>
>> Reproducer
>> ```
>>     mdadm -CR /dev/md0 -l1 -n2 /dev/sd[cd]
>>     echo inactive > /sys/block/md0/md/array_state
>>     echo 10 > /sys/block/md0/md/new_level
>> ```
>>
>> Fixes: d981ed841930 ("md: Add new_level sysfs interface")
>> Signed-off-by: Li Nan <linan122@huawei.com>
>> ---
>>    drivers/md/md.c | 74 +++++++++++++++++++++++++------------------------
>>    1 file changed, 38 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index f6fd55a1637b..51f0201e4906 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -730,6 +730,8 @@ static void mddev_clear_bitmap_ops(struct mddev *mddev)
>>    
>>    int mddev_init(struct mddev *mddev)
>>    {
>> +	int err = 0;
>> +
>>    	if (!IS_ENABLED(CONFIG_MD_BITMAP))
>>    		mddev->bitmap_id = ID_BITMAP_NONE;
>>    	else
>> @@ -741,8 +743,26 @@ int mddev_init(struct mddev *mddev)
>>    
>>    	if (percpu_ref_init(&mddev->writes_pending, no_op,
>>    			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
>> -		percpu_ref_exit(&mddev->active_io);
>> -		return -ENOMEM;
>> +		err = -ENOMEM;
>> +		goto exit_acitve_io;
>> +	}
>> +
>> +	if (!bioset_initialized(&mddev->bio_set)) {
>> +		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
> 
> mddev_init() can only be called once for one mddev, no need to test if bioset
> is initialized here.
> 

I will fix it in the next version. Thanks for your review.

>> +		if (err)
>> +			goto exit_writes_pending;
>> +	}
>> +	if (!bioset_initialized(&mddev->sync_set)) {
> 
> same here.
> 

-- 
Thanks,
Nan