drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
From: Yu Kuai <yukuai3@huawei.com>
Commit 3e453522593d ("md: Free resources in __md_stop") tried to fix
null-ptr-deference for 'active_io' by moving percpu_ref_exit() to
__md_stop(), however, the commit also moving 'writes_pending' to
__md_stop(), and this will cause mdadm tests broken:
BUG: kernel NULL pointer dereference, address: 0000000000000038
Oops: 0000 [#1] PREEMPT SMP
CPU: 15 PID: 17830 Comm: mdadm Not tainted 6.3.0-rc3-next-20230324-00009-g520d37
RIP: 0010:free_percpu+0x465/0x670
Call Trace:
<TASK>
__percpu_ref_exit+0x48/0x70
percpu_ref_exit+0x1a/0x90
__md_stop+0xe9/0x170
do_md_stop+0x1e1/0x7b0
md_ioctl+0x90c/0x1aa0
blkdev_ioctl+0x19b/0x400
vfs_ioctl+0x20/0x50
__x64_sys_ioctl+0xba/0xe0
do_syscall_64+0x6c/0xe0
entry_SYSCALL_64_after_hwframe+0x63/0xcd
And the problem can be reporduced 100% by following test:
mdadm -CR /dev/md0 -l1 -n1 /dev/sda --force
echo inactive > /sys/block/md0/md/array_state
echo read-auto > /sys/block/md0/md/array_state
echo inactive > /sys/block/md0/md/array_state
Root cause:
// start raid
raid1_run
mddev_init_writes_pending
percpu_ref_init
// inactive raid
array_state_store
do_md_stop
__md_stop
percpu_ref_exit
// start raid again
array_state_store
do_md_run
raid1_run
mddev_init_writes_pending
if (mddev->writes_pending.percpu_count_ptr)
// won't reinit
// inactive raid again
...
percpu_ref_exit
-> null-ptr-deference
Before the commit, 'writes_pending' is exited when mddev is freed, and
it's safe to restart raid because mddev_init_writes_pending() already make
sure that 'writes_pending' will only be initialized once.
Fix the prblem by moving 'writes_pending' back, it's a litter hard to find
the relationship between alloc memory and free memory, however, code
changes is much less and we lived with this for a long time already.
Fixes: 3e453522593d ("md: Free resources in __md_stop")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
drivers/md/md.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 161231e01faa..06f262050400 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6265,7 +6265,6 @@ static void __md_stop(struct mddev *mddev)
module_put(pers->owner);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- percpu_ref_exit(&mddev->writes_pending);
percpu_ref_exit(&mddev->active_io);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
@@ -6278,6 +6277,7 @@ void md_stop(struct mddev *mddev)
*/
__md_stop_writes(mddev);
__md_stop(mddev);
+ percpu_ref_exit(&mddev->writes_pending);
}
EXPORT_SYMBOL_GPL(md_stop);
@@ -7848,6 +7848,7 @@ static void md_free_disk(struct gendisk *disk)
{
struct mddev *mddev = disk->private_data;
+ percpu_ref_exit(&mddev->writes_pending);
mddev_free(mddev);
}
--
2.39.2
On Tue, Mar 28, 2023 at 2:44 AM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > From: Yu Kuai <yukuai3@huawei.com> > > Commit 3e453522593d ("md: Free resources in __md_stop") tried to fix > null-ptr-deference for 'active_io' by moving percpu_ref_exit() to > __md_stop(), however, the commit also moving 'writes_pending' to > __md_stop(), and this will cause mdadm tests broken: > > BUG: kernel NULL pointer dereference, address: 0000000000000038 > Oops: 0000 [#1] PREEMPT SMP > CPU: 15 PID: 17830 Comm: mdadm Not tainted 6.3.0-rc3-next-20230324-00009-g520d37 > RIP: 0010:free_percpu+0x465/0x670 > Call Trace: > <TASK> > __percpu_ref_exit+0x48/0x70 > percpu_ref_exit+0x1a/0x90 > __md_stop+0xe9/0x170 > do_md_stop+0x1e1/0x7b0 > md_ioctl+0x90c/0x1aa0 > blkdev_ioctl+0x19b/0x400 > vfs_ioctl+0x20/0x50 > __x64_sys_ioctl+0xba/0xe0 > do_syscall_64+0x6c/0xe0 > entry_SYSCALL_64_after_hwframe+0x63/0xcd > > And the problem can be reporduced 100% by following test: > > mdadm -CR /dev/md0 -l1 -n1 /dev/sda --force > echo inactive > /sys/block/md0/md/array_state > echo read-auto > /sys/block/md0/md/array_state > echo inactive > /sys/block/md0/md/array_state > > Root cause: > > // start raid > raid1_run > mddev_init_writes_pending > percpu_ref_init > > // inactive raid > array_state_store > do_md_stop > __md_stop > percpu_ref_exit > > // start raid again > array_state_store > do_md_run > raid1_run > mddev_init_writes_pending > if (mddev->writes_pending.percpu_count_ptr) > // won't reinit > > // inactive raid again > ... > percpu_ref_exit > -> null-ptr-deference > > Before the commit, 'writes_pending' is exited when mddev is freed, and > it's safe to restart raid because mddev_init_writes_pending() already make > sure that 'writes_pending' will only be initialized once. > > Fix the prblem by moving 'writes_pending' back, it's a litter hard to find > the relationship between alloc memory and free memory, however, code > changes is much less and we lived with this for a long time already. > > Fixes: 3e453522593d ("md: Free resources in __md_stop") > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> Applied to md-fixes. Thanks! Song
On Tue, Mar 28, 2023 at 5:44 PM Yu Kuai <yukuai1@huaweicloud.com> wrote: > > From: Yu Kuai <yukuai3@huawei.com> > > Commit 3e453522593d ("md: Free resources in __md_stop") tried to fix > null-ptr-deference for 'active_io' by moving percpu_ref_exit() to > __md_stop(), however, the commit also moving 'writes_pending' to > __md_stop(), and this will cause mdadm tests broken: > > BUG: kernel NULL pointer dereference, address: 0000000000000038 > Oops: 0000 [#1] PREEMPT SMP > CPU: 15 PID: 17830 Comm: mdadm Not tainted 6.3.0-rc3-next-20230324-00009-g520d37 > RIP: 0010:free_percpu+0x465/0x670 > Call Trace: > <TASK> > __percpu_ref_exit+0x48/0x70 > percpu_ref_exit+0x1a/0x90 > __md_stop+0xe9/0x170 > do_md_stop+0x1e1/0x7b0 > md_ioctl+0x90c/0x1aa0 > blkdev_ioctl+0x19b/0x400 > vfs_ioctl+0x20/0x50 > __x64_sys_ioctl+0xba/0xe0 > do_syscall_64+0x6c/0xe0 > entry_SYSCALL_64_after_hwframe+0x63/0xcd > > And the problem can be reporduced 100% by following test: > > mdadm -CR /dev/md0 -l1 -n1 /dev/sda --force > echo inactive > /sys/block/md0/md/array_state > echo read-auto > /sys/block/md0/md/array_state > echo inactive > /sys/block/md0/md/array_state > > Root cause: > > // start raid > raid1_run > mddev_init_writes_pending > percpu_ref_init > > // inactive raid > array_state_store > do_md_stop > __md_stop > percpu_ref_exit > > // start raid again > array_state_store > do_md_run > raid1_run > mddev_init_writes_pending > if (mddev->writes_pending.percpu_count_ptr) > // won't reinit > > // inactive raid again > ... > percpu_ref_exit > -> null-ptr-deference > > Before the commit, 'writes_pending' is exited when mddev is freed, and > it's safe to restart raid because mddev_init_writes_pending() already make > sure that 'writes_pending' will only be initialized once. > > Fix the prblem by moving 'writes_pending' back, it's a litter hard to find > the relationship between alloc memory and free memory, however, code > changes is much less and we lived with this for a long time already. > > Fixes: 3e453522593d ("md: Free resources in __md_stop") > > Signed-off-by: Yu Kuai <yukuai3@huawei.com> > --- > drivers/md/md.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/drivers/md/md.c b/drivers/md/md.c > index 161231e01faa..06f262050400 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -6265,7 +6265,6 @@ static void __md_stop(struct mddev *mddev) > module_put(pers->owner); > clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); > > - percpu_ref_exit(&mddev->writes_pending); > percpu_ref_exit(&mddev->active_io); > bioset_exit(&mddev->bio_set); > bioset_exit(&mddev->sync_set); > @@ -6278,6 +6277,7 @@ void md_stop(struct mddev *mddev) > */ > __md_stop_writes(mddev); > __md_stop(mddev); > + percpu_ref_exit(&mddev->writes_pending); > } > > EXPORT_SYMBOL_GPL(md_stop); > @@ -7848,6 +7848,7 @@ static void md_free_disk(struct gendisk *disk) > { > struct mddev *mddev = disk->private_data; > > + percpu_ref_exit(&mddev->writes_pending); > mddev_free(mddev); > } > > -- > 2.39.2 > Hi Yu Kuai Thanks for this. This patch is ok for me. But I have another one something like this: diff --git a/drivers/md/md.c b/drivers/md/md.c index 39e49e5d7182..be07b2b1b717 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5577,8 +5577,6 @@ static void no_op(struct percpu_ref *r) {} int mddev_init_writes_pending(struct mddev *mddev) { - if (mddev->writes_pending.percpu_count_ptr) - return 0; if (percpu_ref_init(&mddev->writes_pending, no_op, PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0) return -ENOMEM; @@ -6260,7 +6258,6 @@ static void __md_stop(struct mddev *mddev) module_put(pers->owner); clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - percpu_ref_exit(&mddev->writes_pending); percpu_ref_exit(&mddev->active_io); bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 68a9e2d9985b..6ba975ed4533 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3210,6 +3210,7 @@ static void raid1_free(struct mddev *mddev, void *priv) kfree(conf->barrier); bioset_exit(&conf->bio_split); kfree(conf); + percpu_ref_exit(&mddev->writes_pending); } static int raid1_resize(struct mddev *mddev, sector_t sectors) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6c66357f92f5..22f0ccb0823b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -4303,6 +4303,7 @@ static void raid10_free(struct mddev *mddev, void *priv) kfree(conf->mirrors_new); bioset_exit(&conf->bio_split); kfree(conf); + percpu_ref_exit(&mddev->writes_pending); } static void raid10_quiesce(struct mddev *mddev, int quiesce) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 7b820b81d8c2..0df9908b3bde 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -8087,6 +8087,7 @@ static void raid5_free(struct mddev *mddev, void *priv) struct r5conf *conf = priv; free_conf(conf); + percpu_ref_exit(&mddev->writes_pending); acct_bioset_exit(mddev); mddev->to_remove = &raid5_attrs_group; } raid0 doesn't need writes_pending, so we alloc writes_pending in pers->run. In the function mddev_init_writes_pending, it checks if writes_pending is freed or not. I guess the reason is to avoid re-alloc memory during takeover(.e.g raid1->raid10). But it makes the alloc/free sequence a little mess. If we free writes_pending in pers->free, it doesn't need to check if writes_pending is valid in mddev_init_writes_pending again and it's easy to maintain in future. Anyway, the patch md: fix regression for null-ptr-deference in __md_stop() is good for me. Reviewed-by: Xiao Ni <xni@redhat.com> -- Best Regards Xiao Ni
© 2016 - 2024 Red Hat, Inc.