kernel/padata.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
A recent patch that addressed a UAF introduced a reference count leak:
the parallel_data refcount is incremented unconditionally, regardless
of the return value of queue_work(). If the work item is already queued,
the incremented refcount is never decremented.
Fix this by checking the return value of queue_work() and decrementing
the refcount when necessary.
Resolves:
Unreferenced object 0xffff9d9f421e3d80 (size 192):
comm "cryptomgr_probe", pid 157, jiffies 4294694003
hex dump (first 32 bytes):
80 8b cf 41 9f 9d ff ff b8 97 e0 89 ff ff ff ff ...A............
d0 97 e0 89 ff ff ff ff 19 00 00 00 1f 88 23 00 ..............#.
backtrace (crc 838fb36):
__kmalloc_cache_noprof+0x284/0x320
padata_alloc_pd+0x20/0x1e0
padata_alloc_shell+0x3b/0xa0
0xffffffffc040a54d
cryptomgr_probe+0x43/0xc0
kthread+0xf6/0x1f0
ret_from_fork+0x2f/0x50
ret_from_fork_asm+0x1a/0x30
Fixes: dd7d37ccf6b1 ("padata: avoid UAF for reorder_work")
Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek@oracle.com>
---
kernel/padata.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/kernel/padata.c b/kernel/padata.c
index b3d4eacc4f5d..7eee94166357 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -358,7 +358,8 @@ static void padata_reorder(struct parallel_data *pd)
* To avoid UAF issue, add pd ref here, and put pd ref after reorder_work finish.
*/
padata_get_pd(pd);
- queue_work(pinst->serial_wq, &pd->reorder_work);
+ if (!queue_work(pinst->serial_wq, &pd->reorder_work))
+ padata_put_pd(pd);
}
}
--
2.43.0
Hi Dominik, I wanted to report a regression observed while running the `kselftest-mm` suite, specifically the `mm_run_vmtests_sh_migration_migration_shared_anon` test, on an Arm64 Marvell Thunder X2 (TX2) system. The kernel was built using defconfig with the additional config fragment from: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/mm/config This works fine on v6.15-rc7. A bisect identified this patch as introducing the failure. Bisected it on the tag "v6.15-rc7-7-g4a95bc121ccd" at repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Failure log: 11193 03:29:14.806502 # # running ./migration 11194 03:29:14.806876 # # ------------------- 11195 03:29:14.820939 # # TAP version 13 11196 03:29:14.821236 # # 1..6 11197 03:29:14.821519 # # # Starting 6 tests from 1 test cases. 11198 03:29:14.821773 # # # RUN migration.private_anon ... 11199 03:29:34.602964 # # # OK migration.private_anon 11200 03:29:34.603418 # # ok 1 migration.private_anon 11201 03:29:34.603687 # # # RUN migration.shared_anon ... 11202 03:29:34.973479 # # Didn't migrate 1 pages 11203 03:29:34.973855 # # # migration.c:175:shared_anon:Expected migrate(ptr, self->n1, self->n2) (-2) == 0 (0) 11204 03:29:34.984787 # # # shared_anon: Test terminated by assertion 11205 03:29:34.985105 # # # FAIL migration.shared_anon 11206 03:29:34.985365 # # not ok 2 migration.shared_anon 11207 03:29:34.988568 # # # RUN migration.private_anon_thp ... 11208 03:29:54.597572 # # # OK migration.private_anon_thp 11209 03:29:54.597951 # # ok 3 migration.private_anon_thp 11210 03:29:54.598487 # # # RUN migration.shared_anon_thp ... 11211 03:29:55.011183 # # Didn't migrate 1 pages 11212 03:29:55.011524 # # # migration.c:241:shared_anon_thp:Expected migrate(ptr, self->n1, self->n2) (-2) == 0 (0) 11213 03:29:55.022519 # # # shared_anon_thp: Test terminated by assertion 11214 03:29:55.022834 # # # FAIL migration.shared_anon_thp 11215 03:29:55.027864 # # not ok 4 migration.shared_anon_thp 11216 03:29:55.028156 # # # RUN migration.private_anon_htlb ... 11217 03:30:14.595327 # # # OK migration.private_anon_htlb 11218 03:30:14.595777 # # ok 5 migration.private_anon_htlb 11219 03:30:14.596398 # # # RUN migration.shared_anon_htlb ... 11220 03:30:34.595239 # # # OK migration.shared_anon_htlb 11221 03:30:34.595623 # # ok 6 migration.shared_anon_htlb 11222 03:30:34.595859 # # # FAILED: 4 / 6 tests passed. 11223 03:30:34.603816 # # # Totals: pass:4 fail:2 xfail:0 xpass:0 skip:0 error:0 11224 03:30:34.604110 # # [FAIL] 11225 03:30:34.604342 # not ok 55 migration # exit=1 Thanks, Aishwarya
On Thu, May 22, 2025 at 02:10:41PM +0100, Aishwarya wrote:
>
> A bisect identified this patch as introducing the failure. Bisected
> it on the tag "v6.15-rc7-7-g4a95bc121ccd" at repo:
> git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
What if you revert the patch in question as well as the one it
was supposed to fix, i.e., commit dd7d37ccf6b1 ("padata: avoid
UAF for reorder_work")? I've attached both reverts together as
a patch.
I think the original fix was broken since the bug is actually
in the Crypto API.
Thanks,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/kernel/padata.c b/kernel/padata.c
index 7eee94166357..e0af15779d80 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -352,15 +352,8 @@ static void padata_reorder(struct parallel_data *pd)
smp_mb();
reorder = per_cpu_ptr(pd->reorder_list, pd->cpu);
- if (!list_empty(&reorder->list) && padata_find_next(pd, false)) {
- /*
- * Other context(eg. the padata_serial_worker) can finish the request.
- * To avoid UAF issue, add pd ref here, and put pd ref after reorder_work finish.
- */
- padata_get_pd(pd);
- if (!queue_work(pinst->serial_wq, &pd->reorder_work))
- padata_put_pd(pd);
- }
+ if (!list_empty(&reorder->list) && padata_find_next(pd, false))
+ queue_work(pinst->serial_wq, &pd->reorder_work);
}
static void invoke_padata_reorder(struct work_struct *work)
@@ -371,8 +364,6 @@ static void invoke_padata_reorder(struct work_struct *work)
pd = container_of(work, struct parallel_data, reorder_work);
padata_reorder(pd);
local_bh_enable();
- /* Pairs with putting the reorder_work in the serial_wq */
- padata_put_pd(pd);
}
static void padata_serial_worker(struct work_struct *serial_work)
On Thu, 2025-05-22 at 14:10 +0100, Aishwarya wrote: > Hi Dominik, > > I wanted to report a regression observed while running the > `kselftest-mm` suite, specifically the > `mm_run_vmtests_sh_migration_migration_shared_anon` test, on an > Arm64 Marvell Thunder X2 (TX2) system. > > The kernel was built using defconfig with the additional config > fragment from: > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/ > testing/selftests/mm/config > > This works fine on v6.15-rc7. > > A bisect identified this patch as introducing the failure. Bisected > it on the tag "v6.15-rc7-7-g4a95bc121ccd" at repo: > git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git > > Failure log: > 11193 03:29:14.806502 # # running ./migration > 11194 03:29:14.806876 # # ------------------- > 11195 03:29:14.820939 # # TAP version 13 > 11196 03:29:14.821236 # # 1..6 > 11197 03:29:14.821519 # # # Starting 6 tests from 1 test cases. > 11198 03:29:14.821773 # # # RUN migration.private_anon ... > 11199 03:29:34.602964 # # # OK migration.private_anon > 11200 03:29:34.603418 # # ok 1 migration.private_anon > 11201 03:29:34.603687 # # # RUN migration.shared_anon ... > 11202 03:29:34.973479 # # Didn't migrate 1 pages > 11203 03:29:34.973855 # # # migration.c:175:shared_anon:Expected migrate(ptr, > self->n1, self->n2) (-2) == 0 (0) > 11204 03:29:34.984787 # # # shared_anon: Test terminated by assertion > 11205 03:29:34.985105 # # # FAIL migration.shared_anon > 11206 03:29:34.985365 # # not ok 2 migration.shared_anon > 11207 03:29:34.988568 # # # RUN migration.private_anon_thp ... > 11208 03:29:54.597572 # # # OK migration.private_anon_thp > 11209 03:29:54.597951 # # ok 3 migration.private_anon_thp > 11210 03:29:54.598487 # # # RUN migration.shared_anon_thp ... > 11211 03:29:55.011183 # # Didn't migrate 1 pages > 11212 03:29:55.011524 # # # migration.c:241:shared_anon_thp:Expected > migrate(ptr, > self->n1, self->n2) (-2) == 0 (0) > 11213 03:29:55.022519 # # # shared_anon_thp: Test terminated by assertion > 11214 03:29:55.022834 # # # FAIL migration.shared_anon_thp > 11215 03:29:55.027864 # # not ok 4 migration.shared_anon_thp > 11216 03:29:55.028156 # # # RUN migration.private_anon_htlb ... > 11217 03:30:14.595327 # # # OK migration.private_anon_htlb > 11218 03:30:14.595777 # # ok 5 migration.private_anon_htlb > 11219 03:30:14.596398 # # # RUN migration.shared_anon_htlb ... > 11220 03:30:34.595239 # # # OK migration.shared_anon_htlb > 11221 03:30:34.595623 # # ok 6 migration.shared_anon_htlb > 11222 03:30:34.595859 # # # FAILED: 4 / 6 tests passed. > 11223 03:30:34.603816 # # # Totals: pass:4 fail:2 xfail:0 xpass:0 skip:0 > error:0 > 11224 03:30:34.604110 # # [FAIL] > 11225 03:30:34.604342 # not ok 55 migration # exit=1 > > Thanks, > Aishwarya Hi, Looking at the test, I don't think this is related. The test allocates some pages and attempts to migrate them between two NUMA nodes. It seems to be a selftest for memory management code, and I don't see how this patch could affect its outcome. Do you see this failure consistently, or is it only happening occasionally? I'm wondering if it might have passed by chance when testing the real culprit during the bisect. Regards, Dominik
On Thu, May 22, 2025 at 01:44:29PM +0000, Dominik Grzegorzek wrote: > On Thu, 2025-05-22 at 14:10 +0100, Aishwarya wrote: > > self->n1, self->n2) (-2) == 0 (0) > > 11213 03:29:55.022519 # # # shared_anon_thp: Test terminated by assertion > > 11214 03:29:55.022834 # # # FAIL migration.shared_anon_thp > > 11215 03:29:55.027864 # # not ok 4 migration.shared_anon_thp > > 11216 03:29:55.028156 # # # RUN migration.private_anon_htlb ... > > 11217 03:30:14.595327 # # # OK migration.private_anon_htlb > > 11218 03:30:14.595777 # # ok 5 migration.private_anon_htlb > > 11219 03:30:14.596398 # # # RUN migration.shared_anon_htlb ... > > 11220 03:30:34.595239 # # # OK migration.shared_anon_htlb > > 11221 03:30:34.595623 # # ok 6 migration.shared_anon_htlb > Looking at the test, I don't think this is related. The test allocates some > pages and attempts to migrate them between two NUMA nodes. It seems to be a > selftest for memory management code, and I don't see how this patch could affect > its outcome. The hugepages code does make use of padata and it's a hugepages test so there's some potential connection there, definitely not an obvious one though.
On Thu, May 22, 2025 at 03:02:14PM +0100, Mark Brown wrote: > > The hugepages code does make use of padata and it's a hugepages test so > there's some potential connection there, definitely not an obvious one > though. I just had a look at this and the hugepage use of padata has nothing to do with the patch in question at all. In fact, the two users of padata pretty much live in separate universes. The only connection between them is the shared set of work structs. So I think is a false-positive bisection result. Thanks, -- Email: Herbert Xu <herbert@gondor.apana.org.au> Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On Sun, May 18, 2025 at 07:45:31PM +0200, Dominik Grzegorzek wrote:
> A recent patch that addressed a UAF introduced a reference count leak:
> the parallel_data refcount is incremented unconditionally, regardless
> of the return value of queue_work(). If the work item is already queued,
> the incremented refcount is never decremented.
>
> Fix this by checking the return value of queue_work() and decrementing
> the refcount when necessary.
>
> Resolves:
>
> Unreferenced object 0xffff9d9f421e3d80 (size 192):
> comm "cryptomgr_probe", pid 157, jiffies 4294694003
> hex dump (first 32 bytes):
> 80 8b cf 41 9f 9d ff ff b8 97 e0 89 ff ff ff ff ...A............
> d0 97 e0 89 ff ff ff ff 19 00 00 00 1f 88 23 00 ..............#.
> backtrace (crc 838fb36):
> __kmalloc_cache_noprof+0x284/0x320
> padata_alloc_pd+0x20/0x1e0
> padata_alloc_shell+0x3b/0xa0
> 0xffffffffc040a54d
> cryptomgr_probe+0x43/0xc0
> kthread+0xf6/0x1f0
> ret_from_fork+0x2f/0x50
> ret_from_fork_asm+0x1a/0x30
>
> Fixes: dd7d37ccf6b1 ("padata: avoid UAF for reorder_work")
> Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek@oracle.com>
I missed it too. Thanks!
Acked-by: Daniel Jordan <daniel.m.jordan@oracle.com>
On Sun, May 18, 2025 at 07:45:31PM +0200, Dominik Grzegorzek wrote:
> A recent patch that addressed a UAF introduced a reference count leak:
> the parallel_data refcount is incremented unconditionally, regardless
> of the return value of queue_work(). If the work item is already queued,
> the incremented refcount is never decremented.
>
> Fix this by checking the return value of queue_work() and decrementing
> the refcount when necessary.
>
> Resolves:
>
> Unreferenced object 0xffff9d9f421e3d80 (size 192):
> comm "cryptomgr_probe", pid 157, jiffies 4294694003
> hex dump (first 32 bytes):
> 80 8b cf 41 9f 9d ff ff b8 97 e0 89 ff ff ff ff ...A............
> d0 97 e0 89 ff ff ff ff 19 00 00 00 1f 88 23 00 ..............#.
> backtrace (crc 838fb36):
> __kmalloc_cache_noprof+0x284/0x320
> padata_alloc_pd+0x20/0x1e0
> padata_alloc_shell+0x3b/0xa0
> 0xffffffffc040a54d
> cryptomgr_probe+0x43/0xc0
> kthread+0xf6/0x1f0
> ret_from_fork+0x2f/0x50
> ret_from_fork_asm+0x1a/0x30
>
> Fixes: dd7d37ccf6b1 ("padata: avoid UAF for reorder_work")
> Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek@oracle.com>
> ---
> kernel/padata.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
Patch applied. Thanks.
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
On 2025/5/19 1:45, Dominik Grzegorzek wrote:
> A recent patch that addressed a UAF introduced a reference count leak:
> the parallel_data refcount is incremented unconditionally, regardless
> of the return value of queue_work(). If the work item is already queued,
> the incremented refcount is never decremented.
>
> Fix this by checking the return value of queue_work() and decrementing
> the refcount when necessary.
>
> Resolves:
>
> Unreferenced object 0xffff9d9f421e3d80 (size 192):
> comm "cryptomgr_probe", pid 157, jiffies 4294694003
> hex dump (first 32 bytes):
> 80 8b cf 41 9f 9d ff ff b8 97 e0 89 ff ff ff ff ...A............
> d0 97 e0 89 ff ff ff ff 19 00 00 00 1f 88 23 00 ..............#.
> backtrace (crc 838fb36):
> __kmalloc_cache_noprof+0x284/0x320
> padata_alloc_pd+0x20/0x1e0
> padata_alloc_shell+0x3b/0xa0
> 0xffffffffc040a54d
> cryptomgr_probe+0x43/0xc0
> kthread+0xf6/0x1f0
> ret_from_fork+0x2f/0x50
> ret_from_fork_asm+0x1a/0x30
>
> Fixes: dd7d37ccf6b1 ("padata: avoid UAF for reorder_work")
> Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek@oracle.com>
> ---
> kernel/padata.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/padata.c b/kernel/padata.c
> index b3d4eacc4f5d..7eee94166357 100644
> --- a/kernel/padata.c
> +++ b/kernel/padata.c
> @@ -358,7 +358,8 @@ static void padata_reorder(struct parallel_data *pd)
> * To avoid UAF issue, add pd ref here, and put pd ref after reorder_work finish.
> */
> padata_get_pd(pd);
> - queue_work(pinst->serial_wq, &pd->reorder_work);
> + if (!queue_work(pinst->serial_wq, &pd->reorder_work))
> + padata_put_pd(pd);
> }
> }
>
Thank you. I missed this case.
LGTM
Thanks,
Ridong
© 2016 - 2025 Red Hat, Inc.