mm/slab_common.c | 102 ++++++++++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 40 deletions(-)
Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment
if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64.
However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16.
This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16],
resulting in kmem_buckets_create() attempting to create a kmem_cache for
size 16 twice. This duplication triggers warnings on boot:
[ 2.325108] ------------[ cut here ]------------
[ 2.325135] kmem_cache of name 'memdup_user-16' already exists
[ 2.325783] WARNING: CPU: 0 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0
[ 2.327957] Modules linked in:
[ 2.328550] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc5mm-unstable-arm64+ #12
[ 2.328683] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024
[ 2.328790] pstate: 61000009 (nZCv daif -PAN -UAO -TCO +DIT -SSBS BTYPE=--)
[ 2.328911] pc : __kmem_cache_create_args+0xb8/0x3b0
[ 2.328930] lr : __kmem_cache_create_args+0xb8/0x3b0
[ 2.328942] sp : ffff800083d6fc50
[ 2.328961] x29: ffff800083d6fc50 x28: f2ff0000c1674410 x27: ffff8000820b0598
[ 2.329061] x26: 000000007fffffff x25: 0000000000000010 x24: 0000000000002000
[ 2.329101] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388
[ 2.329118] x20: f2ff0000c1674410 x19: f5ff0000c16364c0 x18: ffff800083d80030
[ 2.329135] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[ 2.329152] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120
[ 2.329169] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000
[ 2.329194] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
[ 2.329210] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
[ 2.329226] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
[ 2.329291] Call trace:
[ 2.329407] __kmem_cache_create_args+0xb8/0x3b0
[ 2.329499] kmem_buckets_create+0xfc/0x320
[ 2.329526] init_user_buckets+0x34/0x78
[ 2.329540] do_one_initcall+0x64/0x3c8
[ 2.329550] kernel_init_freeable+0x26c/0x578
[ 2.329562] kernel_init+0x3c/0x258
[ 2.329574] ret_from_fork+0x10/0x20
[ 2.329698] ---[ end trace 0000000000000000 ]---
[ 2.403704] ------------[ cut here ]------------
[ 2.404716] kmem_cache of name 'msg_msg-16' already exists
[ 2.404801] WARNING: CPU: 2 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0
[ 2.404842] Modules linked in:
[ 2.404971] CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.12.0-rc5mm-unstable-arm64+ #12
[ 2.405026] Tainted: [W]=WARN
[ 2.405043] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024
[ 2.405057] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 2.405079] pc : __kmem_cache_create_args+0xb8/0x3b0
[ 2.405100] lr : __kmem_cache_create_args+0xb8/0x3b0
[ 2.405111] sp : ffff800083d6fc50
[ 2.405115] x29: ffff800083d6fc50 x28: fbff0000c1674410 x27: ffff8000820b0598
[ 2.405135] x26: 000000000000ffd0 x25: 0000000000000010 x24: 0000000000006000
[ 2.405153] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388
[ 2.405169] x20: fbff0000c1674410 x19: fdff0000c163d6c0 x18: ffff800083d80030
[ 2.405185] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[ 2.405201] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120
[ 2.405217] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000
[ 2.405233] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000
[ 2.405248] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000
[ 2.405271] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000
[ 2.405287] Call trace:
[ 2.405293] __kmem_cache_create_args+0xb8/0x3b0
[ 2.405305] kmem_buckets_create+0xfc/0x320
[ 2.405315] init_msg_buckets+0x34/0x78
[ 2.405326] do_one_initcall+0x64/0x3c8
[ 2.405337] kernel_init_freeable+0x26c/0x578
[ 2.405348] kernel_init+0x3c/0x258
[ 2.405360] ret_from_fork+0x10/0x20
[ 2.405370] ---[ end trace 0000000000000000 ]---
To address this, alias kmem_cache for sizes smaller than min alignment
to the aligned sized kmem_cache, as done with the default system kmalloc
bucket.
Cc: <stable@vger.kernel.org> # 6.11.x
Fixes: b32801d1255b ("mm/slab: Introduce kmem_buckets_create() and family")
Signed-off-by: Koichiro Den <koichiro.den@gmail.com>
---
mm/slab_common.c | 102 ++++++++++++++++++++++++++++-------------------
1 file changed, 62 insertions(+), 40 deletions(-)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3d26c257ed8b..64140561dd0e 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -354,6 +354,38 @@ struct kmem_cache *__kmem_cache_create_args(const char *name,
}
EXPORT_SYMBOL(__kmem_cache_create_args);
+static unsigned int __kmalloc_minalign(void)
+{
+ unsigned int minalign = dma_get_cache_alignment();
+
+ if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
+ is_swiotlb_allocated())
+ minalign = ARCH_KMALLOC_MINALIGN;
+
+ return max(minalign, arch_slab_minalign());
+}
+
+static unsigned int __kmalloc_aligned_size(unsigned int idx)
+{
+ unsigned int aligned_size = kmalloc_info[idx].size;
+ unsigned int minalign = __kmalloc_minalign();
+
+ if (minalign > ARCH_KMALLOC_MINALIGN)
+ aligned_size = ALIGN(aligned_size, minalign);
+
+ return aligned_size;
+}
+
+static unsigned int __kmalloc_aligned_idx(unsigned int idx)
+{
+ unsigned int minalign = __kmalloc_minalign();
+
+ if (minalign > ARCH_KMALLOC_MINALIGN)
+ return __kmalloc_index(__kmalloc_aligned_size(idx), false);
+
+ return idx;
+}
+
static struct kmem_cache *kmem_buckets_cache __ro_after_init;
/**
@@ -381,7 +413,10 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
void (*ctor)(void *))
{
kmem_buckets *b;
- int idx;
+ unsigned int idx;
+ unsigned long mask = 0;
+
+ BUILD_BUG_ON(ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]) > BITS_PER_LONG);
/*
* When the separate buckets API is not built in, just return
@@ -402,43 +437,47 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags,
for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) {
char *short_size, *cache_name;
+ unsigned int aligned_size = __kmalloc_aligned_size(idx);
+ unsigned int aligned_idx = __kmalloc_aligned_idx(idx);
unsigned int cache_useroffset, cache_usersize;
- unsigned int size;
+ /* this might be an aliased kmem_cache */
if (!kmalloc_caches[KMALLOC_NORMAL][idx])
continue;
- size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size;
- if (!size)
- continue;
-
short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-');
if (WARN_ON(!short_size))
goto fail;
- cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
- if (WARN_ON(!cache_name))
- goto fail;
-
- if (useroffset >= size) {
+ if (useroffset >= aligned_size) {
cache_useroffset = 0;
cache_usersize = 0;
} else {
cache_useroffset = useroffset;
- cache_usersize = min(size - cache_useroffset, usersize);
+ cache_usersize = min(aligned_size - cache_useroffset, usersize);
}
- (*b)[idx] = kmem_cache_create_usercopy(cache_name, size,
- 0, flags, cache_useroffset,
- cache_usersize, ctor);
- kfree(cache_name);
- if (WARN_ON(!(*b)[idx]))
- goto fail;
+
+ if (!(*b)[aligned_idx]) {
+ cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1);
+ if (WARN_ON(!cache_name))
+ goto fail;
+ (*b)[aligned_idx] = kmem_cache_create_usercopy(cache_name, aligned_size,
+ 0, flags, cache_useroffset,
+ cache_usersize, ctor);
+ if (WARN_ON(!(*b)[aligned_idx])) {
+ kfree(cache_name);
+ goto fail;
+ }
+ set_bit(aligned_idx, &mask);
+ }
+ if (idx != aligned_idx)
+ (*b)[idx] = (*b)[aligned_idx];
}
return b;
fail:
- for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++)
+ for_each_set_bit(idx, &mask, ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]))
kmem_cache_destroy((*b)[idx]);
kmem_cache_free(kmem_buckets_cache, b);
@@ -871,24 +910,12 @@ void __init setup_kmalloc_cache_index_table(void)
}
}
-static unsigned int __kmalloc_minalign(void)
-{
- unsigned int minalign = dma_get_cache_alignment();
-
- if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) &&
- is_swiotlb_allocated())
- minalign = ARCH_KMALLOC_MINALIGN;
-
- return max(minalign, arch_slab_minalign());
-}
-
static void __init
-new_kmalloc_cache(int idx, enum kmalloc_cache_type type)
+new_kmalloc_cache(unsigned int idx, enum kmalloc_cache_type type)
{
slab_flags_t flags = 0;
- unsigned int minalign = __kmalloc_minalign();
- unsigned int aligned_size = kmalloc_info[idx].size;
- int aligned_idx = idx;
+ unsigned int aligned_size = __kmalloc_aligned_size(idx);
+ unsigned int aligned_idx = __kmalloc_aligned_idx(idx);
if ((KMALLOC_RECLAIM != KMALLOC_NORMAL) && (type == KMALLOC_RECLAIM)) {
flags |= SLAB_RECLAIM_ACCOUNT;
@@ -914,11 +941,6 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type)
if (IS_ENABLED(CONFIG_MEMCG) && (type == KMALLOC_NORMAL))
flags |= SLAB_NO_MERGE;
- if (minalign > ARCH_KMALLOC_MINALIGN) {
- aligned_size = ALIGN(aligned_size, minalign);
- aligned_idx = __kmalloc_index(aligned_size, false);
- }
-
if (!kmalloc_caches[type][aligned_idx])
kmalloc_caches[type][aligned_idx] = create_kmalloc_cache(
kmalloc_info[aligned_idx].name[type],
@@ -934,7 +956,7 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type)
*/
void __init create_kmalloc_caches(void)
{
- int i;
+ unsigned int i;
enum kmalloc_cache_type type;
/*
--
2.43.0
On Tue, Nov 05, 2024 at 12:08:37AM +0900, Koichiro Den wrote: > Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment > if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64. > However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16. > This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16], > resulting in kmem_buckets_create() attempting to create a kmem_cache for > size 16 twice. This duplication triggers warnings on boot: Wouldn't this be easier? +++ b/arch/arm64/include/asm/cache.h @@ -33,7 +33,11 @@ * the CPU. */ #define ARCH_DMA_MINALIGN (128) +#ifdef CONFIG_KASAN_HW_TAGS +#define ARCH_KMALLOC_MINALIGN (16) +#else #define ARCH_KMALLOC_MINALIGN (8) +#endif #ifndef __ASSEMBLY__
On 11/4/24 19:00, Matthew Wilcox wrote: > On Tue, Nov 05, 2024 at 12:08:37AM +0900, Koichiro Den wrote: >> Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment >> if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64. >> However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16. >> This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16], >> resulting in kmem_buckets_create() attempting to create a kmem_cache for >> size 16 twice. This duplication triggers warnings on boot: > > Wouldn't this be easier? They wanted it to depend on actual HW capability / kernel parameter, see d949a8155d13 ("mm: make minimum slab alignment a runtime property") Also Catalin's commit referenced above was part of the series that made the alignment more dynamic for other cases IIRC. So I doubt we can simply reduce it back to a build-time constant. > +++ b/arch/arm64/include/asm/cache.h > @@ -33,7 +33,11 @@ > * the CPU. > */ > #define ARCH_DMA_MINALIGN (128) > +#ifdef CONFIG_KASAN_HW_TAGS > +#define ARCH_KMALLOC_MINALIGN (16) > +#else > #define ARCH_KMALLOC_MINALIGN (8) > +#endif > > #ifndef __ASSEMBLY__ > >
On Mon, Nov 04, 2024 at 07:16:20PM +0100, Vlastimil Babka wrote: > On 11/4/24 19:00, Matthew Wilcox wrote: > > On Tue, Nov 05, 2024 at 12:08:37AM +0900, Koichiro Den wrote: > >> Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment > >> if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64. > >> However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16. > >> This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16], > >> resulting in kmem_buckets_create() attempting to create a kmem_cache for > >> size 16 twice. This duplication triggers warnings on boot: > > > > Wouldn't this be easier? > > They wanted it to depend on actual HW capability / kernel parameter, see > d949a8155d13 ("mm: make minimum slab alignment a runtime property") > > Also Catalin's commit referenced above was part of the series that made the > alignment more dynamic for other cases IIRC. So I doubt we can simply reduce > it back to a build-time constant. I principle, I wouldn't reduce it back to constant though the 8 vs 16 difference is not significant. It matter if one enables KASAN_HW_TAGS and wants to run it on hardware without MTE, getting the *-8 caches back. That said, I haven't managed to trigger this warning yet. Do I need other config options than KASAN_HW_TAGS and DEBUG_VM? -- Catalin
On Mon, Nov 04, 2024 at 10:22:37PM +0000, Catalin Marinas wrote: > On Mon, Nov 04, 2024 at 07:16:20PM +0100, Vlastimil Babka wrote: > > On 11/4/24 19:00, Matthew Wilcox wrote: > > > On Tue, Nov 05, 2024 at 12:08:37AM +0900, Koichiro Den wrote: > > >> Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment > > >> if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64. > > >> However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16. > > >> This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16], > > >> resulting in kmem_buckets_create() attempting to create a kmem_cache for > > >> size 16 twice. This duplication triggers warnings on boot: > > > > > > Wouldn't this be easier? > > > > They wanted it to depend on actual HW capability / kernel parameter, see > > d949a8155d13 ("mm: make minimum slab alignment a runtime property") > > > > Also Catalin's commit referenced above was part of the series that made the > > alignment more dynamic for other cases IIRC. So I doubt we can simply reduce > > it back to a build-time constant. > > I principle, I wouldn't reduce it back to constant though the 8 vs 16 > difference is not significant. It matter if one enables KASAN_HW_TAGS > and wants to run it on hardware without MTE, getting the *-8 caches > back. > > That said, I haven't managed to trigger this warning yet. Do I need > other config options than KASAN_HW_TAGS and DEBUG_VM? Ah, I was missing SLAB_BUCKETS. I'll have a look tomorrow. -- Catalin
On Mon, Nov 04, 2024 at 10:28:37PM +0000, Catalin Marinas wrote: > On Mon, Nov 04, 2024 at 10:22:37PM +0000, Catalin Marinas wrote: > > On Mon, Nov 04, 2024 at 07:16:20PM +0100, Vlastimil Babka wrote: > > > On 11/4/24 19:00, Matthew Wilcox wrote: > > > > On Tue, Nov 05, 2024 at 12:08:37AM +0900, Koichiro Den wrote: > > > >> Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment > > > >> if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64. > > > >> However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16. > > > >> This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16], > > > >> resulting in kmem_buckets_create() attempting to create a kmem_cache for > > > >> size 16 twice. This duplication triggers warnings on boot: > > > > > > > > Wouldn't this be easier? > > > > > > They wanted it to depend on actual HW capability / kernel parameter, see > > > d949a8155d13 ("mm: make minimum slab alignment a runtime property") > > > > > > Also Catalin's commit referenced above was part of the series that made the > > > alignment more dynamic for other cases IIRC. So I doubt we can simply reduce > > > it back to a build-time constant. > > > > I principle, I wouldn't reduce it back to constant though the 8 vs 16 > > difference is not significant. It matter if one enables KASAN_HW_TAGS > > and wants to run it on hardware without MTE, getting the *-8 caches > > back. > > > > That said, I haven't managed to trigger this warning yet. Do I need > > other config options than KASAN_HW_TAGS and DEBUG_VM? > > Ah, I was missing SLAB_BUCKETS. I'll have a look tomorrow. > > -- > Catalin Thank you for reviewing. I just sent v2 so please take a look at that instead. v2: https://lore.kernel.org/linux-mm/20241105022747.2819151-1-koichiro.den@gmail.com/
On 11/4/24 16:08, Koichiro Den wrote: > Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment > if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64. > However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16. > This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16], > resulting in kmem_buckets_create() attempting to create a kmem_cache for > size 16 twice. This duplication triggers warnings on boot: > > [ 2.325108] ------------[ cut here ]------------ > [ 2.325135] kmem_cache of name 'memdup_user-16' already exists > [ 2.325783] WARNING: CPU: 0 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0 > [ 2.327957] Modules linked in: > [ 2.328550] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc5mm-unstable-arm64+ #12 > [ 2.328683] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024 > [ 2.328790] pstate: 61000009 (nZCv daif -PAN -UAO -TCO +DIT -SSBS BTYPE=--) > [ 2.328911] pc : __kmem_cache_create_args+0xb8/0x3b0 > [ 2.328930] lr : __kmem_cache_create_args+0xb8/0x3b0 > [ 2.328942] sp : ffff800083d6fc50 > [ 2.328961] x29: ffff800083d6fc50 x28: f2ff0000c1674410 x27: ffff8000820b0598 > [ 2.329061] x26: 000000007fffffff x25: 0000000000000010 x24: 0000000000002000 > [ 2.329101] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388 > [ 2.329118] x20: f2ff0000c1674410 x19: f5ff0000c16364c0 x18: ffff800083d80030 > [ 2.329135] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 > [ 2.329152] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120 > [ 2.329169] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000 > [ 2.329194] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 > [ 2.329210] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 > [ 2.329226] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 > [ 2.329291] Call trace: > [ 2.329407] __kmem_cache_create_args+0xb8/0x3b0 > [ 2.329499] kmem_buckets_create+0xfc/0x320 > [ 2.329526] init_user_buckets+0x34/0x78 > [ 2.329540] do_one_initcall+0x64/0x3c8 > [ 2.329550] kernel_init_freeable+0x26c/0x578 > [ 2.329562] kernel_init+0x3c/0x258 > [ 2.329574] ret_from_fork+0x10/0x20 > [ 2.329698] ---[ end trace 0000000000000000 ]--- > > [ 2.403704] ------------[ cut here ]------------ > [ 2.404716] kmem_cache of name 'msg_msg-16' already exists > [ 2.404801] WARNING: CPU: 2 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0 > [ 2.404842] Modules linked in: > [ 2.404971] CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.12.0-rc5mm-unstable-arm64+ #12 > [ 2.405026] Tainted: [W]=WARN > [ 2.405043] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024 > [ 2.405057] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) > [ 2.405079] pc : __kmem_cache_create_args+0xb8/0x3b0 > [ 2.405100] lr : __kmem_cache_create_args+0xb8/0x3b0 > [ 2.405111] sp : ffff800083d6fc50 > [ 2.405115] x29: ffff800083d6fc50 x28: fbff0000c1674410 x27: ffff8000820b0598 > [ 2.405135] x26: 000000000000ffd0 x25: 0000000000000010 x24: 0000000000006000 > [ 2.405153] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388 > [ 2.405169] x20: fbff0000c1674410 x19: fdff0000c163d6c0 x18: ffff800083d80030 > [ 2.405185] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 > [ 2.405201] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120 > [ 2.405217] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000 > [ 2.405233] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 > [ 2.405248] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 > [ 2.405271] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 > [ 2.405287] Call trace: > [ 2.405293] __kmem_cache_create_args+0xb8/0x3b0 > [ 2.405305] kmem_buckets_create+0xfc/0x320 > [ 2.405315] init_msg_buckets+0x34/0x78 > [ 2.405326] do_one_initcall+0x64/0x3c8 > [ 2.405337] kernel_init_freeable+0x26c/0x578 > [ 2.405348] kernel_init+0x3c/0x258 > [ 2.405360] ret_from_fork+0x10/0x20 > [ 2.405370] ---[ end trace 0000000000000000 ]--- > > To address this, alias kmem_cache for sizes smaller than min alignment > to the aligned sized kmem_cache, as done with the default system kmalloc > bucket. > > Cc: <stable@vger.kernel.org> # 6.11.x > Fixes: b32801d1255b ("mm/slab: Introduce kmem_buckets_create() and family") > Signed-off-by: Koichiro Den <koichiro.den@gmail.com> > --- Thanks for catching this. Wonder if we could make this a lot simpler in kmem_buckets_create() by starting with the current: size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size; and adding: aligned_idx = __kmalloc_index(size, false); now the rest of the loop iteration would work with aligned_idx and if it differs from idx, we assign the cache pointer also to idx, etc. This should avoid duplicating the alignment calculation as we just extract from kmalloc_caches[] the result of what new_kmalloc_cache() already did? Vlastimil > mm/slab_common.c | 102 ++++++++++++++++++++++++++++------------------- > 1 file changed, 62 insertions(+), 40 deletions(-) > > diff --git a/mm/slab_common.c b/mm/slab_common.c > index 3d26c257ed8b..64140561dd0e 100644 > --- a/mm/slab_common.c > +++ b/mm/slab_common.c > @@ -354,6 +354,38 @@ struct kmem_cache *__kmem_cache_create_args(const char *name, > } > EXPORT_SYMBOL(__kmem_cache_create_args); > > +static unsigned int __kmalloc_minalign(void) > +{ > + unsigned int minalign = dma_get_cache_alignment(); > + > + if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && > + is_swiotlb_allocated()) > + minalign = ARCH_KMALLOC_MINALIGN; > + > + return max(minalign, arch_slab_minalign()); > +} > + > +static unsigned int __kmalloc_aligned_size(unsigned int idx) > +{ > + unsigned int aligned_size = kmalloc_info[idx].size; > + unsigned int minalign = __kmalloc_minalign(); > + > + if (minalign > ARCH_KMALLOC_MINALIGN) > + aligned_size = ALIGN(aligned_size, minalign); > + > + return aligned_size; > +} > + > +static unsigned int __kmalloc_aligned_idx(unsigned int idx) > +{ > + unsigned int minalign = __kmalloc_minalign(); > + > + if (minalign > ARCH_KMALLOC_MINALIGN) > + return __kmalloc_index(__kmalloc_aligned_size(idx), false); > + > + return idx; > +} > + > static struct kmem_cache *kmem_buckets_cache __ro_after_init; > > /** > @@ -381,7 +413,10 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, > void (*ctor)(void *)) > { > kmem_buckets *b; > - int idx; > + unsigned int idx; > + unsigned long mask = 0; > + > + BUILD_BUG_ON(ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]) > BITS_PER_LONG); > > /* > * When the separate buckets API is not built in, just return > @@ -402,43 +437,47 @@ kmem_buckets *kmem_buckets_create(const char *name, slab_flags_t flags, > > for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) { > char *short_size, *cache_name; > + unsigned int aligned_size = __kmalloc_aligned_size(idx); > + unsigned int aligned_idx = __kmalloc_aligned_idx(idx); > unsigned int cache_useroffset, cache_usersize; > - unsigned int size; > > + /* this might be an aliased kmem_cache */ > if (!kmalloc_caches[KMALLOC_NORMAL][idx]) > continue; > > - size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size; > - if (!size) > - continue; > - > short_size = strchr(kmalloc_caches[KMALLOC_NORMAL][idx]->name, '-'); > if (WARN_ON(!short_size)) > goto fail; > > - cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1); > - if (WARN_ON(!cache_name)) > - goto fail; > - > - if (useroffset >= size) { > + if (useroffset >= aligned_size) { > cache_useroffset = 0; > cache_usersize = 0; > } else { > cache_useroffset = useroffset; > - cache_usersize = min(size - cache_useroffset, usersize); > + cache_usersize = min(aligned_size - cache_useroffset, usersize); > } > - (*b)[idx] = kmem_cache_create_usercopy(cache_name, size, > - 0, flags, cache_useroffset, > - cache_usersize, ctor); > - kfree(cache_name); > - if (WARN_ON(!(*b)[idx])) > - goto fail; > + > + if (!(*b)[aligned_idx]) { > + cache_name = kasprintf(GFP_KERNEL, "%s-%s", name, short_size + 1); > + if (WARN_ON(!cache_name)) > + goto fail; > + (*b)[aligned_idx] = kmem_cache_create_usercopy(cache_name, aligned_size, > + 0, flags, cache_useroffset, > + cache_usersize, ctor); > + if (WARN_ON(!(*b)[aligned_idx])) { > + kfree(cache_name); > + goto fail; > + } > + set_bit(aligned_idx, &mask); > + } > + if (idx != aligned_idx) > + (*b)[idx] = (*b)[aligned_idx]; > } > > return b; > > fail: > - for (idx = 0; idx < ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL]); idx++) > + for_each_set_bit(idx, &mask, ARRAY_SIZE(kmalloc_caches[KMALLOC_NORMAL])) > kmem_cache_destroy((*b)[idx]); > kmem_cache_free(kmem_buckets_cache, b); > > @@ -871,24 +910,12 @@ void __init setup_kmalloc_cache_index_table(void) > } > } > > -static unsigned int __kmalloc_minalign(void) > -{ > - unsigned int minalign = dma_get_cache_alignment(); > - > - if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && > - is_swiotlb_allocated()) > - minalign = ARCH_KMALLOC_MINALIGN; > - > - return max(minalign, arch_slab_minalign()); > -} > - > static void __init > -new_kmalloc_cache(int idx, enum kmalloc_cache_type type) > +new_kmalloc_cache(unsigned int idx, enum kmalloc_cache_type type) > { > slab_flags_t flags = 0; > - unsigned int minalign = __kmalloc_minalign(); > - unsigned int aligned_size = kmalloc_info[idx].size; > - int aligned_idx = idx; > + unsigned int aligned_size = __kmalloc_aligned_size(idx); > + unsigned int aligned_idx = __kmalloc_aligned_idx(idx); > > if ((KMALLOC_RECLAIM != KMALLOC_NORMAL) && (type == KMALLOC_RECLAIM)) { > flags |= SLAB_RECLAIM_ACCOUNT; > @@ -914,11 +941,6 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type) > if (IS_ENABLED(CONFIG_MEMCG) && (type == KMALLOC_NORMAL)) > flags |= SLAB_NO_MERGE; > > - if (minalign > ARCH_KMALLOC_MINALIGN) { > - aligned_size = ALIGN(aligned_size, minalign); > - aligned_idx = __kmalloc_index(aligned_size, false); > - } > - > if (!kmalloc_caches[type][aligned_idx]) > kmalloc_caches[type][aligned_idx] = create_kmalloc_cache( > kmalloc_info[aligned_idx].name[type], > @@ -934,7 +956,7 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type) > */ > void __init create_kmalloc_caches(void) > { > - int i; > + unsigned int i; > enum kmalloc_cache_type type; > > /*
On Mon, Nov 04, 2024 at 04:40:12PM +0100, Vlastimil Babka wrote: > On 11/4/24 16:08, Koichiro Den wrote: > > Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment > > if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64. > > However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16. > > This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16], > > resulting in kmem_buckets_create() attempting to create a kmem_cache for > > size 16 twice. This duplication triggers warnings on boot: > > > > [ 2.325108] ------------[ cut here ]------------ > > [ 2.325135] kmem_cache of name 'memdup_user-16' already exists > > [ 2.325783] WARNING: CPU: 0 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0 > > [ 2.327957] Modules linked in: > > [ 2.328550] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc5mm-unstable-arm64+ #12 > > [ 2.328683] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024 > > [ 2.328790] pstate: 61000009 (nZCv daif -PAN -UAO -TCO +DIT -SSBS BTYPE=--) > > [ 2.328911] pc : __kmem_cache_create_args+0xb8/0x3b0 > > [ 2.328930] lr : __kmem_cache_create_args+0xb8/0x3b0 > > [ 2.328942] sp : ffff800083d6fc50 > > [ 2.328961] x29: ffff800083d6fc50 x28: f2ff0000c1674410 x27: ffff8000820b0598 > > [ 2.329061] x26: 000000007fffffff x25: 0000000000000010 x24: 0000000000002000 > > [ 2.329101] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388 > > [ 2.329118] x20: f2ff0000c1674410 x19: f5ff0000c16364c0 x18: ffff800083d80030 > > [ 2.329135] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 > > [ 2.329152] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120 > > [ 2.329169] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000 > > [ 2.329194] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 > > [ 2.329210] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 > > [ 2.329226] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 > > [ 2.329291] Call trace: > > [ 2.329407] __kmem_cache_create_args+0xb8/0x3b0 > > [ 2.329499] kmem_buckets_create+0xfc/0x320 > > [ 2.329526] init_user_buckets+0x34/0x78 > > [ 2.329540] do_one_initcall+0x64/0x3c8 > > [ 2.329550] kernel_init_freeable+0x26c/0x578 > > [ 2.329562] kernel_init+0x3c/0x258 > > [ 2.329574] ret_from_fork+0x10/0x20 > > [ 2.329698] ---[ end trace 0000000000000000 ]--- > > > > [ 2.403704] ------------[ cut here ]------------ > > [ 2.404716] kmem_cache of name 'msg_msg-16' already exists > > [ 2.404801] WARNING: CPU: 2 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0 > > [ 2.404842] Modules linked in: > > [ 2.404971] CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.12.0-rc5mm-unstable-arm64+ #12 > > [ 2.405026] Tainted: [W]=WARN > > [ 2.405043] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024 > > [ 2.405057] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) > > [ 2.405079] pc : __kmem_cache_create_args+0xb8/0x3b0 > > [ 2.405100] lr : __kmem_cache_create_args+0xb8/0x3b0 > > [ 2.405111] sp : ffff800083d6fc50 > > [ 2.405115] x29: ffff800083d6fc50 x28: fbff0000c1674410 x27: ffff8000820b0598 > > [ 2.405135] x26: 000000000000ffd0 x25: 0000000000000010 x24: 0000000000006000 > > [ 2.405153] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388 > > [ 2.405169] x20: fbff0000c1674410 x19: fdff0000c163d6c0 x18: ffff800083d80030 > > [ 2.405185] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 > > [ 2.405201] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120 > > [ 2.405217] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000 > > [ 2.405233] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 > > [ 2.405248] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 > > [ 2.405271] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 > > [ 2.405287] Call trace: > > [ 2.405293] __kmem_cache_create_args+0xb8/0x3b0 > > [ 2.405305] kmem_buckets_create+0xfc/0x320 > > [ 2.405315] init_msg_buckets+0x34/0x78 > > [ 2.405326] do_one_initcall+0x64/0x3c8 > > [ 2.405337] kernel_init_freeable+0x26c/0x578 > > [ 2.405348] kernel_init+0x3c/0x258 > > [ 2.405360] ret_from_fork+0x10/0x20 > > [ 2.405370] ---[ end trace 0000000000000000 ]--- > > > > To address this, alias kmem_cache for sizes smaller than min alignment > > to the aligned sized kmem_cache, as done with the default system kmalloc > > bucket. > > > > Cc: <stable@vger.kernel.org> # 6.11.x > > Fixes: b32801d1255b ("mm/slab: Introduce kmem_buckets_create() and family") > > Signed-off-by: Koichiro Den <koichiro.den@gmail.com> > > --- > > Thanks for catching this. > Wonder if we could make this a lot simpler in kmem_buckets_create() by > starting with the current: > > size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size; > > and adding: > > aligned_idx = __kmalloc_index(size, false); > > now the rest of the loop iteration would work with aligned_idx and if it > differs from idx, we assign the cache pointer also to idx, etc. > > This should avoid duplicating the alignment calculation as we just extract > from kmalloc_caches[] the result of what new_kmalloc_cache() already did? Yeah, I was thinking similar stuff -- the aligned stuff is the alias for the actual stuff. I like the bitmask for tracking the aliases. :) -- Kees Cook
On Mon, Nov 04, 2024 at 01:32:25PM -0800, Kees Cook wrote: > On Mon, Nov 04, 2024 at 04:40:12PM +0100, Vlastimil Babka wrote: > > On 11/4/24 16:08, Koichiro Den wrote: > > > Commit b035f5a6d852 ("mm: slab: reduce the kmalloc() minimum alignment > > > if DMA bouncing possible") reduced ARCH_KMALLOC_MINALIGN to 8 on arm64. > > > However, with KASAN_HW_TAGS enabled, arch_slab_minalign() becomes 16. > > > This causes kmalloc_caches[*][8] to be aliased to kmalloc_caches[*][16], > > > resulting in kmem_buckets_create() attempting to create a kmem_cache for > > > size 16 twice. This duplication triggers warnings on boot: > > > > > > [ 2.325108] ------------[ cut here ]------------ > > > [ 2.325135] kmem_cache of name 'memdup_user-16' already exists > > > [ 2.325783] WARNING: CPU: 0 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0 > > > [ 2.327957] Modules linked in: > > > [ 2.328550] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc5mm-unstable-arm64+ #12 > > > [ 2.328683] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024 > > > [ 2.328790] pstate: 61000009 (nZCv daif -PAN -UAO -TCO +DIT -SSBS BTYPE=--) > > > [ 2.328911] pc : __kmem_cache_create_args+0xb8/0x3b0 > > > [ 2.328930] lr : __kmem_cache_create_args+0xb8/0x3b0 > > > [ 2.328942] sp : ffff800083d6fc50 > > > [ 2.328961] x29: ffff800083d6fc50 x28: f2ff0000c1674410 x27: ffff8000820b0598 > > > [ 2.329061] x26: 000000007fffffff x25: 0000000000000010 x24: 0000000000002000 > > > [ 2.329101] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388 > > > [ 2.329118] x20: f2ff0000c1674410 x19: f5ff0000c16364c0 x18: ffff800083d80030 > > > [ 2.329135] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 > > > [ 2.329152] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120 > > > [ 2.329169] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000 > > > [ 2.329194] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 > > > [ 2.329210] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 > > > [ 2.329226] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 > > > [ 2.329291] Call trace: > > > [ 2.329407] __kmem_cache_create_args+0xb8/0x3b0 > > > [ 2.329499] kmem_buckets_create+0xfc/0x320 > > > [ 2.329526] init_user_buckets+0x34/0x78 > > > [ 2.329540] do_one_initcall+0x64/0x3c8 > > > [ 2.329550] kernel_init_freeable+0x26c/0x578 > > > [ 2.329562] kernel_init+0x3c/0x258 > > > [ 2.329574] ret_from_fork+0x10/0x20 > > > [ 2.329698] ---[ end trace 0000000000000000 ]--- > > > > > > [ 2.403704] ------------[ cut here ]------------ > > > [ 2.404716] kmem_cache of name 'msg_msg-16' already exists > > > [ 2.404801] WARNING: CPU: 2 PID: 1 at mm/slab_common.c:107 __kmem_cache_create_args+0xb8/0x3b0 > > > [ 2.404842] Modules linked in: > > > [ 2.404971] CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W 6.12.0-rc5mm-unstable-arm64+ #12 > > > [ 2.405026] Tainted: [W]=WARN > > > [ 2.405043] Hardware name: QEMU QEMU Virtual Machine, BIOS 2024.02-2 03/11/2024 > > > [ 2.405057] pstate: 60400009 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) > > > [ 2.405079] pc : __kmem_cache_create_args+0xb8/0x3b0 > > > [ 2.405100] lr : __kmem_cache_create_args+0xb8/0x3b0 > > > [ 2.405111] sp : ffff800083d6fc50 > > > [ 2.405115] x29: ffff800083d6fc50 x28: fbff0000c1674410 x27: ffff8000820b0598 > > > [ 2.405135] x26: 000000000000ffd0 x25: 0000000000000010 x24: 0000000000006000 > > > [ 2.405153] x23: ffff800083d6fce8 x22: ffff8000832222e8 x21: ffff800083222388 > > > [ 2.405169] x20: fbff0000c1674410 x19: fdff0000c163d6c0 x18: ffff800083d80030 > > > [ 2.405185] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000 > > > [ 2.405201] x14: 0000000000000000 x13: 0a73747369786520 x12: 79646165726c6120 > > > [ 2.405217] x11: 656820747563205b x10: 2d2d2d2d2d2d2d2d x9 : 0000000000000000 > > > [ 2.405233] x8 : 0000000000000000 x7 : 0000000000000000 x6 : 0000000000000000 > > > [ 2.405248] x5 : 0000000000000000 x4 : 0000000000000000 x3 : 0000000000000000 > > > [ 2.405271] x2 : 0000000000000000 x1 : 0000000000000000 x0 : 0000000000000000 > > > [ 2.405287] Call trace: > > > [ 2.405293] __kmem_cache_create_args+0xb8/0x3b0 > > > [ 2.405305] kmem_buckets_create+0xfc/0x320 > > > [ 2.405315] init_msg_buckets+0x34/0x78 > > > [ 2.405326] do_one_initcall+0x64/0x3c8 > > > [ 2.405337] kernel_init_freeable+0x26c/0x578 > > > [ 2.405348] kernel_init+0x3c/0x258 > > > [ 2.405360] ret_from_fork+0x10/0x20 > > > [ 2.405370] ---[ end trace 0000000000000000 ]--- > > > > > > To address this, alias kmem_cache for sizes smaller than min alignment > > > to the aligned sized kmem_cache, as done with the default system kmalloc > > > bucket. > > > > > > Cc: <stable@vger.kernel.org> # 6.11.x > > > Fixes: b32801d1255b ("mm/slab: Introduce kmem_buckets_create() and family") > > > Signed-off-by: Koichiro Den <koichiro.den@gmail.com> > > > --- > > > > Thanks for catching this. > > Wonder if we could make this a lot simpler in kmem_buckets_create() by > > starting with the current: > > > > size = kmalloc_caches[KMALLOC_NORMAL][idx]->object_size; > > > > and adding: > > > > aligned_idx = __kmalloc_index(size, false); > > > > now the rest of the loop iteration would work with aligned_idx and if it > > differs from idx, we assign the cache pointer also to idx, etc. > > > > This should avoid duplicating the alignment calculation as we just extract > > from kmalloc_caches[] the result of what new_kmalloc_cache() already did? > > Yeah, I was thinking similar stuff -- the aligned stuff is the alias for > the actual stuff. > > I like the bitmask for tracking the aliases. :) Thanks for reviewing, Vlastimil and Kees. It sounds reasonable, I'll send v2 soon. > > -- > Kees Cook
© 2016 - 2024 Red Hat, Inc.