mm/slub.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-)
From: yangshiguang <yangshiguang@xiaomi.com>
From: yangshiguang <yangshiguang@xiaomi.com>
set_track_prepare() can incur lock recursion.
The issue is that it is called from hrtimer_start_range_ns
holding the per_cpu(hrtimer_bases)[n].lock, but when enabled
CONFIG_DEBUG_OBJECTS_TIMERS, may wake up kswapd in set_track_prepare,
and try to hold the per_cpu(hrtimer_bases)[n].lock.
Avoid deadlock caused by implicitly waking up kswapd by
passing in allocation flags.
The oops looks something like:
BUG: spinlock recursion on CPU#3, swapper/3/0
lock: 0xffffff8a4bf29c80, .magic: dead4ead, .owner: swapper/3/0, .owner_cpu: 3
Hardware name: Qualcomm Technologies, Inc. Popsicle based on SM8850 (DT)
Call trace:
spin_bug+0x0
_raw_spin_lock_irqsave+0x80
hrtimer_try_to_cancel+0x94
task_contending+0x10c
enqueue_dl_entity+0x2a4
dl_server_start+0x74
enqueue_task_fair+0x568
enqueue_task+0xac
do_activate_task+0x14c
ttwu_do_activate+0xcc
try_to_wake_up+0x6c8
default_wake_function+0x20
autoremove_wake_function+0x1c
__wake_up+0xac
wakeup_kswapd+0x19c
wake_all_kswapds+0x78
__alloc_pages_slowpath+0x1ac
__alloc_pages_noprof+0x298
stack_depot_save_flags+0x6b0
stack_depot_save+0x14
set_track_prepare+0x5c
___slab_alloc+0xccc
__kmalloc_cache_noprof+0x470
__set_page_owner+0x2bc
post_alloc_hook[jt]+0x1b8
prep_new_page+0x28
get_page_from_freelist+0x1edc
__alloc_pages_noprof+0x13c
alloc_slab_page+0x244
allocate_slab+0x7c
___slab_alloc+0x8e8
kmem_cache_alloc_noprof+0x450
debug_objects_fill_pool+0x22c
debug_object_activate+0x40
enqueue_hrtimer[jt]+0xdc
hrtimer_start_range_ns+0x5f8
...
Signed-off-by: yangshiguang <yangshiguang@xiaomi.com>
Fixes: 5cf909c553e9 ("mm/slub: use stackdepot to save stack trace in objects")
Cc: stable@vger.kernel.org
---
v1 -> v2:
propagate gfp flags to set_track_prepare()
v2 -> v3:
Remove the gfp restriction in set_track_prepare()
[1]https://lore.kernel.org/all/20250801065121.876793-1-yangshiguang1011@163.com/
[2]https://lore.kernel.org/all/20250814111641.380629-2-yangshiguang1011@163.com/
---
mm/slub.c | 22 ++++++++++++----------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 30003763d224..443411713e2f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -962,19 +962,21 @@ static struct track *get_track(struct kmem_cache *s, void *object,
}
#ifdef CONFIG_STACKDEPOT
-static noinline depot_stack_handle_t set_track_prepare(void)
+static noinline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
{
depot_stack_handle_t handle;
unsigned long entries[TRACK_ADDRS_COUNT];
unsigned int nr_entries;
+ /* Preemption is disabled in ___slab_alloc() */
+ gfp_flags &= ~(__GFP_DIRECT_RECLAIM);
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
- handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+ handle = stack_depot_save(entries, nr_entries, gfp_flags);
return handle;
}
#else
-static inline depot_stack_handle_t set_track_prepare(void)
+static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags)
{
return 0;
}
@@ -996,9 +998,9 @@ static void set_track_update(struct kmem_cache *s, void *object,
}
static __always_inline void set_track(struct kmem_cache *s, void *object,
- enum track_item alloc, unsigned long addr)
+ enum track_item alloc, unsigned long addr, gfp_t gfp_flags)
{
- depot_stack_handle_t handle = set_track_prepare();
+ depot_stack_handle_t handle = set_track_prepare(gfp_flags);
set_track_update(s, object, alloc, addr, handle);
}
@@ -1921,9 +1923,9 @@ static inline bool free_debug_processing(struct kmem_cache *s,
static inline void slab_pad_check(struct kmem_cache *s, struct slab *slab) {}
static inline int check_object(struct kmem_cache *s, struct slab *slab,
void *object, u8 val) { return 1; }
-static inline depot_stack_handle_t set_track_prepare(void) { return 0; }
+static inline depot_stack_handle_t set_track_prepare(gfp_t gfp_flags) { return 0; }
static inline void set_track(struct kmem_cache *s, void *object,
- enum track_item alloc, unsigned long addr) {}
+ enum track_item alloc, unsigned long addr, gfp_t gfp_flags) {}
static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct slab *slab) {}
static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
@@ -3878,7 +3880,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
* tracking info and return the object.
*/
if (s->flags & SLAB_STORE_USER)
- set_track(s, freelist, TRACK_ALLOC, addr);
+ set_track(s, freelist, TRACK_ALLOC, addr, gfpflags);
return freelist;
}
@@ -3910,7 +3912,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
goto new_objects;
if (s->flags & SLAB_STORE_USER)
- set_track(s, freelist, TRACK_ALLOC, addr);
+ set_track(s, freelist, TRACK_ALLOC, addr, gfpflags);
return freelist;
}
@@ -4422,7 +4424,7 @@ static noinline void free_to_partial_list(
depot_stack_handle_t handle = 0;
if (s->flags & SLAB_STORE_USER)
- handle = set_track_prepare();
+ handle = set_track_prepare(__GFP_NOWARN);
spin_lock_irqsave(&n->list_lock, flags);
--
2.43.0
On Mon, Aug 25, 2025 at 08:17:37PM +0800, yangshiguang1011@163.com wrote: > Avoid deadlock caused by implicitly waking up kswapd by > passing in allocation flags. [...] > + /* Preemption is disabled in ___slab_alloc() */ > + gfp_flags &= ~(__GFP_DIRECT_RECLAIM); If you don't mean __GFP_KSWAPD_RECLAIM here, the explanation needs to be better.
On 8/25/25 14:40, Matthew Wilcox wrote: > On Mon, Aug 25, 2025 at 08:17:37PM +0800, yangshiguang1011@163.com wrote: >> Avoid deadlock caused by implicitly waking up kswapd by >> passing in allocation flags. > [...] >> + /* Preemption is disabled in ___slab_alloc() */ >> + gfp_flags &= ~(__GFP_DIRECT_RECLAIM); > > If you don't mean __GFP_KSWAPD_RECLAIM here, the explanation needs to > be better. It was suggested by Harry here: https://lore.kernel.org/all/aKKhUoUkRNDkFYYb@harry/ I think the comment is enough? Disabling preemption means we can't direct reclaim, but we can wake up kswapd. If the slab caller context is such that we can't, __GFP_KSWAPD_RECLAIM already won't be in the gfp_flags. But I think we should mask our also __GFP_NOFAIL and add __GFP_NOWARN? (we should get some common helpers for these kinds of gfp flag manipulations already)
On Mon, Aug 25, 2025 at 05:42:52PM +0200, Vlastimil Babka wrote: > On 8/25/25 14:40, Matthew Wilcox wrote: > > On Mon, Aug 25, 2025 at 08:17:37PM +0800, yangshiguang1011@163.com wrote: > >> Avoid deadlock caused by implicitly waking up kswapd by > >> passing in allocation flags. > > [...] > >> + /* Preemption is disabled in ___slab_alloc() */ > >> + gfp_flags &= ~(__GFP_DIRECT_RECLAIM); > > > > If you don't mean __GFP_KSWAPD_RECLAIM here, the explanation needs to > > be better. > > It was suggested by Harry here: > https://lore.kernel.org/all/aKKhUoUkRNDkFYYb@harry > > I think the comment is enough? Disabling preemption means we can't direct > reclaim, but we can wake up kswapd. If the slab caller context is such that > we can't, __GFP_KSWAPD_RECLAIM already won't be in the gfp_flags. To make it a little bit more verbose, this ^^ explanation can be added to the changelog? > But I think we should mask our also __GFP_NOFAIL and add __GFP_NOWARN? That sounds good. > (we should get some common helpers for these kinds of gfp flag manipulations > already) Any ideas for its name? gfp_dont_try_too_hard(), gfp_adjust_lightweight(), gfp_adjust_mayfail(), ... I'm not good at naming :/ -- Cheers, Harry / Hyeonggon
On 8/27/25 07:17, Harry Yoo wrote: > On Mon, Aug 25, 2025 at 05:42:52PM +0200, Vlastimil Babka wrote: >> On 8/25/25 14:40, Matthew Wilcox wrote: >> > On Mon, Aug 25, 2025 at 08:17:37PM +0800, yangshiguang1011@163.com wrote: >> >> Avoid deadlock caused by implicitly waking up kswapd by >> >> passing in allocation flags. >> > [...] >> >> + /* Preemption is disabled in ___slab_alloc() */ >> >> + gfp_flags &= ~(__GFP_DIRECT_RECLAIM); >> > >> > If you don't mean __GFP_KSWAPD_RECLAIM here, the explanation needs to >> > be better. >> >> It was suggested by Harry here: >> https://lore.kernel.org/all/aKKhUoUkRNDkFYYb@harry >> >> I think the comment is enough? Disabling preemption means we can't direct >> reclaim, but we can wake up kswapd. If the slab caller context is such that >> we can't, __GFP_KSWAPD_RECLAIM already won't be in the gfp_flags. > > To make it a little bit more verbose, this ^^ explanation can be added to the > changelog? > >> But I think we should mask our also __GFP_NOFAIL and add __GFP_NOWARN? > > That sounds good. > >> (we should get some common helpers for these kinds of gfp flag manipulations >> already) > > Any ideas for its name? > > gfp_dont_try_too_hard(), > gfp_adjust_lightweight(), > gfp_adjust_mayfail(), > ... > > I'm not good at naming :/ Looks like there's already gfp_nested_mask() for these purposes. I'm not sure if it should be allowing GFP_ATOMIC (thus __GFP_HIGH) as it does though. Seems to contradict the comment about not exhausing reserves. Wonder if that was raised during review... The masking out of __GFP_DIRECT_RECLAIM is specific to the slab case so we don't need a helper for that (unless we find other users). It could be then e.g. gfp_nested_mask_noblock() ?
On 8/27/25 10:00, Vlastimil Babka wrote: > On 8/27/25 07:17, Harry Yoo wrote: >> On Mon, Aug 25, 2025 at 05:42:52PM +0200, Vlastimil Babka wrote: >>> On 8/25/25 14:40, Matthew Wilcox wrote: >>> > On Mon, Aug 25, 2025 at 08:17:37PM +0800, yangshiguang1011@163.com wrote: >>> >> Avoid deadlock caused by implicitly waking up kswapd by >>> >> passing in allocation flags. >>> > [...] >>> >> + /* Preemption is disabled in ___slab_alloc() */ >>> >> + gfp_flags &= ~(__GFP_DIRECT_RECLAIM); >>> > >>> > If you don't mean __GFP_KSWAPD_RECLAIM here, the explanation needs to >>> > be better. >>> >>> It was suggested by Harry here: >>> https://lore.kernel.org/all/aKKhUoUkRNDkFYYb@harry >>> >>> I think the comment is enough? Disabling preemption means we can't direct >>> reclaim, but we can wake up kswapd. If the slab caller context is such that >>> we can't, __GFP_KSWAPD_RECLAIM already won't be in the gfp_flags. >> >> To make it a little bit more verbose, this ^^ explanation can be added to the >> changelog? >> >>> But I think we should mask our also __GFP_NOFAIL and add __GFP_NOWARN? >> >> That sounds good. >> >>> (we should get some common helpers for these kinds of gfp flag manipulations >>> already) >> >> Any ideas for its name? >> >> gfp_dont_try_too_hard(), >> gfp_adjust_lightweight(), >> gfp_adjust_mayfail(), >> ... >> >> I'm not good at naming :/ > > Looks like there's already gfp_nested_mask() for these purposes. I'm not > sure if it should be allowing GFP_ATOMIC (thus __GFP_HIGH) as it does > though. Seems to contradict the comment about not exhausing reserves. Wonder > if that was raised during review... Looks like I mentioned it but inconclusively. https://lore.kernel.org/linux-xfs/1fb5b8f3-d8c7-4350-888a-ad8f4d54bc66@suse.cz/ Anyway that's orthogonal to using the helper here right now. > The masking out of __GFP_DIRECT_RECLAIM is specific to the slab case so we > don't need a helper for that (unless we find other users). It could be then > e.g. gfp_nested_mask_noblock() ? >
At 2025-08-27 13:17:31, "Harry Yoo" <harry.yoo@oracle.com> wrote: >On Mon, Aug 25, 2025 at 05:42:52PM +0200, Vlastimil Babka wrote: >> On 8/25/25 14:40, Matthew Wilcox wrote: >> > On Mon, Aug 25, 2025 at 08:17:37PM +0800, yangshiguang1011@163.com wrote: >> >> Avoid deadlock caused by implicitly waking up kswapd by >> >> passing in allocation flags. >> > [...] >> >> + /* Preemption is disabled in ___slab_alloc() */ >> >> + gfp_flags &= ~(__GFP_DIRECT_RECLAIM); >> > >> > If you don't mean __GFP_KSWAPD_RECLAIM here, the explanation needs to >> > be better. >> >> It was suggested by Harry here: >> https://lore.kernel.org/all/aKKhUoUkRNDkFYYb@harry >> >> I think the comment is enough? Disabling preemption means we can't direct >> reclaim, but we can wake up kswapd. If the slab caller context is such that >> we can't, __GFP_KSWAPD_RECLAIM already won't be in the gfp_flags. > >To make it a little bit more verbose, this ^^ explanation can be added to the >changelog? ok, will be easier to understand. > >> But I think we should mask our also __GFP_NOFAIL and add __GFP_NOWARN? > >That sounds good.> >> (we should get some common helpers for these kinds of gfp flag manipulations >> already) > >Any ideas for its name? > >gfp_dont_try_too_hard(), >gfp_adjust_lightweight(), >gfp_adjust_mayfail(), >... > >I'm not good at naming :/ > How about this? /* Preemption is disabled in ___slab_alloc() */ - gfp_flags &= ~(__GFP_DIRECT_RECLAIM); + gfp_flags = (gfp_flags & ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL)) | + __GFP_NOWARN; >-- >Cheers, >Harry / Hyeonggon
On 8/27/25 09:45, yangshiguang wrote: > > > > > At 2025-08-27 13:17:31, "Harry Yoo" <harry.yoo@oracle.com> wrote: >>On Mon, Aug 25, 2025 at 05:42:52PM +0200, Vlastimil Babka wrote: >>> On 8/25/25 14:40, Matthew Wilcox wrote: >>> > On Mon, Aug 25, 2025 at 08:17:37PM +0800, yangshiguang1011@163.com wrote: >>> >> Avoid deadlock caused by implicitly waking up kswapd by >>> >> passing in allocation flags. >>> > [...] >>> >> + /* Preemption is disabled in ___slab_alloc() */ >>> >> + gfp_flags &= ~(__GFP_DIRECT_RECLAIM); >>> > >>> > If you don't mean __GFP_KSWAPD_RECLAIM here, the explanation needs to >>> > be better. >>> >>> It was suggested by Harry here: >>> https://lore.kernel.org/all/aKKhUoUkRNDkFYYb@harry >>> >>> I think the comment is enough? Disabling preemption means we can't direct >>> reclaim, but we can wake up kswapd. If the slab caller context is such that >>> we can't, __GFP_KSWAPD_RECLAIM already won't be in the gfp_flags. >> >>To make it a little bit more verbose, this ^^ explanation can be added to the > >>changelog? > > > ok, will be easier to understand. > >> >>> But I think we should mask our also __GFP_NOFAIL and add __GFP_NOWARN? >> > >>That sounds good.> >>> (we should get some common helpers for these kinds of gfp flag manipulations >>> already) >> >>Any ideas for its name? >> >>gfp_dont_try_too_hard(), >>gfp_adjust_lightweight(), >>gfp_adjust_mayfail(), >>... >> >>I'm not good at naming :/ > >> > > How about this? > > /* Preemption is disabled in ___slab_alloc() */ > - gfp_flags &= ~(__GFP_DIRECT_RECLAIM); > + gfp_flags = (gfp_flags & ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL)) | > + __GFP_NOWARN; I'd suggest using gfp_nested_flags() and & ~(__GFP_DIRECT_RECLAIM); > >-- >>Cheers, >>Harry / Hyeonggon
© 2016 - 2025 Red Hat, Inc.