include/linux/debugobjects.h | 5 +++++ include/linux/slab.h | 11 +++++++++++ include/linux/slab_def.h | 2 ++ lib/debugobjects.c | 37 ++++++++++++++++++++++++++++++++++++ mm/slab.h | 4 ++-- mm/slub.c | 34 +++++++++++++++++++++++++++++---- 6 files changed, 87 insertions(+), 6 deletions(-)
From: Ben Greear <greearb@candelatech.com>
When debugobjects detects a problem, it may be in generic code
where the backtraces currently printed are not helpful.
To try to improve this, store the backtrace of where the
debug_obj was created and print that out when problems
are found.
Also print out slub info for the object held by the
debug_obj. In my particular case, this was not super
useful, appearantly because of all of the KASAN and other
debugging I have enabled. Still, might provide a few
clues.
Signed-off-by: Ben Greear <greearb@candelatech.com>
---
include/linux/debugobjects.h | 5 +++++
include/linux/slab.h | 11 +++++++++++
include/linux/slab_def.h | 2 ++
lib/debugobjects.c | 37 ++++++++++++++++++++++++++++++++++++
mm/slab.h | 4 ++--
mm/slub.c | 34 +++++++++++++++++++++++++++++----
6 files changed, 87 insertions(+), 6 deletions(-)
diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 32444686b6ff..8e8df719bd88 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -31,6 +31,11 @@ struct debug_obj {
unsigned int astate;
void *object;
const struct debug_obj_descr *descr;
+#ifdef CONFIG_STACKDEPOT
+#define DEBUG_OBJ_ADDRS_COUNT 16
+ /* Including stackdepot.h blows up the build, so open-code the handle. */
+ u64 trace_handle;
+#endif
};
/**
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 8228d1276a2f..87a5da669eaf 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -793,4 +793,15 @@ int slab_dead_cpu(unsigned int cpu);
#define slab_dead_cpu NULL
#endif
+
+/**
+ * Calling this on allocated memory will print debugging info
+ * about the object, if CONFIG_SLAB_DEBUG is enabled.
+ */
+#ifdef CONFIG_SLUB_DEBUG
+void slab_print_mem_info(const void *x);
+#else
+static inline void slab_print_mem_info(const void *x) { }
+#endif
+
#endif /* _LINUX_SLAB_H */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index a61e7d55d0d3..db3ce19be339 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -121,4 +121,6 @@ static inline int objs_per_slab(const struct kmem_cache *cache,
return cache->num;
}
+static inline void slab_print_mem_info(const void *x) { }
+
#endif /* _LINUX_SLAB_DEF_H */
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a517256a270b..1f458e473bc5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -17,6 +17,8 @@
#include <linux/hash.h>
#include <linux/kmemleak.h>
#include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/stackdepot.h>
#define ODEBUG_HASH_BITS 14
#define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS)
@@ -216,6 +218,33 @@ static struct debug_obj *__alloc_object(struct hlist_head *list)
return obj;
}
+#ifdef CONFIG_STACKDEPOT
+static void debug_print_stack_info(struct debug_obj *object)
+{
+ if (object->trace_handle) {
+ pr_err("debugobjects: debug_obj allocated at:\n");
+ stack_depot_print(object->trace_handle);
+ pr_err("end of stack dump\n");
+ }
+}
+
+static noinline depot_stack_handle_t set_track_prepare(void)
+{
+ depot_stack_handle_t trace_handle;
+ unsigned long entries[DEBUG_OBJ_ADDRS_COUNT];
+ unsigned int nr_entries;
+
+ nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
+ trace_handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+
+ return trace_handle;
+}
+
+#else
+static void debug_print_stack_info(struct debug_obj *object) { }
+static depot_stack_handle_t set_track_prepare(void) { }
+#endif
+
static struct debug_obj *
alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
{
@@ -272,6 +301,12 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
obj->state = ODEBUG_STATE_NONE;
obj->astate = 0;
hlist_add_head(&obj->node, &b->list);
+
+ /* Save stack of where object was created */
+#ifdef CONFIG_STACKDEPOT
+ /* kernel backtrace */
+ obj->trace_handle = set_track_prepare();
+#endif
}
return obj;
}
@@ -515,6 +550,8 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
"object: %p object type: %s hint: %pS\n",
msg, obj_states[obj->state], obj->astate,
obj->object, descr->name, hint);
+ debug_print_stack_info(obj);
+ slab_print_mem_info(obj->object);
}
debug_objects_warnings++;
}
diff --git a/mm/slab.h b/mm/slab.h
index 799a315695c6..d977c093e90d 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -399,14 +399,14 @@ DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
#else
DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
#endif
-extern void print_tracking(struct kmem_cache *s, void *object);
+extern void print_tracking(struct kmem_cache *s, const void *object);
long validate_slab_cache(struct kmem_cache *s);
static inline bool __slub_debug_enabled(void)
{
return static_branch_unlikely(&slub_debug_enabled);
}
#else
-static inline void print_tracking(struct kmem_cache *s, void *object)
+static inline void print_tracking(struct kmem_cache *s, const void *object)
{
}
static inline bool __slub_debug_enabled(void)
diff --git a/mm/slub.c b/mm/slub.c
index f7940048138c..7d0c648369ab 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -760,10 +760,10 @@ static inline unsigned int get_info_end(struct kmem_cache *s)
return s->inuse;
}
-static struct track *get_track(struct kmem_cache *s, void *object,
+static struct track *get_track(struct kmem_cache *s, const void *object,
enum track_item alloc)
{
- struct track *p;
+ const struct track *p;
p = object + get_info_end(s);
@@ -841,11 +841,14 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time)
#endif
}
-void print_tracking(struct kmem_cache *s, void *object)
+void print_tracking(struct kmem_cache *s, const void *object)
{
unsigned long pr_time = jiffies;
- if (!(s->flags & SLAB_STORE_USER))
+ if (!(s->flags & SLAB_STORE_USER)) {
+ pr_err("slub-print-tracking, STORE-USER not enabled, cache: %s flags: 0x%x\n",
+ s->name, s->flags);
return;
+ }
print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
@@ -1712,6 +1715,29 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
return flags | slub_debug_local;
}
+
+void slab_print_mem_info(const void *x)
+{
+ struct slab *slab;
+
+ if (unlikely(ZERO_OR_NULL_PTR(x)))
+ return;
+
+ slab = virt_to_slab(x);
+ if (!slab) {
+ pr_err("slub-print-mem-info, could not find slab for virt addr: %p\n",
+ x);
+ return;
+ }
+
+ pr_err("slab-info, slab: %p\n", slab);
+ print_slab_info(slab);
+ pr_err("slab-tracking-info: cache: %s (%p)\n",
+ slab->slab_cache->name, slab->slab_cache);
+ print_tracking(slab->slab_cache, x);
+ pr_err("end of slab-tracking-info\n");
+}
+
#else /* !CONFIG_SLUB_DEBUG */
static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
static inline
--
2.41.0
Hello,
kernel test robot noticed "WARNING:possible_recursive_locking_detected" on:
commit: d132b3de19193c996402718d76e63f797d4259a9 ("[PATCH/RFC] debugobjects/slub: Print slab info and backtrace.")
url: https://github.com/intel-lab-lkp/linux/commits/greearb-candelatech-com/debugobjects-slub-Print-slab-info-and-backtrace/20231103-093945
base: git://git.kernel.org/cgit/linux/kernel/git/vbabka/slab.git for-next
patch link: https://lore.kernel.org/all/20231103013704.1232723-1-greearb@candelatech.com/
patch subject: [PATCH/RFC] debugobjects/slub: Print slab info and backtrace.
in testcase: boot
compiler: gcc-7
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
(please refer to attached dmesg/kmsg for entire log/backtrace)
the issue doesn't always happen for this commit in our tests (shows 39 times
in 50 runs), but keeps clean on parent.
90f055df112162fd d132b3de19193c996402718d76e
---------------- ---------------------------
fail:runs %reproduction fail:runs
| | |
:51 76% 39:50 dmesg.WARNING:possible_recursive_locking_detected
:51 76% 39:50 dmesg.WARNING:possible_recursive_locking_detected_swapper_is_trying_to_acquire_lock:at:__debug_check_no_obj_freed_but_task_is_already_holding_lock:at:__debug_object_init/0x
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202311091331.f2eaf4f7-oliver.sang@intel.com
[ 2.997328][ T1] WARNING: possible recursive locking detected
[ 2.997927][ T1] 6.6.0-rc4-00006-gd132b3de1919 #1 Tainted: G T
[ 2.997927][ T1] --------------------------------------------
[ 2.997927][ T1] swapper/1 is trying to acquire lock:
[ 2.997927][ T1] ffffffff85425790 (&obj_hash[i].lock){..-.}-{2:2}, at: __debug_check_no_obj_freed (lib/debugobjects.c:1057)
[ 2.997927][ T1]
[ 2.997927][ T1] but task is already holding lock:
[ 2.997927][ T1] ffffffff8531abc0 (&obj_hash[i].lock){..-.}-{2:2}, at: __debug_object_init (lib/debugobjects.c:528 lib/debugobjects.c:667)
[ 2.997927][ T1]
[ 2.997927][ T1] other info that might help us debug this:
[ 2.997927][ T1] Possible unsafe locking scenario:
[ 2.997927][ T1]
[ 2.997927][ T1] CPU0
[ 2.997927][ T1] ----
[ 2.997927][ T1] lock(&obj_hash[i].lock);
[ 2.997927][ T1]
[ 2.997927][ T1] *** DEADLOCK ***
[ 2.997927][ T1]
[ 2.997927][ T1] May be due to missing lock nesting notation
[ 2.997927][ T1]
[ 2.997927][ T1] 1 lock held by swapper/1:
[ 2.997927][ T1] #0: ffffffff8531abc0 (&obj_hash[i].lock){..-.}-{2:2}, at: __debug_object_init (lib/debugobjects.c:528 lib/debugobjects.c:667)
[ 2.997927][ T1]
[ 2.997927][ T1] stack backtrace:
[ 2.997927][ T1] CPU: 0 PID: 1 Comm: swapper Tainted: G T 6.6.0-rc4-00006-gd132b3de1919 #1 2ef50b864013db82ef063ed2d8014756ac6c0996
[ 2.997927][ T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 2.997927][ T1] Call Trace:
[ 2.997927][ T1] <TASK>
[ 2.997927][ T1] __lock_acquire (kernel/locking/lockdep.c:5137)
[ 2.997927][ T1] ? local_clock (arch/x86/include/asm/preempt.h:85 kernel/sched/clock.c:316)
[ 2.997927][ T1] ? lock_release (kernel/locking/lockdep.c:223 kernel/locking/lockdep.c:352 kernel/locking/lockdep.c:5435 kernel/locking/lockdep.c:5773)
[ 2.997927][ T1] lock_acquire (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5755)
[ 2.997927][ T1] ? __debug_check_no_obj_freed (lib/debugobjects.c:1057)
[ 2.997927][ T1] _raw_spin_lock_irqsave (include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162)
[ 2.997927][ T1] ? __debug_check_no_obj_freed (lib/debugobjects.c:1057)
[ 2.997927][ T1] __debug_check_no_obj_freed (lib/debugobjects.c:1057)
[ 2.997927][ T1] ? local_clock (arch/x86/include/asm/preempt.h:85 kernel/sched/clock.c:316)
[ 2.997927][ T1] ? lock_release (kernel/locking/lockdep.c:223 kernel/locking/lockdep.c:352 kernel/locking/lockdep.c:5435 kernel/locking/lockdep.c:5773)
[ 2.997927][ T1] free_unref_page_prepare (mm/page_alloc.c:1146 mm/page_alloc.c:2312)
[ 2.997927][ T1] ? find_held_lock (kernel/locking/lockdep.c:5243)
[ 2.997927][ T1] free_unref_page (mm/page_alloc.c:2405)
[ 2.997927][ T1] __stack_depot_save (lib/stackdepot.c:443)
[ 2.997927][ T1] set_track_prepare (mm/kmemleak.c:620)
[ 2.997927][ T1] ? pm_runtime_init (drivers/base/power/runtime.c:1731)
[ 2.997927][ T1] ? device_initialize (drivers/base/core.c:3102)
[ 2.997927][ T1] ? device_register (drivers/base/core.c:3706)
[ 2.997927][ T1] ? subsys_register (drivers/base/bus.c:1230)
[ 2.997927][ T1] ? container_dev_init (drivers/base/container.c:39)
[ 2.997927][ T1] ? kernel_init_freeable (init/main.c:1327 init/main.c:1547)
[ 2.997927][ T1] ? kernel_init (init/main.c:1439)
[ 2.997927][ T1] ? ret_from_fork (arch/x86/kernel/process.c:153)
[ 2.997927][ T1] ? ret_from_fork_asm (arch/x86/entry/entry_64.S:312)
[ 2.997927][ T1] lookup_object_or_alloc (lib/debugobjects.c:308 lib/debugobjects.c:624)
[ 2.997927][ T1] __debug_object_init (lib/debugobjects.c:672)
[ 2.997927][ T1] pm_runtime_init (drivers/base/power/runtime.c:1731)
[ 2.997927][ T1] device_initialize (drivers/base/core.c:3102)
[ 2.997927][ T1] device_register (drivers/base/core.c:3706)
[ 2.997927][ T1] subsys_register (drivers/base/bus.c:1230)
[ 2.997927][ T1] container_dev_init (drivers/base/container.c:39)
[ 2.997927][ T1] kernel_init_freeable (init/main.c:1327 init/main.c:1547)
[ 2.997927][ T1] ? wait_for_completion (kernel/sched/completion.c:149)
[ 2.997927][ T1] ? rest_init (init/main.c:1429)
[ 2.997927][ T1] kernel_init (init/main.c:1439)
[ 2.997927][ T1] ret_from_fork (arch/x86/kernel/process.c:153)
[ 2.997927][ T1] ? rest_init (init/main.c:1429)
[ 2.997927][ T1] ret_from_fork_asm (arch/x86/entry/entry_64.S:312)
[ 2.997927][ T1] </TASK>
[ 3.000434][ T1] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
[ 3.001966][ T1] futex hash table entries: 256 (order: 2, 24576 bytes, linear)
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20231109/202311091331.f2eaf4f7-oliver.sang@intel.com
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Thu, Nov 02 2023 at 18:37, greearb@candelatech.com wrote:
> When debugobjects detects a problem, it may be in generic code
> where the backtraces currently printed are not helpful.
>
> To try to improve this, store the backtrace of where the
> debug_obj was created and print that out when problems
> are found.
To make debugobjects massively slower than it is already.
> Also print out slub info for the object held by the
> debug_obj. In my particular case, this was not super
> useful, appearantly because of all of the KASAN and other
> debugging I have enabled. Still, might provide a few
> clues.
So either it is useful or not. You could have disabled the other
debugging to figure this out, no?
Aside of that this is a separate issue and wants to be split out into a
separate patch if at all.
> diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
> index 32444686b6ff..8e8df719bd88 100644
> --- a/include/linux/debugobjects.h
> +++ b/include/linux/debugobjects.h
> @@ -31,6 +31,11 @@ struct debug_obj {
> unsigned int astate;
> void *object;
> const struct debug_obj_descr *descr;
> +#ifdef CONFIG_STACKDEPOT
If at all then this wants to be CONFIG_DEBUG_OBJECTS_STACKTRACE or such.
> +#define DEBUG_OBJ_ADDRS_COUNT 16
Don't glue a define into the struct. That's unreadable.
> + /* Including stackdepot.h blows up the build, so open-code the handle. */
Seriously no. The obvious fix is to move 'struct debug_obj' into the C
file as it is not used anywhere else.
> + u64 trace_handle;
> +#endif
> };
> diff --git a/lib/debugobjects.c b/lib/debugobjects.c
> index a517256a270b..1f458e473bc5 100644
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -17,6 +17,8 @@
> #include <linux/hash.h>
> #include <linux/kmemleak.h>
> #include <linux/cpu.h>
> +#include <linux/slab.h>
> +#include <linux/stackdepot.h>
>
> #define ODEBUG_HASH_BITS 14
> #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS)
> @@ -216,6 +218,33 @@ static struct debug_obj *__alloc_object(struct hlist_head *list)
> return obj;
> }
>
> +#ifdef CONFIG_STACKDEPOT
> +static void debug_print_stack_info(struct debug_obj *object)
> +{
> + if (object->trace_handle) {
> + pr_err("debugobjects: debug_obj allocated at:\n");
> + stack_depot_print(object->trace_handle);
> + pr_err("end of stack dump\n");
> + }
> +}
> +
> +static noinline depot_stack_handle_t set_track_prepare(void)
> +{
> + depot_stack_handle_t trace_handle;
> + unsigned long entries[DEBUG_OBJ_ADDRS_COUNT];
> + unsigned int nr_entries;
https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#variable-declarations
> +
> + nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
> + trace_handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
> +
> + return trace_handle;
> +}
> +
> +#else
> +static void debug_print_stack_info(struct debug_obj *object) { }
> +static depot_stack_handle_t set_track_prepare(void) { }
> +#endif
> +
> static struct debug_obj *
> alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
> {
> @@ -272,6 +301,12 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
> obj->state = ODEBUG_STATE_NONE;
> obj->astate = 0;
> hlist_add_head(&obj->node, &b->list);
> +
> + /* Save stack of where object was created */
> +#ifdef CONFIG_STACKDEPOT
> + /* kernel backtrace */
> + obj->trace_handle = set_track_prepare();
Handing the object pointer to the function spares the ugly ifdef.
Also what on earth means set_track_prepare()? If I hadn't seen the
function before then I'd had a hard time to decode this. Self
explanatory function names spare also commenting the obvious. I.e. this
whole thing boils down to:
debug_save_stack_trace(obj);
or such.
Thanks,
tglx
© 2016 - 2025 Red Hat, Inc.