[PATCH/RFC] debugobjects/slub: Print slab info and backtrace.

greearb@candelatech.com posted 1 patch 2 years, 1 month ago
include/linux/debugobjects.h |  5 +++++
include/linux/slab.h         | 11 +++++++++++
include/linux/slab_def.h     |  2 ++
lib/debugobjects.c           | 37 ++++++++++++++++++++++++++++++++++++
mm/slab.h                    |  4 ++--
mm/slub.c                    | 34 +++++++++++++++++++++++++++++----
6 files changed, 87 insertions(+), 6 deletions(-)
[PATCH/RFC] debugobjects/slub: Print slab info and backtrace.
Posted by greearb@candelatech.com 2 years, 1 month ago
From: Ben Greear <greearb@candelatech.com>

When debugobjects detects a problem, it may be in generic code
where the backtraces currently printed are not helpful.

To try to improve this, store the backtrace of where the
debug_obj was created and print that out when problems
are found.

Also print out slub info for the object held by the
debug_obj.  In my particular case, this was not super
useful, appearantly because of all of the KASAN and other
debugging I have enabled.  Still, might provide a few
clues.

Signed-off-by: Ben Greear <greearb@candelatech.com>
---
 include/linux/debugobjects.h |  5 +++++
 include/linux/slab.h         | 11 +++++++++++
 include/linux/slab_def.h     |  2 ++
 lib/debugobjects.c           | 37 ++++++++++++++++++++++++++++++++++++
 mm/slab.h                    |  4 ++--
 mm/slub.c                    | 34 +++++++++++++++++++++++++++++----
 6 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 32444686b6ff..8e8df719bd88 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -31,6 +31,11 @@ struct debug_obj {
 	unsigned int		astate;
 	void			*object;
 	const struct debug_obj_descr *descr;
+#ifdef CONFIG_STACKDEPOT
+#define DEBUG_OBJ_ADDRS_COUNT 16
+	/* Including stackdepot.h blows up the build, so open-code the handle. */
+	u64 trace_handle;
+#endif
 };
 
 /**
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 8228d1276a2f..87a5da669eaf 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -793,4 +793,15 @@ int slab_dead_cpu(unsigned int cpu);
 #define slab_dead_cpu		NULL
 #endif
 
+
+/**
+ * Calling this on allocated memory will print debugging info
+ * about the object, if CONFIG_SLAB_DEBUG is enabled.
+ */
+#ifdef CONFIG_SLUB_DEBUG
+void slab_print_mem_info(const void *x);
+#else
+static inline void slab_print_mem_info(const void *x) { }
+#endif
+
 #endif	/* _LINUX_SLAB_H */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index a61e7d55d0d3..db3ce19be339 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -121,4 +121,6 @@ static inline int objs_per_slab(const struct kmem_cache *cache,
 	return cache->num;
 }
 
+static inline void slab_print_mem_info(const void *x) { }
+
 #endif	/* _LINUX_SLAB_DEF_H */
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index a517256a270b..1f458e473bc5 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -17,6 +17,8 @@
 #include <linux/hash.h>
 #include <linux/kmemleak.h>
 #include <linux/cpu.h>
+#include <linux/slab.h>
+#include <linux/stackdepot.h>
 
 #define ODEBUG_HASH_BITS	14
 #define ODEBUG_HASH_SIZE	(1 << ODEBUG_HASH_BITS)
@@ -216,6 +218,33 @@ static struct debug_obj *__alloc_object(struct hlist_head *list)
 	return obj;
 }
 
+#ifdef CONFIG_STACKDEPOT
+static void debug_print_stack_info(struct debug_obj *object)
+{
+	if (object->trace_handle) {
+		pr_err("debugobjects: debug_obj allocated at:\n");
+		stack_depot_print(object->trace_handle);
+		pr_err("end of stack dump\n");
+	}
+}
+
+static noinline depot_stack_handle_t set_track_prepare(void)
+{
+	depot_stack_handle_t trace_handle;
+	unsigned long entries[DEBUG_OBJ_ADDRS_COUNT];
+	unsigned int nr_entries;
+
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
+	trace_handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+
+	return trace_handle;
+}
+
+#else
+static void debug_print_stack_info(struct debug_obj *object) { }
+static depot_stack_handle_t set_track_prepare(void) { }
+#endif
+
 static struct debug_obj *
 alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
 {
@@ -272,6 +301,12 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
 		obj->state  = ODEBUG_STATE_NONE;
 		obj->astate = 0;
 		hlist_add_head(&obj->node, &b->list);
+
+		/* Save stack of where object was created */
+#ifdef CONFIG_STACKDEPOT
+		/* kernel backtrace */
+		obj->trace_handle = set_track_prepare();
+#endif
 	}
 	return obj;
 }
@@ -515,6 +550,8 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
 				 "object: %p object type: %s hint: %pS\n",
 			msg, obj_states[obj->state], obj->astate,
 			obj->object, descr->name, hint);
+		debug_print_stack_info(obj);
+		slab_print_mem_info(obj->object);
 	}
 	debug_objects_warnings++;
 }
diff --git a/mm/slab.h b/mm/slab.h
index 799a315695c6..d977c093e90d 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -399,14 +399,14 @@ DECLARE_STATIC_KEY_TRUE(slub_debug_enabled);
 #else
 DECLARE_STATIC_KEY_FALSE(slub_debug_enabled);
 #endif
-extern void print_tracking(struct kmem_cache *s, void *object);
+extern void print_tracking(struct kmem_cache *s, const void *object);
 long validate_slab_cache(struct kmem_cache *s);
 static inline bool __slub_debug_enabled(void)
 {
 	return static_branch_unlikely(&slub_debug_enabled);
 }
 #else
-static inline void print_tracking(struct kmem_cache *s, void *object)
+static inline void print_tracking(struct kmem_cache *s, const void *object)
 {
 }
 static inline bool __slub_debug_enabled(void)
diff --git a/mm/slub.c b/mm/slub.c
index f7940048138c..7d0c648369ab 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -760,10 +760,10 @@ static inline unsigned int get_info_end(struct kmem_cache *s)
 		return s->inuse;
 }
 
-static struct track *get_track(struct kmem_cache *s, void *object,
+static struct track *get_track(struct kmem_cache *s, const void *object,
 	enum track_item alloc)
 {
-	struct track *p;
+	const struct track *p;
 
 	p = object + get_info_end(s);
 
@@ -841,11 +841,14 @@ static void print_track(const char *s, struct track *t, unsigned long pr_time)
 #endif
 }
 
-void print_tracking(struct kmem_cache *s, void *object)
+void print_tracking(struct kmem_cache *s, const void *object)
 {
 	unsigned long pr_time = jiffies;
-	if (!(s->flags & SLAB_STORE_USER))
+	if (!(s->flags & SLAB_STORE_USER)) {
+		pr_err("slub-print-tracking, STORE-USER not enabled, cache: %s flags: 0x%x\n",
+		       s->name, s->flags);
 		return;
+	}
 
 	print_track("Allocated", get_track(s, object, TRACK_ALLOC), pr_time);
 	print_track("Freed", get_track(s, object, TRACK_FREE), pr_time);
@@ -1712,6 +1715,29 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
 
 	return flags | slub_debug_local;
 }
+
+void slab_print_mem_info(const void *x)
+{
+	struct slab *slab;
+
+	if (unlikely(ZERO_OR_NULL_PTR(x)))
+		return;
+
+	slab = virt_to_slab(x);
+	if (!slab) {
+		pr_err("slub-print-mem-info, could not find slab for virt addr: %p\n",
+		       x);
+		return;
+	}
+
+	pr_err("slab-info, slab: %p\n", slab);
+	print_slab_info(slab);
+	pr_err("slab-tracking-info: cache: %s (%p)\n",
+	       slab->slab_cache->name, slab->slab_cache);
+	print_tracking(slab->slab_cache, x);
+	pr_err("end of slab-tracking-info\n");
+}
+
 #else /* !CONFIG_SLUB_DEBUG */
 static inline void setup_object_debug(struct kmem_cache *s, void *object) {}
 static inline
-- 
2.41.0
Re: [PATCH/RFC] debugobjects/slub: Print slab info and backtrace.
Posted by kernel test robot 2 years, 1 month ago

Hello,

kernel test robot noticed "WARNING:possible_recursive_locking_detected" on:

commit: d132b3de19193c996402718d76e63f797d4259a9 ("[PATCH/RFC] debugobjects/slub:  Print slab info and backtrace.")
url: https://github.com/intel-lab-lkp/linux/commits/greearb-candelatech-com/debugobjects-slub-Print-slab-info-and-backtrace/20231103-093945
base: git://git.kernel.org/cgit/linux/kernel/git/vbabka/slab.git for-next
patch link: https://lore.kernel.org/all/20231103013704.1232723-1-greearb@candelatech.com/
patch subject: [PATCH/RFC] debugobjects/slub:  Print slab info and backtrace.

in testcase: boot

compiler: gcc-7
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)


the issue doesn't always happen for this commit in our tests (shows 39 times
in 50 runs), but keeps clean on parent.

90f055df112162fd d132b3de19193c996402718d76e
---------------- ---------------------------
       fail:runs  %reproduction    fail:runs
           |             |             |
           :51          76%          39:50    dmesg.WARNING:possible_recursive_locking_detected
           :51          76%          39:50    dmesg.WARNING:possible_recursive_locking_detected_swapper_is_trying_to_acquire_lock:at:__debug_check_no_obj_freed_but_task_is_already_holding_lock:at:__debug_object_init/0x



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202311091331.f2eaf4f7-oliver.sang@intel.com


[    2.997328][    T1] WARNING: possible recursive locking detected
[    2.997927][    T1] 6.6.0-rc4-00006-gd132b3de1919 #1 Tainted: G                T
[    2.997927][    T1] --------------------------------------------
[    2.997927][    T1] swapper/1 is trying to acquire lock:
[ 2.997927][ T1] ffffffff85425790 (&obj_hash[i].lock){..-.}-{2:2}, at: __debug_check_no_obj_freed (lib/debugobjects.c:1057) 
[    2.997927][    T1]
[    2.997927][    T1] but task is already holding lock:
[ 2.997927][ T1] ffffffff8531abc0 (&obj_hash[i].lock){..-.}-{2:2}, at: __debug_object_init (lib/debugobjects.c:528 lib/debugobjects.c:667) 
[    2.997927][    T1]
[    2.997927][    T1] other info that might help us debug this:
[    2.997927][    T1]  Possible unsafe locking scenario:
[    2.997927][    T1]
[    2.997927][    T1]        CPU0
[    2.997927][    T1]        ----
[    2.997927][    T1]   lock(&obj_hash[i].lock);
[    2.997927][    T1]
[    2.997927][    T1]  *** DEADLOCK ***
[    2.997927][    T1]
[    2.997927][    T1]  May be due to missing lock nesting notation
[    2.997927][    T1]
[    2.997927][    T1] 1 lock held by swapper/1:
[ 2.997927][ T1] #0: ffffffff8531abc0 (&obj_hash[i].lock){..-.}-{2:2}, at: __debug_object_init (lib/debugobjects.c:528 lib/debugobjects.c:667) 
[    2.997927][    T1]
[    2.997927][    T1] stack backtrace:
[    2.997927][    T1] CPU: 0 PID: 1 Comm: swapper Tainted: G                T  6.6.0-rc4-00006-gd132b3de1919 #1 2ef50b864013db82ef063ed2d8014756ac6c0996
[    2.997927][    T1] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[    2.997927][    T1] Call Trace:
[    2.997927][    T1]  <TASK>
[ 2.997927][ T1] __lock_acquire (kernel/locking/lockdep.c:5137) 
[ 2.997927][ T1] ? local_clock (arch/x86/include/asm/preempt.h:85 kernel/sched/clock.c:316) 
[ 2.997927][ T1] ? lock_release (kernel/locking/lockdep.c:223 kernel/locking/lockdep.c:352 kernel/locking/lockdep.c:5435 kernel/locking/lockdep.c:5773) 
[ 2.997927][ T1] lock_acquire (kernel/locking/lockdep.c:467 kernel/locking/lockdep.c:5755) 
[ 2.997927][ T1] ? __debug_check_no_obj_freed (lib/debugobjects.c:1057) 
[ 2.997927][ T1] _raw_spin_lock_irqsave (include/linux/spinlock_api_smp.h:111 kernel/locking/spinlock.c:162) 
[ 2.997927][ T1] ? __debug_check_no_obj_freed (lib/debugobjects.c:1057) 
[ 2.997927][ T1] __debug_check_no_obj_freed (lib/debugobjects.c:1057) 
[ 2.997927][ T1] ? local_clock (arch/x86/include/asm/preempt.h:85 kernel/sched/clock.c:316) 
[ 2.997927][ T1] ? lock_release (kernel/locking/lockdep.c:223 kernel/locking/lockdep.c:352 kernel/locking/lockdep.c:5435 kernel/locking/lockdep.c:5773) 
[ 2.997927][ T1] free_unref_page_prepare (mm/page_alloc.c:1146 mm/page_alloc.c:2312) 
[ 2.997927][ T1] ? find_held_lock (kernel/locking/lockdep.c:5243) 
[ 2.997927][ T1] free_unref_page (mm/page_alloc.c:2405) 
[ 2.997927][ T1] __stack_depot_save (lib/stackdepot.c:443) 
[ 2.997927][ T1] set_track_prepare (mm/kmemleak.c:620) 
[ 2.997927][ T1] ? pm_runtime_init (drivers/base/power/runtime.c:1731) 
[ 2.997927][ T1] ? device_initialize (drivers/base/core.c:3102) 
[ 2.997927][ T1] ? device_register (drivers/base/core.c:3706) 
[ 2.997927][ T1] ? subsys_register (drivers/base/bus.c:1230) 
[ 2.997927][ T1] ? container_dev_init (drivers/base/container.c:39) 
[ 2.997927][ T1] ? kernel_init_freeable (init/main.c:1327 init/main.c:1547) 
[ 2.997927][ T1] ? kernel_init (init/main.c:1439) 
[ 2.997927][ T1] ? ret_from_fork (arch/x86/kernel/process.c:153) 
[ 2.997927][ T1] ? ret_from_fork_asm (arch/x86/entry/entry_64.S:312) 
[ 2.997927][ T1] lookup_object_or_alloc (lib/debugobjects.c:308 lib/debugobjects.c:624) 
[ 2.997927][ T1] __debug_object_init (lib/debugobjects.c:672) 
[ 2.997927][ T1] pm_runtime_init (drivers/base/power/runtime.c:1731) 
[ 2.997927][ T1] device_initialize (drivers/base/core.c:3102) 
[ 2.997927][ T1] device_register (drivers/base/core.c:3706) 
[ 2.997927][ T1] subsys_register (drivers/base/bus.c:1230) 
[ 2.997927][ T1] container_dev_init (drivers/base/container.c:39) 
[ 2.997927][ T1] kernel_init_freeable (init/main.c:1327 init/main.c:1547) 
[ 2.997927][ T1] ? wait_for_completion (kernel/sched/completion.c:149) 
[ 2.997927][ T1] ? rest_init (init/main.c:1429) 
[ 2.997927][ T1] kernel_init (init/main.c:1439) 
[ 2.997927][ T1] ret_from_fork (arch/x86/kernel/process.c:153) 
[ 2.997927][ T1] ? rest_init (init/main.c:1429) 
[ 2.997927][ T1] ret_from_fork_asm (arch/x86/entry/entry_64.S:312) 
[    2.997927][    T1]  </TASK>
[    3.000434][    T1] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
[    3.001966][    T1] futex hash table entries: 256 (order: 2, 24576 bytes, linear)



The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20231109/202311091331.f2eaf4f7-oliver.sang@intel.com



-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH/RFC] debugobjects/slub: Print slab info and backtrace.
Posted by Thomas Gleixner 2 years, 1 month ago
On Thu, Nov 02 2023 at 18:37, greearb@candelatech.com wrote:
> When debugobjects detects a problem, it may be in generic code
> where the backtraces currently printed are not helpful.
>
> To try to improve this, store the backtrace of where the
> debug_obj was created and print that out when problems
> are found.

To make debugobjects massively slower than it is already.

> Also print out slub info for the object held by the
> debug_obj.  In my particular case, this was not super
> useful, appearantly because of all of the KASAN and other
> debugging I have enabled.  Still, might provide a few
> clues.

So either it is useful or not. You could have disabled the other
debugging to figure this out, no?

Aside of that this is a separate issue and wants to be split out into a
separate patch if at all.

> diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
> index 32444686b6ff..8e8df719bd88 100644
> --- a/include/linux/debugobjects.h
> +++ b/include/linux/debugobjects.h
> @@ -31,6 +31,11 @@ struct debug_obj {
>  	unsigned int		astate;
>  	void			*object;
>  	const struct debug_obj_descr *descr;
> +#ifdef CONFIG_STACKDEPOT

If at all then this wants to be CONFIG_DEBUG_OBJECTS_STACKTRACE or such.

> +#define DEBUG_OBJ_ADDRS_COUNT 16

Don't glue a define into the struct. That's unreadable.

> +	/* Including stackdepot.h blows up the build, so open-code the handle. */

Seriously no. The obvious fix is to move 'struct debug_obj' into the C
file as it is not used anywhere else.

> +	u64 trace_handle;
> +#endif
>  };

> diff --git a/lib/debugobjects.c b/lib/debugobjects.c
> index a517256a270b..1f458e473bc5 100644
> --- a/lib/debugobjects.c
> +++ b/lib/debugobjects.c
> @@ -17,6 +17,8 @@
>  #include <linux/hash.h>
>  #include <linux/kmemleak.h>
>  #include <linux/cpu.h>
> +#include <linux/slab.h>
> +#include <linux/stackdepot.h>
>  
>  #define ODEBUG_HASH_BITS	14
>  #define ODEBUG_HASH_SIZE	(1 << ODEBUG_HASH_BITS)
> @@ -216,6 +218,33 @@ static struct debug_obj *__alloc_object(struct hlist_head *list)
>  	return obj;
>  }
>  
> +#ifdef CONFIG_STACKDEPOT
> +static void debug_print_stack_info(struct debug_obj *object)
> +{
> +	if (object->trace_handle) {
> +		pr_err("debugobjects: debug_obj allocated at:\n");
> +		stack_depot_print(object->trace_handle);
> +		pr_err("end of stack dump\n");
> +	}
> +}
> +
> +static noinline depot_stack_handle_t set_track_prepare(void)
> +{
> +	depot_stack_handle_t trace_handle;
> +	unsigned long entries[DEBUG_OBJ_ADDRS_COUNT];
> +	unsigned int nr_entries;

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#variable-declarations

> +
> +	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
> +	trace_handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
> +
> +	return trace_handle;
> +}
> +
> +#else
> +static void debug_print_stack_info(struct debug_obj *object) { }
> +static depot_stack_handle_t set_track_prepare(void) { }
> +#endif
> +
>  static struct debug_obj *
>  alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
>  {
> @@ -272,6 +301,12 @@ alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *d
>  		obj->state  = ODEBUG_STATE_NONE;
>  		obj->astate = 0;
>  		hlist_add_head(&obj->node, &b->list);
> +
> +		/* Save stack of where object was created */
> +#ifdef CONFIG_STACKDEPOT
> +		/* kernel backtrace */
> +		obj->trace_handle = set_track_prepare();

Handing the object pointer to the function spares the ugly ifdef.

Also what on earth means set_track_prepare()? If I hadn't seen the
function before then I'd had a hard time to decode this. Self
explanatory function names spare also commenting the obvious. I.e. this
whole thing boils down to:

      debug_save_stack_trace(obj);

or such.

Thanks,

        tglx