[PATCH] mm/slub: initialize stack depot in boot process

Hyeonggon Yoo posted 1 patch 4 years, 3 months ago
There is a newer version of this series
include/linux/slab.h |  1 +
init/main.c          |  1 +
mm/slab.c            |  4 ++++
mm/slob.c            |  4 ++++
mm/slub.c            | 28 +++++++++++++++++++++++++---
5 files changed, 35 insertions(+), 3 deletions(-)
[PATCH] mm/slub: initialize stack depot in boot process
Posted by Hyeonggon Yoo 4 years, 3 months ago
commit ba10d4b46655 ("mm/slub: use stackdepot to save stack trace in
objects") initializes stack depot while creating cache if SLAB_STORE_USER
flag is set.

This can make kernel crash because a cache can be created in various
contexts. For example if user sets slub_debug=U, kernel crashes
because create_boot_cache() calls stack_depot_init(), which tries to
allocate hash table using memblock_alloc() if slab is not available.
But memblock is also not available at that time.

This patch solves the problem by initializing stack depot early
in boot process if SLAB_STORE_USER debug flag is set globally
or the flag is set to at least one cache.

[ elver@google.com: initialize stack depot depending on slub_debug
  parameter instead of allowing stack_depot_init() can be called
  in kmem_cache_init() for simplicity. ]

Link: https://lkml.org/lkml/2022/2/28/238
Fixes: ba10d4b46655 ("mm/slub: use stackdepot to save stack trace in objects")
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
---
 include/linux/slab.h |  1 +
 init/main.c          |  1 +
 mm/slab.c            |  4 ++++
 mm/slob.c            |  4 ++++
 mm/slub.c            | 28 +++++++++++++++++++++++++---
 5 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 37bde99b74af..023f3f71ae35 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -139,6 +139,7 @@ struct mem_cgroup;
 /*
  * struct kmem_cache related prototypes
  */
+void __init kmem_cache_init_early(void);
 void __init kmem_cache_init(void);
 bool slab_is_available(void);
 
diff --git a/init/main.c b/init/main.c
index 65fa2e41a9c0..4fdb7975a085 100644
--- a/init/main.c
+++ b/init/main.c
@@ -835,6 +835,7 @@ static void __init mm_init(void)
 	kfence_alloc_pool();
 	report_meminit();
 	stack_depot_early_init();
+	kmem_cache_init_early();
 	mem_init();
 	mem_init_print_info();
 	kmem_cache_init();
diff --git a/mm/slab.c b/mm/slab.c
index ddf5737c63d9..80a6d01aab06 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1196,6 +1196,10 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
 	}
 }
 
+void __init kmem_cache_init_early(void)
+{
+}
+
 /*
  * Initialisation.  Called after the page allocator have been initialised and
  * before smp_init().
diff --git a/mm/slob.c b/mm/slob.c
index 60c5842215f1..00e323af8be4 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -715,6 +715,10 @@ struct kmem_cache kmem_cache_boot = {
 	.align = ARCH_KMALLOC_MINALIGN,
 };
 
+void __init kmem_cache_init_early(void)
+{
+}
+
 void __init kmem_cache_init(void)
 {
 	kmem_cache = &kmem_cache_boot;
diff --git a/mm/slub.c b/mm/slub.c
index a74afe59a403..40bcd18143b6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4221,9 +4221,6 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
 	s->remote_node_defrag_ratio = 1000;
 #endif
 
-	if (s->flags & SLAB_STORE_USER && IS_ENABLED(CONFIG_STACKDEPOT))
-		stack_depot_init();
-
 	/* Initialize the pre-computed randomized freelist if slab is up */
 	if (slab_state >= UP) {
 		if (init_cache_random_seq(s))
@@ -4810,6 +4807,31 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
 	return s;
 }
 
+/* Initialize stack depot if needed */
+void __init kmem_cache_init_early(void)
+{
+#ifdef CONFIG_STACKDEPOT
+	slab_flags_t block_flags;
+	char *next_block;
+	char *slab_list;
+
+	if (slub_debug & SLAB_STORE_USER)
+		goto init_stack_depot;
+
+	next_block = slub_debug_string;
+	while (next_block) {
+		next_block = parse_slub_debug_flags(next_block, &block_flags, &slab_list, false);
+		if (block_flags & SLAB_STORE_USER)
+			goto init_stack_depot;
+	}
+
+	return;
+
+init_stack_depot:
+	stack_depot_init();
+#endif
+}
+
 void __init kmem_cache_init(void)
 {
 	static __initdata struct kmem_cache boot_kmem_cache,
-- 
2.33.1
Re: [PATCH] mm/slub: initialize stack depot in boot process
Posted by Marco Elver 4 years, 3 months ago
On Mon, Feb 28, 2022 at 03:09PM +0000, Hyeonggon Yoo wrote:
> commit ba10d4b46655 ("mm/slub: use stackdepot to save stack trace in
> objects") initializes stack depot while creating cache if SLAB_STORE_USER
> flag is set.
> 
> This can make kernel crash because a cache can be created in various
> contexts. For example if user sets slub_debug=U, kernel crashes
> because create_boot_cache() calls stack_depot_init(), which tries to
> allocate hash table using memblock_alloc() if slab is not available.
> But memblock is also not available at that time.
> 
> This patch solves the problem by initializing stack depot early
> in boot process if SLAB_STORE_USER debug flag is set globally
> or the flag is set to at least one cache.
> 
> [ elver@google.com: initialize stack depot depending on slub_debug
>   parameter instead of allowing stack_depot_init() can be called
>   in kmem_cache_init() for simplicity. ]
> 
> Link: https://lkml.org/lkml/2022/2/28/238

This would be a better permalink:
https://lore.kernel.org/all/YhyeaP8lrzKgKm5A@ip-172-31-19-208.ap-northeast-1.compute.internal/

> Fixes: ba10d4b46655 ("mm/slub: use stackdepot to save stack trace in objects")

This commit does not exist in -next.

I assume you intend that "lib/stackdepot: Use page allocator if both
slab and memblock is unavailable" should be dropped now.

> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> ---
>  include/linux/slab.h |  1 +
>  init/main.c          |  1 +
>  mm/slab.c            |  4 ++++
>  mm/slob.c            |  4 ++++
>  mm/slub.c            | 28 +++++++++++++++++++++++++---
>  5 files changed, 35 insertions(+), 3 deletions(-)
[...]
>  
> +/* Initialize stack depot if needed */
> +void __init kmem_cache_init_early(void)
> +{
> +#ifdef CONFIG_STACKDEPOT
> +	slab_flags_t block_flags;
> +	char *next_block;
> +	char *slab_list;
> +
> +	if (slub_debug & SLAB_STORE_USER)
> +		goto init_stack_depot;
> +
> +	next_block = slub_debug_string;
> +	while (next_block) {
> +		next_block = parse_slub_debug_flags(next_block, &block_flags, &slab_list, false);
> +		if (block_flags & SLAB_STORE_USER)
> +			goto init_stack_depot;
> +	}
> +
> +	return;
> +
> +init_stack_depot:
> +	stack_depot_init();
> +#endif
> +}

You can simplify this function to avoid the goto:

	/* Initialize stack depot if needed */
	void __init kmem_cache_init_early(void)
	{
	#ifdef CONFIG_STACKDEPOT
		slab_flags_t flags = slub_debug;
		char *next_block = slub_debug_string;
		char *slab_list;
	
		for (;;) {
			if (flags & SLAB_STORE_USER) {
				stack_depot_init();
				break;
			}
			if (!next_block)
				break;
			next_block = parse_slub_debug_flags(next_block, &flags, &slab_list, false);
		}
	#endif
	}

^^ with this version, it'd also be much easier and less confusing to add
other initialization logic unrelated to stackdepot later after the loop
(should it ever be required).
Re: [PATCH] mm/slub: initialize stack depot in boot process
Posted by Hyeonggon Yoo 4 years, 3 months ago
On Mon, Feb 28, 2022 at 05:28:17PM +0100, Marco Elver wrote:
> On Mon, Feb 28, 2022 at 03:09PM +0000, Hyeonggon Yoo wrote:
> > commit ba10d4b46655 ("mm/slub: use stackdepot to save stack trace in
> > objects") initializes stack depot while creating cache if SLAB_STORE_USER
> > flag is set.
> > 
> > This can make kernel crash because a cache can be created in various
> > contexts. For example if user sets slub_debug=U, kernel crashes
> > because create_boot_cache() calls stack_depot_init(), which tries to
> > allocate hash table using memblock_alloc() if slab is not available.
> > But memblock is also not available at that time.
> > 
> > This patch solves the problem by initializing stack depot early
> > in boot process if SLAB_STORE_USER debug flag is set globally
> > or the flag is set to at least one cache.
> > 
> > [ elver@google.com: initialize stack depot depending on slub_debug
> >   parameter instead of allowing stack_depot_init() can be called
> >   in kmem_cache_init() for simplicity. ]
> > 
> > Link: https://lkml.org/lkml/2022/2/28/238
> 
> This would be a better permalink:
> https://lore.kernel.org/all/YhyeaP8lrzKgKm5A@ip-172-31-19-208.ap-northeast-1.compute.internal/
>

Agreed.

> > Fixes: ba10d4b46655 ("mm/slub: use stackdepot to save stack trace in objects")
> 
> This commit does not exist in -next.
> 

It did not land -next yet.

> I assume you intend that "lib/stackdepot: Use page allocator if both
> slab and memblock is unavailable" should be dropped now.
>

I did not intend that, but I agree the patch you mentioned
should be dropped now.

> > Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> > ---
> >  include/linux/slab.h |  1 +
> >  init/main.c          |  1 +
> >  mm/slab.c            |  4 ++++
> >  mm/slob.c            |  4 ++++
> >  mm/slub.c            | 28 +++++++++++++++++++++++++---
> >  5 files changed, 35 insertions(+), 3 deletions(-)
> [...]
> >  
> > +/* Initialize stack depot if needed */
> > +void __init kmem_cache_init_early(void)
> > +{
> > +#ifdef CONFIG_STACKDEPOT
> > +	slab_flags_t block_flags;
> > +	char *next_block;
> > +	char *slab_list;
> > +
> > +	if (slub_debug & SLAB_STORE_USER)
> > +		goto init_stack_depot;
> > +
> > +	next_block = slub_debug_string;
> > +	while (next_block) {
> > +		next_block = parse_slub_debug_flags(next_block, &block_flags, &slab_list, false);
> > +		if (block_flags & SLAB_STORE_USER)
> > +			goto init_stack_depot;
> > +	}
> > +
> > +	return;
> > +
> > +init_stack_depot:
> > +	stack_depot_init();
> > +#endif
> > +}
> 
> You can simplify this function to avoid the goto:
> 
> 	/* Initialize stack depot if needed */
> 	void __init kmem_cache_init_early(void)
> 	{
> 	#ifdef CONFIG_STACKDEPOT
> 		slab_flags_t flags = slub_debug;
> 		char *next_block = slub_debug_string;
> 		char *slab_list;
> 	
> 		for (;;) {
> 			if (flags & SLAB_STORE_USER) {
> 				stack_depot_init();
> 				break;
> 			}
> 			if (!next_block)
> 				break;
> 			next_block = parse_slub_debug_flags(next_block, &flags, &slab_list, false);
> 		}
> 	#endif
> 	}
> 
> ^^ with this version, it'd also be much easier and less confusing to add
> other initialization logic unrelated to stackdepot later after the loop
> (should it ever be required).

Thank you for nice suggestion, but I want to try it in
setup_slub_debug() as Vlastimil said!

Thanks.

-- 
Thank you, You are awesome!
Hyeonggon :-)
Re: [PATCH] mm/slub: initialize stack depot in boot process
Posted by Vlastimil Babka 4 years, 3 months ago
On 2/28/22 16:09, Hyeonggon Yoo wrote:
> commit ba10d4b46655 ("mm/slub: use stackdepot to save stack trace in
> objects") initializes stack depot while creating cache if SLAB_STORE_USER
> flag is set.
> 
> This can make kernel crash because a cache can be created in various
> contexts. For example if user sets slub_debug=U, kernel crashes
> because create_boot_cache() calls stack_depot_init(), which tries to
> allocate hash table using memblock_alloc() if slab is not available.
> But memblock is also not available at that time.
> 
> This patch solves the problem by initializing stack depot early
> in boot process if SLAB_STORE_USER debug flag is set globally
> or the flag is set to at least one cache.
> 
> [ elver@google.com: initialize stack depot depending on slub_debug
>   parameter instead of allowing stack_depot_init() can be called
>   in kmem_cache_init() for simplicity. ]
> 
> Link: https://lkml.org/lkml/2022/2/28/238
> Fixes: ba10d4b46655 ("mm/slub: use stackdepot to save stack trace in objects")
> Signed-off-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>

I think a much easier approach would be to do this checking in
setup_slub_debug(). There we may either detect SLAB_STORE_USER in
global_flags, or check the flags returned by parse_slub_debug_flags() in the
while (str) cycle, in the 'else' case where slab_list is present. Both cases
would just set some variable that stack_depot_early_init() (the
!CONFIG_STACKDEPOT_ALWAYS_INIT version, or a newly consolidated one) would
check. So that would be another way to request the stack_depot_init() at a
well-defined point of boot, similar to CONFIG_STACKDEPOT_ALWAYS_INIT.
Because setup_slub_debug() is called by __setup, which is processed from
start_kernel() -> parse_args() before mm_init() -> stack_depot_early_init().

> ---
>  include/linux/slab.h |  1 +
>  init/main.c          |  1 +
>  mm/slab.c            |  4 ++++
>  mm/slob.c            |  4 ++++
>  mm/slub.c            | 28 +++++++++++++++++++++++++---
>  5 files changed, 35 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 37bde99b74af..023f3f71ae35 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -139,6 +139,7 @@ struct mem_cgroup;
>  /*
>   * struct kmem_cache related prototypes
>   */
> +void __init kmem_cache_init_early(void);
>  void __init kmem_cache_init(void);
>  bool slab_is_available(void);
>  
> diff --git a/init/main.c b/init/main.c
> index 65fa2e41a9c0..4fdb7975a085 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -835,6 +835,7 @@ static void __init mm_init(void)
>  	kfence_alloc_pool();
>  	report_meminit();
>  	stack_depot_early_init();
> +	kmem_cache_init_early();
>  	mem_init();
>  	mem_init_print_info();
>  	kmem_cache_init();
> diff --git a/mm/slab.c b/mm/slab.c
> index ddf5737c63d9..80a6d01aab06 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -1196,6 +1196,10 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
>  	}
>  }
>  
> +void __init kmem_cache_init_early(void)
> +{
> +}
> +
>  /*
>   * Initialisation.  Called after the page allocator have been initialised and
>   * before smp_init().
> diff --git a/mm/slob.c b/mm/slob.c
> index 60c5842215f1..00e323af8be4 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -715,6 +715,10 @@ struct kmem_cache kmem_cache_boot = {
>  	.align = ARCH_KMALLOC_MINALIGN,
>  };
>  
> +void __init kmem_cache_init_early(void)
> +{
> +}
> +
>  void __init kmem_cache_init(void)
>  {
>  	kmem_cache = &kmem_cache_boot;
> diff --git a/mm/slub.c b/mm/slub.c
> index a74afe59a403..40bcd18143b6 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -4221,9 +4221,6 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
>  	s->remote_node_defrag_ratio = 1000;
>  #endif
>  
> -	if (s->flags & SLAB_STORE_USER && IS_ENABLED(CONFIG_STACKDEPOT))
> -		stack_depot_init();
> -
>  	/* Initialize the pre-computed randomized freelist if slab is up */
>  	if (slab_state >= UP) {
>  		if (init_cache_random_seq(s))
> @@ -4810,6 +4807,31 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
>  	return s;
>  }
>  
> +/* Initialize stack depot if needed */
> +void __init kmem_cache_init_early(void)
> +{
> +#ifdef CONFIG_STACKDEPOT
> +	slab_flags_t block_flags;
> +	char *next_block;
> +	char *slab_list;
> +
> +	if (slub_debug & SLAB_STORE_USER)
> +		goto init_stack_depot;
> +
> +	next_block = slub_debug_string;
> +	while (next_block) {
> +		next_block = parse_slub_debug_flags(next_block, &block_flags, &slab_list, false);
> +		if (block_flags & SLAB_STORE_USER)
> +			goto init_stack_depot;
> +	}
> +
> +	return;
> +
> +init_stack_depot:
> +	stack_depot_init();
> +#endif
> +}
> +
>  void __init kmem_cache_init(void)
>  {
>  	static __initdata struct kmem_cache boot_kmem_cache,