[PATCH] stackdepot: Make max number of pools build-time configurable

Matt Fleming posted 1 patch 4 months ago
There is a newer version of this series
lib/Kconfig      | 6 ++++++
lib/stackdepot.c | 9 ++++-----
2 files changed, 10 insertions(+), 5 deletions(-)
[PATCH] stackdepot: Make max number of pools build-time configurable
Posted by Matt Fleming 4 months ago
From: Matt Fleming <mfleming@cloudflare.com>

We're hitting the WARN in depot_init_pool() about reaching the stack
depot limit. My assumption is because we have long stacks that don't
dedup very well.

Introduce a new config to allow users to set the number of maximum stack
depot pools at build time. Also, turn the silent capping into a
build-time assert to provide more obvious feedback when users set this
value too high.

Signed-off-by: Matt Fleming <mfleming@cloudflare.com>
---
 lib/Kconfig      | 6 ++++++
 lib/stackdepot.c | 9 ++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/lib/Kconfig b/lib/Kconfig
index b38849af6f13..2c5af89daff9 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -720,6 +720,12 @@ config STACKDEPOT_MAX_FRAMES
 	default 64
 	depends on STACKDEPOT
 
+config STACKDEPOT_MAX_POOLS
+	int "Maximum number of stack depot pools to store stack traces"
+	range 1024 131071
+	default 8192
+	depends on STACKDEPOT
+
 config REF_TRACKER
 	bool
 	depends on STACKTRACE_SUPPORT
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 245d5b416699..1c24230b4a37 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -36,11 +36,7 @@
 #include <linux/memblock.h>
 #include <linux/kasan-enabled.h>
 
-#define DEPOT_POOLS_CAP 8192
-/* The pool_index is offset by 1 so the first record does not have a 0 handle. */
-#define DEPOT_MAX_POOLS \
-	(((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \
-	 (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP)
+#define DEPOT_MAX_POOLS CONFIG_STACKDEPOT_MAX_POOLS
 
 static bool stack_depot_disabled;
 static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
@@ -245,6 +241,9 @@ static bool depot_init_pool(void **prealloc)
 {
 	lockdep_assert_held(&pool_lock);
 
+	/* The pool_index is offset by 1 so the first record does not have a 0 handle. */
+	BUILD_BUG_ON((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_MAX_POOLS);
+
 	if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
 		/* Bail out if we reached the pool limit. */
 		WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
-- 
2.34.1
Re: [PATCH] stackdepot: Make max number of pools build-time configurable
Posted by Andrey Konovalov 4 months ago
On Tue, Jun 10, 2025 at 1:16 PM Matt Fleming <matt@readmodwrite.com> wrote:
>
> From: Matt Fleming <mfleming@cloudflare.com>
>
> We're hitting the WARN in depot_init_pool() about reaching the stack
> depot limit. My assumption is because we have long stacks that don't
> dedup very well.

Note that this might happen if filter_irq_stacks() somehow fails in your setup.

See e.g. this:

https://lore.kernel.org/all/CA+fCnZdWgAD1pu4yyjON0ph9ae1B6iaWas0CbET+MXLNNXt5Hg@mail.gmail.com/
https://lore.kernel.org/all/44140c34-e2bd-4f6e-892c-51469edc8dfb@redhat.com/

>
> Introduce a new config to allow users to set the number of maximum stack
> depot pools at build time. Also, turn the silent capping into a
> build-time assert to provide more obvious feedback when users set this
> value too high.
>
> Signed-off-by: Matt Fleming <mfleming@cloudflare.com>
> ---
>  lib/Kconfig      | 6 ++++++
>  lib/stackdepot.c | 9 ++++-----
>  2 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/lib/Kconfig b/lib/Kconfig
> index b38849af6f13..2c5af89daff9 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -720,6 +720,12 @@ config STACKDEPOT_MAX_FRAMES
>         default 64
>         depends on STACKDEPOT
>
> +config STACKDEPOT_MAX_POOLS
> +       int "Maximum number of stack depot pools to store stack traces"
> +       range 1024 131071
> +       default 8192
> +       depends on STACKDEPOT
> +
>  config REF_TRACKER
>         bool
>         depends on STACKTRACE_SUPPORT
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index 245d5b416699..1c24230b4a37 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -36,11 +36,7 @@
>  #include <linux/memblock.h>
>  #include <linux/kasan-enabled.h>
>
> -#define DEPOT_POOLS_CAP 8192
> -/* The pool_index is offset by 1 so the first record does not have a 0 handle. */
> -#define DEPOT_MAX_POOLS \
> -       (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \
> -        (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP)
> +#define DEPOT_MAX_POOLS CONFIG_STACKDEPOT_MAX_POOLS
>
>  static bool stack_depot_disabled;
>  static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
> @@ -245,6 +241,9 @@ static bool depot_init_pool(void **prealloc)
>  {
>         lockdep_assert_held(&pool_lock);
>
> +       /* The pool_index is offset by 1 so the first record does not have a 0 handle. */
> +       BUILD_BUG_ON((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_MAX_POOLS);
> +
>         if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
>                 /* Bail out if we reached the pool limit. */
>                 WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
> --
> 2.34.1
>
Re: [PATCH] stackdepot: Make max number of pools build-time configurable
Posted by Matt Fleming 4 months ago
On Tue, Jun 10, 2025 at 5:09 PM Andrey Konovalov <andreyknvl@gmail.com> wrote:
>
> On Tue, Jun 10, 2025 at 1:16 PM Matt Fleming <matt@readmodwrite.com> wrote:
> >
> > From: Matt Fleming <mfleming@cloudflare.com>
> >
> > We're hitting the WARN in depot_init_pool() about reaching the stack
> > depot limit. My assumption is because we have long stacks that don't
> > dedup very well.
>
> Note that this might happen if filter_irq_stacks() somehow fails in your setup.
>
> See e.g. this:
>
> https://lore.kernel.org/all/CA+fCnZdWgAD1pu4yyjON0ph9ae1B6iaWas0CbET+MXLNNXt5Hg@mail.gmail.com/
> https://lore.kernel.org/all/44140c34-e2bd-4f6e-892c-51469edc8dfb@redhat.com/

Thanks for the tip. I'll double-check if we're hitting a similar issue
before sending a v2.
Re: [PATCH] stackdepot: Make max number of pools build-time configurable
Posted by Matt Fleming 3 months, 1 week ago
On Wed, Jun 11, 2025 at 1:21 PM Matt Fleming <matt@readmodwrite.com> wrote:
>
> On Tue, Jun 10, 2025 at 5:09 PM Andrey Konovalov <andreyknvl@gmail.com> wrote:
> >
> > On Tue, Jun 10, 2025 at 1:16 PM Matt Fleming <matt@readmodwrite.com> wrote:
> > >
> > > From: Matt Fleming <mfleming@cloudflare.com>
> > >
> > > We're hitting the WARN in depot_init_pool() about reaching the stack
> > > depot limit. My assumption is because we have long stacks that don't
> > > dedup very well.
> >
> > Note that this might happen if filter_irq_stacks() somehow fails in your setup.
> >
> > See e.g. this:
> >
> > https://lore.kernel.org/all/CA+fCnZdWgAD1pu4yyjON0ph9ae1B6iaWas0CbET+MXLNNXt5Hg@mail.gmail.com/
> > https://lore.kernel.org/all/44140c34-e2bd-4f6e-892c-51469edc8dfb@redhat.com/
>
> Thanks for the tip. I'll double-check if we're hitting a similar issue
> before sending a v2.

Sorry for the delay.

filter_irq_stacks() seems to be working just fine. But I did notice
that we're doing a lot of stuff in softirqs and that might be the
cause of so many unique stack traces, e.g.

kasan_save_stack+0x27/0x4f
__kasan_record_aux_stack+0xad/0xbf
__call_rcu_common.constprop.0+0x70/0x7bf
kmem_cache_free+0x36e/0x55f
__sk_destruct+0x3f8/0x5bf
tcp_v6_rcv+0x31c3/0x3cef
ip6_protocol_deliver_rcu+0x12d/0x115f
ip6_input_finish+0xe8/0x15f
ip6_input+0xca/0x1cf
ipv6_rcv+0x307/0x38f
__netif_receive_skb_one_core+0x11f/0x1af
process_backlog+0x1d2/0x5df
__napi_poll+0xa2/0x43f
__init_scratch_end+0x70a7b79/0x98d00df
__napi_poll+0x5/0x43f
net_rx_action+0x75f/0x18af
handle_softirqs+0x15e/0x52f
do_softirq+0x40/0x5f
__local_bh_enable_ip+0x64/0x6f
__dev_queue_xmit+0x8aa/0x30ff
ip6_finish_output2+0xb96/0x1b2f
ip6_finish_output+0x5d1/0x97f
ip6_output+0x1d6/0x3cf
ip6_xmit+0xaf4/0x199f
inet6_csk_xmit+0x2cf/0x44f
__tcp_transmit_skb+0x1826/0x3bdf
tcp_write_xmit+0x1610/0x794f
__tcp_push_pending_frames+0x94/0x2df
inet_shutdown+0x247/0x31f
__sys_shutdown+0xe9/0x19f
__x64_sys_shutdown+0x53/0x7f
do_syscall_64+0x4b/0x10f
entry_SYSCALL_64_after_hwframe+0x76/0x7d

In fact, nearly 25% of the recorded stacks feature do_softirq(). I
don't think we want to filter these like we do for hard irqs though,
because the information is helpful for debugging.

Thanks,
Matt
Re: [PATCH] stackdepot: Make max number of pools build-time configurable
Posted by Marco Elver 4 months ago
On Tue, 10 Jun 2025 at 13:16, Matt Fleming <matt@readmodwrite.com> wrote:
>
> From: Matt Fleming <mfleming@cloudflare.com>
>
> We're hitting the WARN in depot_init_pool() about reaching the stack
> depot limit. My assumption is because we have long stacks that don't
> dedup very well.
>
> Introduce a new config to allow users to set the number of maximum stack
> depot pools at build time. Also, turn the silent capping into a
> build-time assert to provide more obvious feedback when users set this
> value too high.
>
> Signed-off-by: Matt Fleming <mfleming@cloudflare.com>
> ---
>  lib/Kconfig      | 6 ++++++
>  lib/stackdepot.c | 9 ++++-----
>  2 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/lib/Kconfig b/lib/Kconfig
> index b38849af6f13..2c5af89daff9 100644
> --- a/lib/Kconfig
> +++ b/lib/Kconfig
> @@ -720,6 +720,12 @@ config STACKDEPOT_MAX_FRAMES
>         default 64
>         depends on STACKDEPOT
>
> +config STACKDEPOT_MAX_POOLS
> +       int "Maximum number of stack depot pools to store stack traces"

I wouldn't want most kernel configs change this. Can we make it an
expert option? I.e.

  int "Maximum number of stack depot pools to store stack traces" if EXPERT

> +       range 1024 131071
> +       default 8192
> +       depends on STACKDEPOT
> +
>  config REF_TRACKER
>         bool
>         depends on STACKTRACE_SUPPORT
> diff --git a/lib/stackdepot.c b/lib/stackdepot.c
> index 245d5b416699..1c24230b4a37 100644
> --- a/lib/stackdepot.c
> +++ b/lib/stackdepot.c
> @@ -36,11 +36,7 @@
>  #include <linux/memblock.h>
>  #include <linux/kasan-enabled.h>
>
> -#define DEPOT_POOLS_CAP 8192
> -/* The pool_index is offset by 1 so the first record does not have a 0 handle. */
> -#define DEPOT_MAX_POOLS \
> -       (((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_POOLS_CAP) ? \
> -        (1LL << (DEPOT_POOL_INDEX_BITS)) - 1 : DEPOT_POOLS_CAP)
> +#define DEPOT_MAX_POOLS CONFIG_STACKDEPOT_MAX_POOLS
>
>  static bool stack_depot_disabled;
>  static bool __stack_depot_early_init_requested __initdata = IS_ENABLED(CONFIG_STACKDEPOT_ALWAYS_INIT);
> @@ -245,6 +241,9 @@ static bool depot_init_pool(void **prealloc)
>  {
>         lockdep_assert_held(&pool_lock);
>
> +       /* The pool_index is offset by 1 so the first record does not have a 0 handle. */
> +       BUILD_BUG_ON((1LL << (DEPOT_POOL_INDEX_BITS)) - 1 < DEPOT_MAX_POOLS);

This is a constant expression, and could be a static_assert() (with
inverted condition) next to the '#define DEPOT_MAX_POOLS'.
BUILD_BUG_ON() is a bit less efficient (compile-time wise), and cannot
be substituted with static_assert() only if part of the expression is
a non-constant expression yet we rely on the compiler to optimize the
condition into a constant if there's a bug.

>         if (unlikely(pools_num >= DEPOT_MAX_POOLS)) {
>                 /* Bail out if we reached the pool limit. */
>                 WARN_ON_ONCE(pools_num > DEPOT_MAX_POOLS); /* should never happen */
> --
> 2.34.1
>