Since commit 4f6923fbb352 ("mm: make should_failslab always available for
fault injection"), should_failslab() is unconditionally a noinline
function. This adds visible overhead to the slab allocation hotpath,
even if the function is empty. With CONFIG_FAILSLAB=y there's additional
overhead, even when the functionality is not activated by a boot
parameter or via debugfs.
The overhead can be eliminated with a static key around the callsite.
Fault injection and error injection frameworks, including bpf, can now be
told that this function has a static key associated with it, and are able
to enable and disable it accordingly.
Additionally, compile out all relevant code if neither CONFIG_FAILSLAB
nor CONFIG_FUNCTION_ERROR_INJECTION is enabled. When only the latter is
not enabled, make should_failslab() static inline instead of noinline.
To demonstrate the reduced overhead of calling an empty
should_failslab() function, a kernel build with
CONFIG_FUNCTION_ERROR_INJECTION enabled but CONFIG_FAILSLAB disabled,
and CPU mitigations enabled, was used in qemu-kvm (virtme-ng) on an AMD
Ryzen 7 2700 machine, and execution of a program trying to open() a
non-existent file was measured 3 times:
  for (int i = 0; i < 10000000; i++) {
          open("non_existent", O_RDONLY);
  }
After this patch, the measured real time was 4.3% smaller. Using perf
profiling it was verified that should_failslab was gone from the
profile.
With CONFIG_FAILSLAB also enabled, the patched kernel's performance was
unaffected, as expected, while the unpatched kernel's performance was
worse, resulting in a relative speedup of 10.5%. This means CONFIG_FAILSLAB
no longer needs to be an option suitable only for debug kernel builds.
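For reference, a self-contained version of the measured program (a
sketch; the actual harness used for the measurements is not included
here) is:

  /* Each open() of a non-existent path still goes through path lookup,
   * which allocates a filename buffer from a slab cache, so the loop
   * exercises the slab allocation hotpath. Run it under time(1) and
   * compare the real time on patched and unpatched kernels. */
  #include <fcntl.h>

  int main(void)
  {
          for (int i = 0; i < 10000000; i++)
                  open("non_existent", O_RDONLY);

          return 0;
  }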
Acked-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
include/linux/fault-inject.h | 4 +++-
mm/failslab.c | 2 +-
mm/slab.h | 3 +++
mm/slub.c | 30 +++++++++++++++++++++++++++---
4 files changed, 34 insertions(+), 5 deletions(-)
diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h
index cfe75cc1bac4..0d0fa94dc1c8 100644
--- a/include/linux/fault-inject.h
+++ b/include/linux/fault-inject.h
@@ -107,9 +107,11 @@ static inline bool __should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
}
#endif /* CONFIG_FAIL_PAGE_ALLOC */
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
int should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+#endif
#ifdef CONFIG_FAILSLAB
-extern bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
+bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags);
#else
static inline bool __should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
diff --git a/mm/failslab.c b/mm/failslab.c
index ffc420c0e767..878fd08e5dac 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -9,7 +9,7 @@ static struct {
bool ignore_gfp_reclaim;
bool cache_filter;
} failslab = {
- .attr = FAULT_ATTR_INITIALIZER,
+ .attr = FAULT_ATTR_INITIALIZER_KEY(&should_failslab_active.key),
.ignore_gfp_reclaim = true,
.cache_filter = false,
};
diff --git a/mm/slab.h b/mm/slab.h
index 5f8f47c5bee0..792e19cb37b8 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -11,6 +11,7 @@
#include <linux/memcontrol.h>
#include <linux/kfence.h>
#include <linux/kasan.h>
+#include <linux/jump_label.h>
/*
* Internal slab definitions
@@ -160,6 +161,8 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t)
*/
#define slab_page(s) folio_page(slab_folio(s), 0)
+DECLARE_STATIC_KEY_FALSE(should_failslab_active);
+
/*
* If network-based swap is enabled, sl*b must keep track of whether pages
* were allocated from pfmemalloc reserves.
diff --git a/mm/slub.c b/mm/slub.c
index 0809760cf789..11980aa94631 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3874,13 +3874,37 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
0, sizeof(void *));
}
-noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
+#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
+DEFINE_STATIC_KEY_FALSE(should_failslab_active);
+
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+noinline
+#else
+static inline
+#endif
+int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
{
if (__should_failslab(s, gfpflags))
return -ENOMEM;
return 0;
}
-ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
+ALLOW_ERROR_INJECTION_KEY(should_failslab, ERRNO, &should_failslab_active);
+
+static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
+ gfp_t gfp)
+{
+ if (static_branch_unlikely(&should_failslab_active))
+ return should_failslab(s, gfp);
+ else
+ return 0;
+}
+#else
+static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
+ gfp_t gfp)
+{
+ return false;
+}
+#endif
static __fastpath_inline
struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
@@ -3889,7 +3913,7 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
might_alloc(flags);
- if (unlikely(should_failslab(s, flags)))
+ if (should_failslab_wrapped(s, flags))
return NULL;
return s;
--
2.45.2
On 6/20/24 12:49 AM, Vlastimil Babka wrote:
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3874,13 +3874,37 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
> 0, sizeof(void *));
> }
>
> -noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> +#if defined(CONFIG_FUNCTION_ERROR_INJECTION) || defined(CONFIG_FAILSLAB)
> +DEFINE_STATIC_KEY_FALSE(should_failslab_active);
> +
> +#ifdef CONFIG_FUNCTION_ERROR_INJECTION
> +noinline
> +#else
> +static inline
> +#endif
> +int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
Note that it has been found that (regardless of this series) gcc may clone
this to a should_failslab.constprop.0 in case the function is empty because
__should_failslab is compiled out (CONFIG_FAILSLAB=n). The "noinline"
doesn't help - the original function stays but only the clone is actually
being called, thus overriding the original function achieves nothing, see:
https://github.com/bpftrace/bpftrace/issues/3258
So we could use __noclone to prevent that, and I was thinking of adding
something like this to error-injection.h:
#ifdef CONFIG_FUNCTION_ERROR_INJECTION
#define __error_injectable(alternative) noinline __noclone
#else
#define __error_injectable(alternative) alternative
#endif
and the usage here would be:
__error_injectable(static inline) int should_failslab(...)
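To make the two expansions explicit (a sketch of what the macro would
produce, not merged code), the definition would start with either

  /* CONFIG_FUNCTION_ERROR_INJECTION=y: a real, out-of-line symbol that
   * gcc must not clone, so error injection (including bpf) can still
   * override it */
  noinline __noclone int should_failslab(struct kmem_cache *s, gfp_t gfpflags)

or

  /* CONFIG_FUNCTION_ERROR_INJECTION=n: an ordinary static inline; no
   * out-of-line symbol is needed because nothing will override it */
  static inline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)

with the same function body as in the patch in both cases.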
Does that look acceptable, or is it too confusing that "static inline" is
specified there as the storage class to use when error injection is actually
disabled?
> {
> if (__should_failslab(s, gfpflags))
> return -ENOMEM;
> return 0;
> }
> -ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
> +ALLOW_ERROR_INJECTION_KEY(should_failslab, ERRNO, &should_failslab_active);
> +
> +static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
> + gfp_t gfp)
> +{
> + if (static_branch_unlikely(&should_failslab_active))
> + return should_failslab(s, gfp);
> + else
> + return 0;
> +}
> +#else
> +static __always_inline int should_failslab_wrapped(struct kmem_cache *s,
> + gfp_t gfp)
> +{
> + return false;
> +}
> +#endif
>
> static __fastpath_inline
> struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
> @@ -3889,7 +3913,7 @@ struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
>
> might_alloc(flags);
>
> - if (unlikely(should_failslab(s, flags)))
> + if (should_failslab_wrapped(s, flags))
> return NULL;
>
> return s;
>
On Tue, Jun 25, 2024 at 7:24 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 6/20/24 12:49 AM, Vlastimil Babka wrote:
> [...]
> So we could use __noclone to prevent that, and I was thinking of adding
> something like this to error-injection.h:
>
> #ifdef CONFIG_FUNCTION_ERROR_INJECTION
> #define __error_injectable(alternative) noinline __noclone

To prevent such compiler transformations we typically use
__used noinline

We didn't have a need for __noclone yet. If __used is enough I'd stick
to that.
On 6/25/24 7:12 PM, Alexei Starovoitov wrote:
> On Tue, Jun 25, 2024 at 7:24 AM Vlastimil Babka <vbabka@suse.cz> wrote:
>> [...]
>> So we could use __noclone to prevent that, and I was thinking of adding
>> something like this to error-injection.h:
>>
>> #ifdef CONFIG_FUNCTION_ERROR_INJECTION
>> #define __error_injectable(alternative) noinline __noclone
>
> To prevent such compiler transformations we typically use
> __used noinline
>
> We didn't have a need for __noclone yet. If __used is enough I'd stick to that.

__used made no difference here (gcc 13.3), __noclone did.
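For reference (an illustration in plain gcc attribute terms, not a quote
from kernel headers), the distinction observed above is roughly:

  /* __used (__attribute__((used))) only keeps a seemingly unreferenced
   * symbol from being dropped; gcc may still route callers to a
   * specialized clone such as should_failslab.constprop.0.
   * __noclone (__attribute__((noclone))) forbids generating such clones,
   * so callers keep calling the original, overridable symbol. */
  noinline __noclone int should_failslab(struct kmem_cache *s, gfp_t gfpflags);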