clang ignores the inline attribute because it thinks cache_from_obj()
is too big.
Move the slow path into a separate function (__cache_from_obj())
and use __fastpath_inline to please clang and CONFIG_SLUB_TINY configs.
This makes kmem_cache_free() and build_detached_freelist()
slightly faster.
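For context, __fastpath_inline is a helper macro local to mm/slub.c; it expands
roughly as follows (a sketch for readers unfamiliar with it, not a verbatim
quote of the source):

/*
 * Approximate definition of __fastpath_inline in mm/slub.c: force inlining
 * on the fast path unless CONFIG_SLUB_TINY optimizes for size.
 */
#ifndef CONFIG_SLUB_TINY
#define __fastpath_inline __always_inline
#else
#define __fastpath_inline
#endif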
$ size mm/slub.clang.before.o mm/slub.clang.after.o
   text    data     bss     dec     hex filename
  77716    7657    4208   89581   15ded mm/slub.clang.before.o
  77766    7673    4208   89647   15e2f mm/slub.clang.after.o
$ scripts/bloat-o-meter -t mm/slub.clang.before.o mm/slub.clang.after.o
Function                                  old     new   delta
__cache_from_obj                            -     211    +211
build_detached_freelist                   542     569     +27
kmem_cache_free                           896     919     +23
cache_from_obj                            229       -    -229
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
mm/slub.c | 18 +++++++++++-------
1 file changed, 11 insertions(+), 7 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 861592ac54257b9d148ff921e6d8f62aced607b3..88a842411c5c3d770ff0070b592f745832d13d1a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6748,15 +6748,10 @@ static inline struct kmem_cache *virt_to_cache(const void *obj)
 	return slab->slab_cache;
 }
 
-static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
+static struct kmem_cache *__cache_from_obj(struct kmem_cache *s, void *x)
 {
-	struct kmem_cache *cachep;
+	struct kmem_cache *cachep = virt_to_cache(x);
 
-	if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
-	    !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
-		return s;
-
-	cachep = virt_to_cache(x);
 	if (WARN(cachep && cachep != s,
 		 "%s: Wrong slab cache. %s but object is from %s\n",
 		 __func__, s->name, cachep->name))
@@ -6764,6 +6759,15 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
 	return cachep;
 }
 
+static __fastpath_inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
+{
+	if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
+	    !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
+		return s;
+
+	return __cache_from_obj(s, x);
+}
+
 /**
  * kmem_cache_free - Deallocate an object
  * @s: The cache the allocation was from.
base-commit: 944aacb68baf7624ab8d277d0ebf07f025ca137c
--
2.52.0.457.g6b5491de43-goog
On 1/15/26 14:06, Eric Dumazet wrote:
> clang ignores the inline attribute because it thinks cache_from_obj()
> is too big.
>
> Move the slow path into a separate function (__cache_from_obj())
> and use __fastpath_inline to please clang and CONFIG_SLUB_TINY configs.
>
> This makes kmem_cache_free() and build_detached_freelist()
> slightly faster.
>
> $ size mm/slub.clang.before.o mm/slub.clang.after.o
>    text    data     bss     dec     hex filename
>   77716    7657    4208   89581   15ded mm/slub.clang.before.o
>   77766    7673    4208   89647   15e2f mm/slub.clang.after.o
>
> $ scripts/bloat-o-meter -t mm/slub.clang.before.o mm/slub.clang.after.o
> Function                                  old     new   delta
> __cache_from_obj                            -     211    +211
> build_detached_freelist                   542     569     +27
> kmem_cache_free                           896     919     +23
> cache_from_obj                            229       -    -229
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
I assume this is without CONFIG_SLAB_FREELIST_HARDENED. But almost everyone
uses it today:
https://oracle.github.io/kconfigs/?config=UTS_RELEASE&config=SLAB_FREELIST_HARDENED
And with that enabled it would likely make things slower due to the extra
function call to __cache_from_obj(), which does its own virt_to_slab()
although kmem_cache_free() also does it, etc.
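To make the duplicated work concrete, the current free path looks roughly like
this (a simplified sketch of the pre-patch kmem_cache_free(); the exact lines
appear as removals in the patch below):

void kmem_cache_free(struct kmem_cache *s, void *x)
{
	/*
	 * With hardening or consistency checks enabled, cache_from_obj()
	 * ends up calling virt_to_slab(x): first object-to-slab translation.
	 */
	s = cache_from_obj(s, x);
	if (!s)
		return;
	trace_kmem_cache_free(_RET_IP_, x, s);
	/* ... and here virt_to_slab(x) is computed a second time. */
	slab_free(s, virt_to_slab(x), x, _RET_IP_);
}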
However I'd hope things could be improved differently and for all configs.
cache_from_obj() is mostly a relic from when memcgs had separate kmem_cache
instances. It should have been just removed... but hardening repurposed it.
We can however kick it from build_detached_freelist() completely as we're
not checking every object anyway. And kmem_cache_free() can be rewritten to do
the checks open-coded, calling a warn function if they fail. If anyone
cares to harden build_detached_freelist() properly, it could be done
similarly to this.
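For illustration only, "similarly" could mean a small open-coded helper that
build_detached_freelist() calls per looked-up slab, reusing warn_free_bad_obj()
from the patch below; free_obj_matches_cache() is a hypothetical name here, not
something the patch adds:

/*
 * Hypothetical sketch, not part of the patch below: an open-coded
 * object/cache consistency check that build_detached_freelist() could
 * use, reporting failures via warn_free_bad_obj().
 */
static inline bool free_obj_matches_cache(struct kmem_cache *s,
					  struct slab *slab, void *object)
{
	if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
	    !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
		return true;

	if (likely(slab && slab->slab_cache == s))
		return true;

	warn_free_bad_obj(s, object);
	return false;
}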
How does that look for you wrt performance and bloat-o-meter?
---8<---
From e2a45f6d00e437a77532b9a6168cd5b370a59d4d Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Mon, 19 Jan 2026 22:42:29 +0100
Subject: [PATCH] slab: replace cache_from_obj() with inline checks
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
mm/slub.c | 52 ++++++++++++++++++++++++++++++----------------------
1 file changed, 30 insertions(+), 22 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 861592ac5425..ddaac6990e0b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -6738,30 +6738,28 @@ void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
 }
 #endif
 
-static inline struct kmem_cache *virt_to_cache(const void *obj)
+static noinline void warn_free_bad_obj(struct kmem_cache *s, void *obj)
 {
+	struct kmem_cache *cachep;
 	struct slab *slab;
 
 	slab = virt_to_slab(obj);
-	if (WARN_ONCE(!slab, "%s: Object is not a Slab page!\n", __func__))
-		return NULL;
-	return slab->slab_cache;
-}
+	if (WARN_ONCE(!slab,
+		      "%s: Object %p from cache %s, is not in a slab page!\n",
+		      __func__, obj, s->name))
+		return;
 
-static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
-{
-	struct kmem_cache *cachep;
+	cachep = slab->slab_cache;
 
-	if (!IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) &&
-	    !kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS))
-		return s;
+	if (WARN_ONCE(!cachep, "%s: Object %p from cache %s has NULL "
+		      "slab->slab_cache pointer!\n", __func__,
+		      obj, s->name))
+		return;
 
-	cachep = virt_to_cache(x);
-	if (WARN(cachep && cachep != s,
-		 "%s: Wrong slab cache. %s but object is from %s\n",
-		 __func__, s->name, cachep->name))
-		print_tracking(cachep, x);
-	return cachep;
+	if (WARN_ONCE(cachep != s,
+		      "%s: Object %p freed from cache %s but belongs to cache %s\n",
+		      __func__, obj, s->name, cachep->name))
+		print_tracking(cachep, obj);
 }
 
 /**
@@ -6774,11 +6772,21 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
  */
 void kmem_cache_free(struct kmem_cache *s, void *x)
 {
-	s = cache_from_obj(s, x);
-	if (!s)
-		return;
+	struct slab *slab;
+
+	slab = virt_to_slab(x);
+
+	if (IS_ENABLED(CONFIG_SLAB_FREELIST_HARDENED) ||
+	    kmem_cache_debug_flags(s, SLAB_CONSISTENCY_CHECKS)) {
+
+		if (unlikely(!slab || (slab->slab_cache != s))) {
+			warn_free_bad_obj(s, x);
+			return;
+		}
+	}
+
 	trace_kmem_cache_free(_RET_IP_, x, s);
-	slab_free(s, virt_to_slab(x), x, _RET_IP_);
+	slab_free(s, slab, x, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -7305,7 +7313,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
 		df->s = slab->slab_cache;
 	} else {
 		df->slab = slab;
-		df->s = cache_from_obj(s, object); /* Support for memcg */
+		df->s = s;
 	}
 
 	/* Start new detached freelist */
--
2.52.0
On Mon, Jan 19, 2026 at 11:07 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> On 1/15/26 14:06, Eric Dumazet wrote:
> > clang ignores the inline attribute because it thinks cache_from_obj()
> > is too big.
> >
> > Move the slow path into a separate function (__cache_from_obj())
> > and use __fastpath_inline to please clang and CONFIG_SLUB_TINY configs.
> >
> > This makes kmem_cache_free() and build_detached_freelist()
> > slightly faster.
> >
> > $ size mm/slub.clang.before.o mm/slub.clang.after.o
> >    text    data     bss     dec     hex filename
> >   77716    7657    4208   89581   15ded mm/slub.clang.before.o
> >   77766    7673    4208   89647   15e2f mm/slub.clang.after.o
> >
> > $ scripts/bloat-o-meter -t mm/slub.clang.before.o mm/slub.clang.after.o
> > Function                                  old     new   delta
> > __cache_from_obj                            -     211    +211
> > build_detached_freelist                   542     569     +27
> > kmem_cache_free                           896     919     +23
> > cache_from_obj                            229       -    -229
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
>
> I assume this is without CONFIG_SLAB_FREELIST_HARDENED. But almost everyone
> uses it today:
> https://oracle.github.io/kconfigs/?config=UTS_RELEASE&config=SLAB_FREELIST_HARDENED
>
> And with that enabled it would likely make things slower due to the extra
> function call to __cache_from_obj(), which does its own virt_to_slab()
> although kmem_cache_free() also does it, etc.
>

Believe it or not, when CONFIG_SLAB_FREELIST_HARDENED=y, cache_from_obj()
was/is inlined (before and after my patch) by clang :)

> However I'd hope things could be improved differently and for all configs.
> cache_from_obj() is mostly a relic from when memcgs had separate kmem_cache
> instances. It should have been just removed... but hardening repurposed it.
>
> We can however kick it from build_detached_freelist() completely as we're
> not checking every object anyway. And kmem_cache_free() can be rewritten to do
> the checks open-coded, calling a warn function if they fail. If anyone
> cares to harden build_detached_freelist() properly, it could be done
> similarly to this.
>
> How does that look for you wrt performance and bloat-o-meter?

This looks fine to me, thanks!

scripts/bloat-o-meter -t mm/slub.o.old mm/slub.o | grep -v Ltmp
add/remove: 78/78 grow/shrink: 8/1 up/down: 6862/-6443 (419)
Function                                  old     new   delta
warn_free_bad_obj                           -     242    +242
kmem_cache_free                           896     929     +33
build_detached_freelist                   542     531     -11
cache_from_obj                            229       -    -229
Total: Before=487832, After=488251, chg +0.09%