[PATCH v3 06/21] slab: introduce percpu sheaves bootstrap

Posted by Vlastimil Babka 3 weeks, 3 days ago
Until now, kmem_cache->cpu_sheaves was !NULL only for caches with
sheaves enabled. Since we want to enable them for almost all caches,
it's suboptimal to test the pointer in the fast paths, so instead
allocate it for all caches in do_kmem_cache_create(). Instead of testing
the cpu_sheaves pointer to recognize caches (yet) without sheaves, test
kmem_cache->sheaf_capacity for being 0, where needed, using a new
cache_has_sheaves() helper.

However, for the fast paths sake we also assume that the main sheaf
always exists (pcs->main is !NULL), and during bootstrap we cannot
allocate sheaves yet.

Solve this by introducing a single static bootstrap_sheaf that's
assigned as pcs->main during bootstrap. It has a size of 0, so during
allocations, the fast path will find it's empty. Since the size of 0
matches sheaf_capacity of 0, the freeing fast paths will find it's
"full". In the slow path handlers, we use cache_has_sheaves() to
recognize that the cache doesn't (yet) have real sheaves, and fall back.
Thus sharing the single bootstrap sheaf like this for multiple caches
and cpus is safe.
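
For illustration, a minimal sketch of that invariant (made-up names, not the
actual mm/slub.c fast paths), showing why the shared zero-sized sheaf is never
written to:

struct sheaf_sketch {
	unsigned int size;		/* 0 for the bootstrap sheaf */
	void *objects[];
};

/* alloc fast path: size == 0 reads as "empty", so always defer */
static void *sketch_alloc_fast(struct sheaf_sketch *main)
{
	if (main->size == 0)
		return NULL;		/* take the slow path instead */
	return main->objects[--main->size];
}

/* free fast path: size == capacity (== 0) reads as "full", so always defer */
static bool sketch_free_fast(struct sheaf_sketch *main, unsigned int capacity,
			     void *object)
{
	if (main->size >= capacity)
		return false;		/* take the slow path instead */
	main->objects[main->size++] = object;
	return true;
}

Both bail-outs happen before any store to the sheaf, so a single static
bootstrap_sheaf can be shared read-only by all caches and cpus.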

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/slub.c | 119 ++++++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 81 insertions(+), 38 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index edf341c87e20..706cb6398f05 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -501,6 +501,18 @@ struct kmem_cache_node {
 	struct node_barn *barn;
 };
 
+/*
+ * Every cache has !NULL s->cpu_sheaves but they may point to the
+ * bootstrap_sheaf temporarily during init, or permanently for the boot caches
+ * and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY. This
+ * helper distinguishes whether cache has real non-bootstrap sheaves.
+ */
+static inline bool cache_has_sheaves(struct kmem_cache *s)
+{
+	/* Test CONFIG_SLUB_TINY for code elimination purposes */
+	return !IS_ENABLED(CONFIG_SLUB_TINY) && s->sheaf_capacity;
+}
+
 static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 {
 	return s->node[node];
@@ -2855,6 +2867,10 @@ static void pcs_destroy(struct kmem_cache *s)
 		if (!pcs->main)
 			continue;
 
+		/* bootstrap or debug caches, it's the bootstrap_sheaf */
+		if (!pcs->main->cache)
+			continue;
+
 		/*
 		 * We have already passed __kmem_cache_shutdown() so everything
 		 * was flushed and there should be no objects allocated from
@@ -4030,7 +4046,7 @@ static bool has_pcs_used(int cpu, struct kmem_cache *s)
 {
 	struct slub_percpu_sheaves *pcs;
 
-	if (!s->cpu_sheaves)
+	if (!cache_has_sheaves(s))
 		return false;
 
 	pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
@@ -4052,7 +4068,7 @@ static void flush_cpu_slab(struct work_struct *w)
 
 	s = sfw->s;
 
-	if (s->cpu_sheaves)
+	if (cache_has_sheaves(s))
 		pcs_flush_all(s);
 
 	flush_this_cpu_slab(s);
@@ -4157,7 +4173,7 @@ void flush_all_rcu_sheaves(void)
 	mutex_lock(&slab_mutex);
 
 	list_for_each_entry(s, &slab_caches, list) {
-		if (!s->cpu_sheaves)
+		if (!cache_has_sheaves(s))
 			continue;
 		flush_rcu_sheaves_on_cache(s);
 	}
@@ -4179,7 +4195,7 @@ static int slub_cpu_dead(unsigned int cpu)
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
 		__flush_cpu_slab(s, cpu);
-		if (s->cpu_sheaves)
+		if (cache_has_sheaves(s))
 			__pcs_flush_all_cpu(s, cpu);
 	}
 	mutex_unlock(&slab_mutex);
@@ -4979,6 +4995,12 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
 
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
+	/* Bootstrap or debug cache, back off */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
+
 	if (pcs->spare && pcs->spare->size > 0) {
 		swap(pcs->main, pcs->spare);
 		return pcs;
@@ -5165,6 +5187,11 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 		struct slab_sheaf *full;
 		struct node_barn *barn;
 
+		if (unlikely(!cache_has_sheaves(s))) {
+			local_unlock(&s->cpu_sheaves->lock);
+			return allocated;
+		}
+
 		if (pcs->spare && pcs->spare->size > 0) {
 			swap(pcs->main, pcs->spare);
 			goto do_alloc;
@@ -5244,8 +5271,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 	if (unlikely(object))
 		goto out;
 
-	if (s->cpu_sheaves)
-		object = alloc_from_pcs(s, gfpflags, node);
+	object = alloc_from_pcs(s, gfpflags, node);
 
 	if (!object)
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
@@ -5355,17 +5381,6 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
 
 	if (unlikely(size > s->sheaf_capacity)) {
 
-		/*
-		 * slab_debug disables cpu sheaves intentionally so all
-		 * prefilled sheaves become "oversize" and we give up on
-		 * performance for the debugging. Same with SLUB_TINY.
-		 * Creating a cache without sheaves and then requesting a
-		 * prefilled sheaf is however not expected, so warn.
-		 */
-		WARN_ON_ONCE(s->sheaf_capacity == 0 &&
-			     !IS_ENABLED(CONFIG_SLUB_TINY) &&
-			     !(s->flags & SLAB_DEBUG_FLAGS));
-
 		sheaf = kzalloc(struct_size(sheaf, objects, size), gfp);
 		if (!sheaf)
 			return NULL;
@@ -6082,6 +6097,12 @@ __pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs)
 restart:
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
+	/* Bootstrap or debug cache, back off */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
+
 	barn = get_barn(s);
 	if (!barn) {
 		local_unlock(&s->cpu_sheaves->lock);
@@ -6280,6 +6301,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
 		struct slab_sheaf *empty;
 		struct node_barn *barn;
 
+		/* Bootstrap or debug cache, fall back */
+		if (unlikely(!cache_has_sheaves(s))) {
+			local_unlock(&s->cpu_sheaves->lock);
+			goto fail;
+		}
+
 		if (pcs->spare && pcs->spare->size == 0) {
 			pcs->rcu_free = pcs->spare;
 			pcs->spare = NULL;
@@ -6674,9 +6701,8 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
 		return;
 
-	if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
-				     slab_nid(slab) == numa_mem_id())
-			   && likely(!slab_test_pfmemalloc(slab))) {
+	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
+	    && likely(!slab_test_pfmemalloc(slab))) {
 		if (likely(free_to_pcs(s, object)))
 			return;
 	}
@@ -7379,7 +7405,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 	 * freeing to sheaves is so incompatible with the detached freelist so
 	 * once we go that way, we have to do everything differently
 	 */
-	if (s && s->cpu_sheaves) {
+	if (s && cache_has_sheaves(s)) {
 		free_to_pcs_bulk(s, size, p);
 		return;
 	}
@@ -7490,8 +7516,7 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
 		size--;
 	}
 
-	if (s->cpu_sheaves)
-		i = alloc_from_pcs_bulk(s, size, p);
+	i = alloc_from_pcs_bulk(s, size, p);
 
 	if (i < size) {
 		/*
@@ -7702,6 +7727,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
 
 static int init_percpu_sheaves(struct kmem_cache *s)
 {
+	static struct slab_sheaf bootstrap_sheaf = {};
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
@@ -7711,7 +7737,28 @@ static int init_percpu_sheaves(struct kmem_cache *s)
 
 		local_trylock_init(&pcs->lock);
 
-		pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
+		/*
+		 * Bootstrap sheaf has zero size so fast-path allocation fails.
+		 * It has also size == s->sheaf_capacity, so fast-path free
+		 * fails. In the slow paths we recognize the situation by
+		 * checking s->sheaf_capacity. This allows fast paths to assume
+		 * s->cpu_sheaves and pcs->main always exists and is valid.
+		 * It's also safe to share the single static bootstrap_sheaf
+		 * with zero-sized objects array as it's never modified.
+		 *
+		 * bootstrap_sheaf also has NULL pointer to kmem_cache so we
+		 * recognize it and not attempt to free it when destroying the
+		 * cache
+		 *
+		 * We keep bootstrap_sheaf for kmem_cache and kmem_cache_node,
+		 * caches with debug enabled, and all caches with SLUB_TINY.
+		 * For kmalloc caches it's used temporarily during the initial
+		 * bootstrap.
+		 */
+		if (!s->sheaf_capacity)
+			pcs->main = &bootstrap_sheaf;
+		else
+			pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
 
 		if (!pcs->main)
 			return -ENOMEM;
@@ -7809,7 +7856,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
 			continue;
 		}
 
-		if (s->cpu_sheaves) {
+		if (cache_has_sheaves(s)) {
 			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
 
 			if (!barn)
@@ -8127,7 +8174,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 	flush_all_cpus_locked(s);
 
 	/* we might have rcu sheaves in flight */
-	if (s->cpu_sheaves)
+	if (cache_has_sheaves(s))
 		rcu_barrier();
 
 	/* Attempt to free all objects */
@@ -8439,7 +8486,7 @@ static int slab_mem_going_online_callback(int nid)
 		if (get_node(s, nid))
 			continue;
 
-		if (s->cpu_sheaves) {
+		if (cache_has_sheaves(s)) {
 			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
 
 			if (!barn) {
@@ -8647,12 +8694,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 
 	set_cpu_partial(s);
 
-	if (s->sheaf_capacity) {
-		s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
-		if (!s->cpu_sheaves) {
-			err = -ENOMEM;
-			goto out;
-		}
+	s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
+	if (!s->cpu_sheaves) {
+		err = -ENOMEM;
+		goto out;
 	}
 
 #ifdef CONFIG_NUMA
@@ -8671,11 +8716,9 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 	if (!alloc_kmem_cache_cpus(s))
 		goto out;
 
-	if (s->cpu_sheaves) {
-		err = init_percpu_sheaves(s);
-		if (err)
-			goto out;
-	}
+	err = init_percpu_sheaves(s);
+	if (err)
+		goto out;
 
 	err = 0;
 

-- 
2.52.0
Re: [PATCH v3 06/21] slab: introduce percpu sheaves bootstrap
Posted by Hao Li 3 weeks ago
On Fri, Jan 16, 2026 at 03:40:26PM +0100, Vlastimil Babka wrote:
> Until now, kmem_cache->cpu_sheaves was !NULL only for caches with
> sheaves enabled. Since we want to enable them for almost all caches,
> it's suboptimal to test the pointer in the fast paths, so instead
> allocate it for all caches in do_kmem_cache_create(). Instead of testing
> the cpu_sheaves pointer to recognize caches (yet) without sheaves, test
> kmem_cache->sheaf_capacity for being 0, where needed, using a new
> cache_has_sheaves() helper.
> 
> However, for the fast paths sake we also assume that the main sheaf
> always exists (pcs->main is !NULL), and during bootstrap we cannot
> allocate sheaves yet.
> 
> Solve this by introducing a single static bootstrap_sheaf that's
> assigned as pcs->main during bootstrap. It has a size of 0, so during
> allocations, the fast path will find it's empty. Since the size of 0
> matches sheaf_capacity of 0, the freeing fast paths will find it's
> "full". In the slow path handlers, we use cache_has_sheaves() to
> recognize that the cache doesn't (yet) have real sheaves, and fall back.
> Thus sharing the single bootstrap sheaf like this for multiple caches
> and cpus is safe.
> 
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/slub.c | 119 ++++++++++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 81 insertions(+), 38 deletions(-)
> 

Nit: would it make sense to also update "if (s->cpu_sheaves)" to
cache_has_sheaves() in kvfree_rcu_barrier_on_cache(), for consistency?

-- 
Thanks,
Hao
Re: [PATCH v3 06/21] slab: introduce percpu sheaves bootstrap
Posted by Vlastimil Babka 2 weeks, 6 days ago
On 1/19/26 12:32, Hao Li wrote:
> On Fri, Jan 16, 2026 at 03:40:26PM +0100, Vlastimil Babka wrote:
>> Until now, kmem_cache->cpu_sheaves was !NULL only for caches with
>> sheaves enabled. Since we want to enable them for almost all caches,
>> it's suboptimal to test the pointer in the fast paths, so instead
>> allocate it for all caches in do_kmem_cache_create(). Instead of testing
>> the cpu_sheaves pointer to recognize caches (yet) without sheaves, test
>> kmem_cache->sheaf_capacity for being 0, where needed, using a new
>> cache_has_sheaves() helper.
>> 
>> However, for the fast paths sake we also assume that the main sheaf
>> always exists (pcs->main is !NULL), and during bootstrap we cannot
>> allocate sheaves yet.
>> 
>> Solve this by introducing a single static bootstrap_sheaf that's
>> assigned as pcs->main during bootstrap. It has a size of 0, so during
>> allocations, the fast path will find it's empty. Since the size of 0
>> matches sheaf_capacity of 0, the freeing fast paths will find it's
>> "full". In the slow path handlers, we use cache_has_sheaves() to
>> recognize that the cache doesn't (yet) have real sheaves, and fall back.
>> Thus sharing the single bootstrap sheaf like this for multiple caches
>> and cpus is safe.
>> 
>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>> ---
>>  mm/slub.c | 119 ++++++++++++++++++++++++++++++++++++++++++--------------------
>>  1 file changed, 81 insertions(+), 38 deletions(-)
>> 
> 
> Nit: would it make sense to also update "if (s->cpu_sheaves)" to
> cache_has_sheaves() in kvfree_rcu_barrier_on_cache(), for consistency?

Ack, will do. Thanks.
Re: [PATCH v3 06/21] slab: introduce percpu sheaves bootstrap
Posted by Suren Baghdasaryan 3 weeks, 3 days ago
On Fri, Jan 16, 2026 at 2:40 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>
> Until now, kmem_cache->cpu_sheaves was !NULL only for caches with
> sheaves enabled. Since we want to enable them for almost all caches,
> it's suboptimal to test the pointer in the fast paths, so instead
> allocate it for all caches in do_kmem_cache_create(). Instead of testing
> the cpu_sheaves pointer to recognize caches (yet) without sheaves, test
> kmem_cache->sheaf_capacity for being 0, where needed, using a new
> cache_has_sheaves() helper.
>
> However, for the fast paths sake we also assume that the main sheaf
> always exists (pcs->main is !NULL), and during bootstrap we cannot
> allocate sheaves yet.
>
> Solve this by introducing a single static bootstrap_sheaf that's
> assigned as pcs->main during bootstrap. It has a size of 0, so during
> allocations, the fast path will find it's empty. Since the size of 0
> matches sheaf_capacity of 0, the freeing fast paths will find it's
> "full". In the slow path handlers, we use cache_has_sheaves() to
> recognize that the cache doesn't (yet) have real sheaves, and fall back.

I don't think kmem_cache_prefill_sheaf() handles this case, does it?
Or do you rely on the caller to never try prefilling a bootstrapped
sheaf?
kmem_cache_refill_sheaf() and kmem_cache_return_sheaf() operate on a
sheaf obtained by calling kmem_cache_prefill_sheaf(), so if
kmem_cache_prefill_sheaf() never returns a bootstrapped sheaf we don't
need special handling there.

> Thus sharing the single bootstrap sheaf like this for multiple caches
> and cpus is safe.
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
>  mm/slub.c | 119 ++++++++++++++++++++++++++++++++++++++++++--------------------
>  1 file changed, 81 insertions(+), 38 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index edf341c87e20..706cb6398f05 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -501,6 +501,18 @@ struct kmem_cache_node {
>         struct node_barn *barn;
>  };
>
> +/*
> + * Every cache has !NULL s->cpu_sheaves but they may point to the
> + * bootstrap_sheaf temporarily during init, or permanently for the boot caches
> + * and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY. This
> + * helper distinguishes whether cache has real non-bootstrap sheaves.
> + */
> +static inline bool cache_has_sheaves(struct kmem_cache *s)
> +{
> +       /* Test CONFIG_SLUB_TINY for code elimination purposes */
> +       return !IS_ENABLED(CONFIG_SLUB_TINY) && s->sheaf_capacity;
> +}
> +
>  static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
>  {
>         return s->node[node];
> @@ -2855,6 +2867,10 @@ static void pcs_destroy(struct kmem_cache *s)
>                 if (!pcs->main)
>                         continue;
>
> +               /* bootstrap or debug caches, it's the bootstrap_sheaf */
> +               if (!pcs->main->cache)
> +                       continue;

I wonder why we can't simply check cache_has_sheaves(s) at the
beginning and skip the loop altogether.
I realize that __kmem_cache_release()->pcs_destroy() is called in the
failure path of do_kmem_cache_create() and s->cpu_sheaves might be
partially initialized if alloc_empty_sheaf() fails somewhere in the
middle of the loop inside init_percpu_sheaves(). But for that,
s->sheaf_capacity should still be non-zero, so checking
cache_has_sheaves() at the beginning of pcs_destroy() should still
work, no?

BTW, I see one last check for s->cpu_sheaves that you didn't replace
with cache_has_sheaves() inside __kmem_cache_release(). I think that's
because it's also in the failure path of do_kmem_cache_create() and
it's possible that s->sheaf_capacity > 0 while s->cpu_sheaves == NULL
(if alloc_percpu(struct slub_percpu_sheaves) fails). It might be
helpful to add a comment inside __kmem_cache_release() to explain why
cache_has_sheaves() can't be used there.

> +
>                 /*
>                  * We have already passed __kmem_cache_shutdown() so everything
>                  * was flushed and there should be no objects allocated from
> @@ -4030,7 +4046,7 @@ static bool has_pcs_used(int cpu, struct kmem_cache *s)
>  {
>         struct slub_percpu_sheaves *pcs;
>
> -       if (!s->cpu_sheaves)
> +       if (!cache_has_sheaves(s))
>                 return false;
>
>         pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
> @@ -4052,7 +4068,7 @@ static void flush_cpu_slab(struct work_struct *w)
>
>         s = sfw->s;
>
> -       if (s->cpu_sheaves)
> +       if (cache_has_sheaves(s))
>                 pcs_flush_all(s);
>
>         flush_this_cpu_slab(s);
> @@ -4157,7 +4173,7 @@ void flush_all_rcu_sheaves(void)
>         mutex_lock(&slab_mutex);
>
>         list_for_each_entry(s, &slab_caches, list) {
> -               if (!s->cpu_sheaves)
> +               if (!cache_has_sheaves(s))
>                         continue;
>                 flush_rcu_sheaves_on_cache(s);
>         }
> @@ -4179,7 +4195,7 @@ static int slub_cpu_dead(unsigned int cpu)
>         mutex_lock(&slab_mutex);
>         list_for_each_entry(s, &slab_caches, list) {
>                 __flush_cpu_slab(s, cpu);
> -               if (s->cpu_sheaves)
> +               if (cache_has_sheaves(s))
>                         __pcs_flush_all_cpu(s, cpu);
>         }
>         mutex_unlock(&slab_mutex);
> @@ -4979,6 +4995,12 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
>
>         lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
>
> +       /* Bootstrap or debug cache, back off */
> +       if (unlikely(!cache_has_sheaves(s))) {
> +               local_unlock(&s->cpu_sheaves->lock);
> +               return NULL;
> +       }
> +
>         if (pcs->spare && pcs->spare->size > 0) {
>                 swap(pcs->main, pcs->spare);
>                 return pcs;
> @@ -5165,6 +5187,11 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
>                 struct slab_sheaf *full;
>                 struct node_barn *barn;
>
> +               if (unlikely(!cache_has_sheaves(s))) {
> +                       local_unlock(&s->cpu_sheaves->lock);
> +                       return allocated;
> +               }
> +
>                 if (pcs->spare && pcs->spare->size > 0) {
>                         swap(pcs->main, pcs->spare);
>                         goto do_alloc;
> @@ -5244,8 +5271,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
>         if (unlikely(object))
>                 goto out;
>
> -       if (s->cpu_sheaves)
> -               object = alloc_from_pcs(s, gfpflags, node);
> +       object = alloc_from_pcs(s, gfpflags, node);
>
>         if (!object)
>                 object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
> @@ -5355,17 +5381,6 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
>
>         if (unlikely(size > s->sheaf_capacity)) {
>
> -               /*
> -                * slab_debug disables cpu sheaves intentionally so all
> -                * prefilled sheaves become "oversize" and we give up on
> -                * performance for the debugging. Same with SLUB_TINY.
> -                * Creating a cache without sheaves and then requesting a
> -                * prefilled sheaf is however not expected, so warn.
> -                */
> -               WARN_ON_ONCE(s->sheaf_capacity == 0 &&
> -                            !IS_ENABLED(CONFIG_SLUB_TINY) &&
> -                            !(s->flags & SLAB_DEBUG_FLAGS));
> -
>                 sheaf = kzalloc(struct_size(sheaf, objects, size), gfp);
>                 if (!sheaf)
>                         return NULL;
> @@ -6082,6 +6097,12 @@ __pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs)
>  restart:
>         lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
>
> +       /* Bootstrap or debug cache, back off */
> +       if (unlikely(!cache_has_sheaves(s))) {
> +               local_unlock(&s->cpu_sheaves->lock);
> +               return NULL;
> +       }
> +
>         barn = get_barn(s);
>         if (!barn) {
>                 local_unlock(&s->cpu_sheaves->lock);
> @@ -6280,6 +6301,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
>                 struct slab_sheaf *empty;
>                 struct node_barn *barn;
>
> +               /* Bootstrap or debug cache, fall back */
> +               if (unlikely(!cache_has_sheaves(s))) {
> +                       local_unlock(&s->cpu_sheaves->lock);
> +                       goto fail;
> +               }
> +
>                 if (pcs->spare && pcs->spare->size == 0) {
>                         pcs->rcu_free = pcs->spare;
>                         pcs->spare = NULL;
> @@ -6674,9 +6701,8 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
>         if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
>                 return;
>
> -       if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
> -                                    slab_nid(slab) == numa_mem_id())
> -                          && likely(!slab_test_pfmemalloc(slab))) {
> +       if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
> +           && likely(!slab_test_pfmemalloc(slab))) {
>                 if (likely(free_to_pcs(s, object)))
>                         return;
>         }
> @@ -7379,7 +7405,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
>          * freeing to sheaves is so incompatible with the detached freelist so
>          * once we go that way, we have to do everything differently
>          */
> -       if (s && s->cpu_sheaves) {
> +       if (s && cache_has_sheaves(s)) {
>                 free_to_pcs_bulk(s, size, p);
>                 return;
>         }
> @@ -7490,8 +7516,7 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
>                 size--;
>         }
>
> -       if (s->cpu_sheaves)
> -               i = alloc_from_pcs_bulk(s, size, p);
> +       i = alloc_from_pcs_bulk(s, size, p);

Doesn't the above change make this fastpath a bit longer? IIUC,
instead of bailing out right here we call alloc_from_pcs_bulk() and
bail out from there because pcs->main->size is 0.

>
>         if (i < size) {
>                 /*
> @@ -7702,6 +7727,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
>
>  static int init_percpu_sheaves(struct kmem_cache *s)
>  {
> +       static struct slab_sheaf bootstrap_sheaf = {};
>         int cpu;
>
>         for_each_possible_cpu(cpu) {
> @@ -7711,7 +7737,28 @@ static int init_percpu_sheaves(struct kmem_cache *s)
>
>                 local_trylock_init(&pcs->lock);
>
> -               pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
> +               /*
> +                * Bootstrap sheaf has zero size so fast-path allocation fails.
> +                * It has also size == s->sheaf_capacity, so fast-path free
> +                * fails. In the slow paths we recognize the situation by
> +                * checking s->sheaf_capacity. This allows fast paths to assume
> +                * s->cpu_sheaves and pcs->main always exists and is valid.

s/is/are

> +                * It's also safe to share the single static bootstrap_sheaf
> +                * with zero-sized objects array as it's never modified.
> +                *
> +                * bootstrap_sheaf also has NULL pointer to kmem_cache so we
> +                * recognize it and not attempt to free it when destroying the
> +                * cache

missing a period at the end of the above sentence.

> +                *
> +                * We keep bootstrap_sheaf for kmem_cache and kmem_cache_node,
> +                * caches with debug enabled, and all caches with SLUB_TINY.
> +                * For kmalloc caches it's used temporarily during the initial
> +                * bootstrap.
> +                */
> +               if (!s->sheaf_capacity)
> +                       pcs->main = &bootstrap_sheaf;
> +               else
> +                       pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
>
>                 if (!pcs->main)
>                         return -ENOMEM;
> @@ -7809,7 +7856,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
>                         continue;
>                 }
>
> -               if (s->cpu_sheaves) {
> +               if (cache_has_sheaves(s)) {
>                         barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
>
>                         if (!barn)
> @@ -8127,7 +8174,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
>         flush_all_cpus_locked(s);
>
>         /* we might have rcu sheaves in flight */
> -       if (s->cpu_sheaves)
> +       if (cache_has_sheaves(s))
>                 rcu_barrier();
>
>         /* Attempt to free all objects */
> @@ -8439,7 +8486,7 @@ static int slab_mem_going_online_callback(int nid)
>                 if (get_node(s, nid))
>                         continue;
>
> -               if (s->cpu_sheaves) {
> +               if (cache_has_sheaves(s)) {
>                         barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
>
>                         if (!barn) {
> @@ -8647,12 +8694,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
>
>         set_cpu_partial(s);
>
> -       if (s->sheaf_capacity) {
> -               s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
> -               if (!s->cpu_sheaves) {
> -                       err = -ENOMEM;
> -                       goto out;
> -               }
> +       s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
> +       if (!s->cpu_sheaves) {
> +               err = -ENOMEM;
> +               goto out;
>         }
>
>  #ifdef CONFIG_NUMA
> @@ -8671,11 +8716,9 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
>         if (!alloc_kmem_cache_cpus(s))
>                 goto out;
>
> -       if (s->cpu_sheaves) {
> -               err = init_percpu_sheaves(s);
> -               if (err)
> -                       goto out;
> -       }
> +       err = init_percpu_sheaves(s);
> +       if (err)
> +               goto out;
>
>         err = 0;
>
>
> --
> 2.52.0
>
Re: [PATCH v3 06/21] slab: introduce percpu sheaves bootstrap
Posted by Vlastimil Babka 2 weeks, 6 days ago
On 1/17/26 03:11, Suren Baghdasaryan wrote:
>> @@ -7379,7 +7405,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
>>          * freeing to sheaves is so incompatible with the detached freelist so
>>          * once we go that way, we have to do everything differently
>>          */
>> -       if (s && s->cpu_sheaves) {
>> +       if (s && cache_has_sheaves(s)) {
>>                 free_to_pcs_bulk(s, size, p);
>>                 return;
>>         }
>> @@ -7490,8 +7516,7 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
>>                 size--;
>>         }
>>
>> -       if (s->cpu_sheaves)
>> -               i = alloc_from_pcs_bulk(s, size, p);
>> +       i = alloc_from_pcs_bulk(s, size, p);
> 
> Doesn't the above change make this fastpath a bit longer? IIUC,
> instead of bailing out right here we call alloc_from_pcs_bulk() and
> bail out from there because pcs->main->size is 0.

But only for caches with no sheaves, and that should be the exception. So
the fast path avoids checks. We're making the slowpath longer. The strategy
is the same as with single-object alloc, and is described in the changelog. Or
what am I missing?
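
Roughly, the shape is (a simplified sketch, not the actual code;
sketch_refill_main() is a made-up placeholder for the spare/barn refill, and
locking is condensed):

/* placeholder: the real slow path refills main from the spare sheaf or barn */
static bool sketch_refill_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs)
{
	return false;
}

static unsigned int sketch_alloc_bulk(struct kmem_cache *s, size_t size, void **p)
{
	struct slub_percpu_sheaves *pcs;
	unsigned int allocated = 0;

	if (!local_trylock(&s->cpu_sheaves->lock))
		return 0;

	pcs = this_cpu_ptr(s->cpu_sheaves);

	while (allocated < size) {
		if (pcs->main->size == 0) {
			/* slow path: only caches without real sheaves pay for this test */
			if (!cache_has_sheaves(s) || !sketch_refill_main(s, pcs))
				break;
		}
		p[allocated++] = pcs->main->objects[--pcs->main->size];
	}

	local_unlock(&s->cpu_sheaves->lock);
	return allocated;	/* caller falls back for the remainder */
}

A cache with real sheaves and a non-empty main sheaf never evaluates
cache_has_sheaves() at all; only the already-slow empty-main case does.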
Re: [PATCH v3 06/21] slab: introduce percpu sheaves bootstrap
Posted by Vlastimil Babka 3 weeks, 1 day ago
On 1/17/26 03:11, Suren Baghdasaryan wrote:
> On Fri, Jan 16, 2026 at 2:40 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>> Thus sharing the single bootstrap sheaf like this for multiple caches
>> and cpus is safe.
>>
>> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
>> ---
>>  mm/slub.c | 119 ++++++++++++++++++++++++++++++++++++++++++--------------------
>>  1 file changed, 81 insertions(+), 38 deletions(-)
>>
>> diff --git a/mm/slub.c b/mm/slub.c
>> index edf341c87e20..706cb6398f05 100644
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -501,6 +501,18 @@ struct kmem_cache_node {
>>         struct node_barn *barn;
>>  };
>>
>> +/*
>> + * Every cache has !NULL s->cpu_sheaves but they may point to the
>> + * bootstrap_sheaf temporarily during init, or permanently for the boot caches
>> + * and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY. This
>> + * helper distinguishes whether cache has real non-bootstrap sheaves.
>> + */
>> +static inline bool cache_has_sheaves(struct kmem_cache *s)
>> +{
>> +       /* Test CONFIG_SLUB_TINY for code elimination purposes */
>> +       return !IS_ENABLED(CONFIG_SLUB_TINY) && s->sheaf_capacity;
>> +}
>> +
>>  static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
>>  {
>>         return s->node[node];
>> @@ -2855,6 +2867,10 @@ static void pcs_destroy(struct kmem_cache *s)
>>                 if (!pcs->main)
>>                         continue;
>>
>> +               /* bootstrap or debug caches, it's the bootstrap_sheaf */
>> +               if (!pcs->main->cache)
>> +                       continue;
> 
> I wonder why we can't simply check cache_has_sheaves(s) at the
> beginning and skip the loop altogether.
> I realize that __kmem_cache_release()->pcs_destroy() is called in the
> failure path of do_kmem_cache_create() and s->cpu_sheaves might be
> partially initialized if alloc_empty_sheaf() fails somewhere in the
> middle of the loop inside init_percpu_sheaves(). But for that,
> s->sheaf_capacity should still be non-zero, so checking
> cache_has_sheaves() at the beginning of pcs_destroy() should still
> work, no?

I think it should, will do.

> BTW, I see one last check for s->cpu_sheaves that you didn't replace
> with cache_has_sheaves() inside __kmem_cache_release(). I think that's
> because it's also in the failure path of do_kmem_cache_create() and
> it's possible that s->sheaf_capacity > 0 while s->cpu_sheaves == NULL
> (if alloc_percpu(struct slub_percpu_sheaves) fails). It might be
> helpful to add a comment inside __kmem_cache_release() to explain why
> cache_has_sheaves() can't be used there.

The reason is rather what Harry said. I'll move the check to pcs_destroy()
and add a comment there.

diff --git a/mm/slub.c b/mm/slub.c
index 706cb6398f05..6b19aa518a1a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2858,19 +2858,26 @@ static void pcs_destroy(struct kmem_cache *s)
 {
 	int cpu;
 
+	/*
+	 * We may be unwinding cache creation that failed before or during the
+	 * allocation of this.
+	 */
+	if (!s->cpu_sheaves)
+		return;
+
+	/* pcs->main can only point to the bootstrap sheaf, nothing to free */
+	if (!cache_has_sheaves(s))
+		goto free_pcs;
+
 	for_each_possible_cpu(cpu) {
 		struct slub_percpu_sheaves *pcs;
 
 		pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
 
-		/* can happen when unwinding failed create */
+		/* This can happen when unwinding failed cache creation. */
 		if (!pcs->main)
 			continue;
 
-		/* bootstrap or debug caches, it's the bootstrap_sheaf */
-		if (!pcs->main->cache)
-			continue;
-
 		/*
 		 * We have already passed __kmem_cache_shutdown() so everything
 		 * was flushed and there should be no objects allocated from
@@ -2889,6 +2896,7 @@ static void pcs_destroy(struct kmem_cache *s)
 		}
 	}
 
+free_pcs:
 	free_percpu(s->cpu_sheaves);
 	s->cpu_sheaves = NULL;
 }
@@ -5379,6 +5387,9 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
 	struct slab_sheaf *sheaf = NULL;
 	struct node_barn *barn;
 
+	if (unlikely(!size))
+		return NULL;
+
 	if (unlikely(size > s->sheaf_capacity)) {
 
 		sheaf = kzalloc(struct_size(sheaf, objects, size), gfp);
@@ -7833,8 +7844,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
 void __kmem_cache_release(struct kmem_cache *s)
 {
 	cache_random_seq_destroy(s);
-	if (s->cpu_sheaves)
-		pcs_destroy(s);
+	pcs_destroy(s);
 #ifdef CONFIG_PREEMPT_RT
 	if (s->cpu_slab)
 		lockdep_unregister_key(&s->lock_key);

Re: [PATCH v3 06/21] slab: introduce percpu sheaves bootstrap
Posted by Harry Yoo 3 weeks, 1 day ago
On Sat, Jan 17, 2026 at 02:11:02AM +0000, Suren Baghdasaryan wrote:
> On Fri, Jan 16, 2026 at 2:40 PM Vlastimil Babka <vbabka@suse.cz> wrote:
> >
> > Until now, kmem_cache->cpu_sheaves was !NULL only for caches with
> > sheaves enabled. Since we want to enable them for almost all caches,
> > it's suboptimal to test the pointer in the fast paths, so instead
> > allocate it for all caches in do_kmem_cache_create(). Instead of testing
> > the cpu_sheaves pointer to recognize caches (yet) without sheaves, test
> > kmem_cache->sheaf_capacity for being 0, where needed, using a new
> > cache_has_sheaves() helper.
> >
> > However, for the fast paths sake we also assume that the main sheaf
> > always exists (pcs->main is !NULL), and during bootstrap we cannot
> > allocate sheaves yet.
> >
> > Solve this by introducing a single static bootstrap_sheaf that's
> > assigned as pcs->main during bootstrap. It has a size of 0, so during
> > allocations, the fast path will find it's empty. Since the size of 0
> > matches sheaf_capacity of 0, the freeing fast paths will find it's
> > "full". In the slow path handlers, we use cache_has_sheaves() to
> > recognize that the cache doesn't (yet) have real sheaves, and fall back.
> 
> I don't think kmem_cache_prefill_sheaf() handles this case, does it?
> Or do you rely on the caller to never try prefilling a bootstrapped
> sheaf?

If a cache doesn't have sheaves, s->sheaf_capacity should be 0,
so the sheaf returned by kmem_cache_prefill_sheaf() should be
"oversized" one... unless the user tries to prefill a sheaf with
size == 0?

> kmem_cache_refill_sheaf() and kmem_cache_return_sheaf() operate on a
> sheaf obtained by calling kmem_cache_prefill_sheaf(), so if
> kmem_cache_prefill_sheaf() never returns a bootstrapped sheaf we don't
> need special handling there.

Right.

> > Thus sharing the single bootstrap sheaf like this for multiple caches
> > and cpus is safe.
> >
> > Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> > ---
> >  mm/slub.c | 119 ++++++++++++++++++++++++++++++++++++++++++--------------------
> >  1 file changed, 81 insertions(+), 38 deletions(-)
> >
> > diff --git a/mm/slub.c b/mm/slub.c
> > index edf341c87e20..706cb6398f05 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -501,6 +501,18 @@ struct kmem_cache_node {
> >         struct node_barn *barn;
> >  };
> >
> > +/*
> > + * Every cache has !NULL s->cpu_sheaves but they may point to the
> > + * bootstrap_sheaf temporarily during init, or permanently for the boot caches
> > + * and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY. This
> > + * helper distinguishes whether cache has real non-bootstrap sheaves.
> > + */
> > +static inline bool cache_has_sheaves(struct kmem_cache *s)
> > +{
> > +       /* Test CONFIG_SLUB_TINY for code elimination purposes */
> > +       return !IS_ENABLED(CONFIG_SLUB_TINY) && s->sheaf_capacity;
> > +}
> > +
> >  static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
> >  {
> >         return s->node[node];
> > @@ -2855,6 +2867,10 @@ static void pcs_destroy(struct kmem_cache *s)
> >                 if (!pcs->main)
> >                         continue;
> >
> > +               /* bootstrap or debug caches, it's the bootstrap_sheaf */
> > +               if (!pcs->main->cache)
> > +                       continue;
> 
> BTW, I see one last check for s->cpu_sheaves that you didn't replace
> with cache_has_sheaves() inside __kmem_cache_release(). I think that's
> because it's also in the failure path of do_kmem_cache_create() and
> it's possible that s->sheaf_capacity > 0 while s->cpu_sheaves == NULL
> (if alloc_percpu(struct slub_percpu_sheaves) fails). It might be
> helpful to add a comment inside __kmem_cache_release() to explain why
> cache_has_sheaves() can't be used there.

I was thinking it cannot be replaced because s->cpu_sheaves is not NULL
even when s->sheaf_capacity == 0.

Agree that a comment would be worth it!

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH v3 06/21] slab: introduce percpu sheaves bootstrap
Posted by Vlastimil Babka 3 weeks, 1 day ago
On 1/19/26 04:40, Harry Yoo wrote:
> On Sat, Jan 17, 2026 at 02:11:02AM +0000, Suren Baghdasaryan wrote:
>> On Fri, Jan 16, 2026 at 2:40 PM Vlastimil Babka <vbabka@suse.cz> wrote:
>> >
>> > Until now, kmem_cache->cpu_sheaves was !NULL only for caches with
>> > sheaves enabled. Since we want to enable them for almost all caches,
>> > it's suboptimal to test the pointer in the fast paths, so instead
>> > allocate it for all caches in do_kmem_cache_create(). Instead of testing
>> > the cpu_sheaves pointer to recognize caches (yet) without sheaves, test
>> > kmem_cache->sheaf_capacity for being 0, where needed, using a new
>> > cache_has_sheaves() helper.
>> >
>> > However, for the fast paths sake we also assume that the main sheaf
>> > always exists (pcs->main is !NULL), and during bootstrap we cannot
>> > allocate sheaves yet.
>> >
>> > Solve this by introducing a single static bootstrap_sheaf that's
>> > assigned as pcs->main during bootstrap. It has a size of 0, so during
>> > allocations, the fast path will find it's empty. Since the size of 0
>> > matches sheaf_capacity of 0, the freeing fast paths will find it's
>> > "full". In the slow path handlers, we use cache_has_sheaves() to
>> > recognize that the cache doesn't (yet) have real sheaves, and fall back.
>> 
>> I don't think kmem_cache_prefill_sheaf() handles this case, does it?
>> Or do you rely on the caller to never try prefilling a bootstrapped
>> sheaf?
> 
> If a cache doesn't have sheaves, s->sheaf_capacity should be 0,
> so the sheaf returned by kmem_cache_prefill_sheaf() should be
> "oversized" one... unless the user tries to prefill a sheaf with
> size == 0?

I'll add a

        if (unlikely(!size))
                return NULL;

to kmem_cache_prefill_sheaf() so we don't have to deal with oversized
sheaves of size 0 just for this theoretical case...