From: Vlastimil Babka
Date: Fri, 23 Jan 2026 07:52:45 +0100
Subject: [PATCH v4 07/22] slab: introduce percpu sheaves bootstrap
Message-Id: <20260123-sheaves-for-all-v4-7-041323d506f7@suse.cz>
References: <20260123-sheaves-for-all-v4-0-041323d506f7@suse.cz>
In-Reply-To: <20260123-sheaves-for-all-v4-0-041323d506f7@suse.cz>
To: Harry Yoo, Petr Tesarik, Christoph Lameter, David Rientjes,
    Roman Gushchin
Cc: Hao Li, Andrew Morton, Uladzislau Rezki, "Liam R. Howlett",
    Suren Baghdasaryan, Sebastian Andrzej Siewior, Alexei Starovoitov,
    linux-mm@kvack.org, linux-kernel@vger.kernel.org,
    linux-rt-devel@lists.linux.dev, bpf@vger.kernel.org,
    kasan-dev@googlegroups.com, Vlastimil Babka

Until now, kmem_cache->cpu_sheaves was !NULL only for caches with sheaves
enabled. Since we want to enable them for almost all caches, it's suboptimal
to test the pointer in the fast paths, so instead allocate it for all caches
in do_kmem_cache_create(). Instead of testing the cpu_sheaves pointer to
recognize caches (yet) without sheaves, test kmem_cache->sheaf_capacity for
being 0 where needed, using a new cache_has_sheaves() helper.

However, for the fast paths' sake we also assume that the main sheaf always
exists (pcs->main is !NULL), and during bootstrap we cannot allocate sheaves
yet. Solve this by introducing a single static bootstrap_sheaf that's assigned
as pcs->main during bootstrap. It has a size of 0, so the allocation fast path
will find it empty. Since a size of 0 also matches a sheaf_capacity of 0, the
freeing fast path will find it "full". In the slow path handlers, we use
cache_has_sheaves() to recognize that the cache doesn't (yet) have real
sheaves, and fall back. Thus sharing the single bootstrap sheaf like this for
multiple caches and cpus is safe.

Reviewed-by: Harry Yoo
Signed-off-by: Vlastimil Babka
Reviewed-by: Hao Li
Reviewed-by: Liam R. Howlett
---
 mm/slab.h        |  12 ++++++
 mm/slab_common.c |   2 +-
 mm/slub.c        | 123 ++++++++++++++++++++++++++++++++++++--------------
 3 files changed, 95 insertions(+), 42 deletions(-)

diff --git a/mm/slab.h b/mm/slab.h
index cb48ce5014ba..a20a6af6e0ef 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -277,6 +277,18 @@ struct kmem_cache {
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
+/*
+ * Every cache has !NULL s->cpu_sheaves, but they may point to the
+ * bootstrap_sheaf temporarily during init, or permanently for the boot caches
+ * and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY. This
+ * helper distinguishes whether a cache has real, non-bootstrap sheaves.
+ */
+static inline bool cache_has_sheaves(struct kmem_cache *s)
+{
+	/* Test CONFIG_SLUB_TINY for code elimination purposes */
+	return !IS_ENABLED(CONFIG_SLUB_TINY) && s->sheaf_capacity;
+}
+
 #if defined(CONFIG_SYSFS) && !defined(CONFIG_SLUB_TINY)
 #define SLAB_SUPPORTS_SYSFS 1
 void sysfs_slab_unlink(struct kmem_cache *s);
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 5c15a4ce5743..8d0d6b0cb896 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -2163,7 +2163,7 @@ EXPORT_SYMBOL_GPL(kvfree_rcu_barrier);
  */
 void kvfree_rcu_barrier_on_cache(struct kmem_cache *s)
 {
-	if (s->cpu_sheaves) {
+	if (cache_has_sheaves(s)) {
 		flush_rcu_sheaves_on_cache(s);
 		rcu_barrier();
 	}
diff --git a/mm/slub.c b/mm/slub.c
index 594f5fac39b3..41e1bf35707c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2846,12 +2846,23 @@ static void pcs_destroy(struct kmem_cache *s)
 {
 	int cpu;
 
+	/*
+	 * We may be unwinding cache creation that failed before or during the
+	 * allocation of this.
+	 */
+	if (!s->cpu_sheaves)
+		return;
+
+	/* pcs->main can only point to the bootstrap sheaf, nothing to free */
+	if (!cache_has_sheaves(s))
+		goto free_pcs;
+
 	for_each_possible_cpu(cpu) {
 		struct slub_percpu_sheaves *pcs;
 
 		pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
 
-		/* can happen when unwinding failed create */
+		/* This can happen when unwinding failed cache creation. */
 		if (!pcs->main)
 			continue;
 
@@ -2873,6 +2884,7 @@ static void pcs_destroy(struct kmem_cache *s)
 		}
 	}
 
+free_pcs:
 	free_percpu(s->cpu_sheaves);
 	s->cpu_sheaves = NULL;
 }
@@ -4030,7 +4042,7 @@ static bool has_pcs_used(int cpu, struct kmem_cache *s)
 {
 	struct slub_percpu_sheaves *pcs;
 
-	if (!s->cpu_sheaves)
+	if (!cache_has_sheaves(s))
 		return false;
 
 	pcs = per_cpu_ptr(s->cpu_sheaves, cpu);
@@ -4052,7 +4064,7 @@ static void flush_cpu_slab(struct work_struct *w)
 
 	s = sfw->s;
 
-	if (s->cpu_sheaves)
+	if (cache_has_sheaves(s))
 		pcs_flush_all(s);
 
 	flush_this_cpu_slab(s);
@@ -4157,7 +4169,7 @@ void flush_all_rcu_sheaves(void)
 	mutex_lock(&slab_mutex);
 
 	list_for_each_entry(s, &slab_caches, list) {
-		if (!s->cpu_sheaves)
+		if (!cache_has_sheaves(s))
 			continue;
 		flush_rcu_sheaves_on_cache(s);
 	}
@@ -4179,7 +4191,7 @@ static int slub_cpu_dead(unsigned int cpu)
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
 		__flush_cpu_slab(s, cpu);
-		if (s->cpu_sheaves)
+		if (cache_has_sheaves(s))
 			__pcs_flush_all_cpu(s, cpu);
 	}
 	mutex_unlock(&slab_mutex);
@@ -4979,6 +4991,12 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs,
 
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
+	/* Bootstrap or debug cache, back off */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
+
 	if (pcs->spare && pcs->spare->size > 0) {
 		swap(pcs->main, pcs->spare);
 		return pcs;
@@ -5165,6 +5183,11 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s, size_t size, void **p)
 	struct slab_sheaf *full;
 	struct node_barn *barn;
 
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return allocated;
+	}
+
 	if (pcs->spare && pcs->spare->size > 0) {
 		swap(pcs->main, pcs->spare);
 		goto do_alloc;
@@ -5244,8 +5267,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
 	if (unlikely(object))
 		goto out;
 
-	if (s->cpu_sheaves)
-		object = alloc_from_pcs(s, gfpflags, node);
+	object = alloc_from_pcs(s, gfpflags, node);
 
 	if (!object)
 		object = __slab_alloc_node(s, gfpflags, node, addr, orig_size);
@@ -5353,18 +5375,10 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size)
 	struct slab_sheaf *sheaf = NULL;
 	struct node_barn *barn;
 
-	if (unlikely(size > s->sheaf_capacity)) {
+	if (unlikely(!size))
+		return NULL;
 
-		/*
-		 * slab_debug disables cpu sheaves intentionally so all
-		 * prefilled sheaves become "oversize" and we give up on
-		 * performance for the debugging. Same with SLUB_TINY.
-		 * Creating a cache without sheaves and then requesting a
-		 * prefilled sheaf is however not expected, so warn.
-		 */
-		WARN_ON_ONCE(s->sheaf_capacity == 0 &&
-			     !IS_ENABLED(CONFIG_SLUB_TINY) &&
-			     !(s->flags & SLAB_DEBUG_FLAGS));
+	if (unlikely(size > s->sheaf_capacity)) {
 
 		sheaf = kzalloc(struct_size(sheaf, objects, size), gfp);
 		if (!sheaf)
@@ -6082,6 +6096,12 @@ __pcs_replace_full_main(struct kmem_cache *s, struct slub_percpu_sheaves *pcs)
 restart:
 	lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock));
 
+	/* Bootstrap or debug cache, back off */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		return NULL;
+	}
+
 	barn = get_barn(s);
 	if (!barn) {
 		local_unlock(&s->cpu_sheaves->lock);
@@ -6295,6 +6315,12 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *obj)
 	struct slab_sheaf *empty;
 	struct node_barn *barn;
 
+	/* Bootstrap or debug cache, fall back */
+	if (unlikely(!cache_has_sheaves(s))) {
+		local_unlock(&s->cpu_sheaves->lock);
+		goto fail;
+	}
+
 	if (pcs->spare && pcs->spare->size == 0) {
 		pcs->rcu_free = pcs->spare;
 		pcs->spare = NULL;
@@ -6691,9 +6717,8 @@ void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
 	if (unlikely(!slab_free_hook(s, object, slab_want_init_on_free(s), false)))
 		return;
 
-	if (s->cpu_sheaves && likely(!IS_ENABLED(CONFIG_NUMA) ||
-				     slab_nid(slab) == numa_mem_id())
-	    && likely(!slab_test_pfmemalloc(slab))) {
+	if (likely(!IS_ENABLED(CONFIG_NUMA) || slab_nid(slab) == numa_mem_id())
+	    && likely(!slab_test_pfmemalloc(slab))) {
 		if (likely(free_to_pcs(s, object)))
 			return;
 	}
@@ -7396,7 +7421,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 	 * freeing to sheaves is so incompatible with the detached freelist so
 	 * once we go that way, we have to do everything differently
 	 */
-	if (s && s->cpu_sheaves) {
+	if (s && cache_has_sheaves(s)) {
 		free_to_pcs_bulk(s, size, p);
 		return;
 	}
@@ -7507,8 +7532,7 @@ int kmem_cache_alloc_bulk_noprof(struct kmem_cache *s, gfp_t flags, size_t size,
 		size--;
 	}
 
-	if (s->cpu_sheaves)
-		i = alloc_from_pcs_bulk(s, size, p);
+	i = alloc_from_pcs_bulk(s, size, p);
 
 	if (i < size) {
 		/*
@@ -7719,6 +7743,7 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
 
 static int init_percpu_sheaves(struct kmem_cache *s)
 {
+	static struct slab_sheaf bootstrap_sheaf = {};
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
@@ -7728,7 +7753,28 @@ static int init_percpu_sheaves(struct kmem_cache *s)
 
 		local_trylock_init(&pcs->lock);
 
-		pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
+		/*
+		 * The bootstrap sheaf has zero size, so fast-path allocation
+		 * fails. It also has size == s->sheaf_capacity, so fast-path
+		 * free fails. In the slow paths we recognize the situation by
+		 * checking s->sheaf_capacity. This allows fast paths to assume
+		 * s->cpu_sheaves and pcs->main always exist and are valid.
+		 * It's also safe to share the single static bootstrap_sheaf
+		 * with a zero-sized objects array, as it's never modified.
+		 *
+		 * bootstrap_sheaf also has a NULL pointer to the kmem_cache,
+		 * so we recognize it and do not attempt to free it when
+		 * destroying the cache.
+		 *
+		 * We keep bootstrap_sheaf for kmem_cache and kmem_cache_node,
+		 * caches with debug enabled, and all caches with SLUB_TINY.
+		 * For kmalloc caches it's used temporarily during the initial
+		 * bootstrap.
+		 */
+		if (!s->sheaf_capacity)
+			pcs->main = &bootstrap_sheaf;
+		else
+			pcs->main = alloc_empty_sheaf(s, GFP_KERNEL);
 
 		if (!pcs->main)
 			return -ENOMEM;
@@ -7803,8 +7849,7 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
 void __kmem_cache_release(struct kmem_cache *s)
 {
 	cache_random_seq_destroy(s);
-	if (s->cpu_sheaves)
-		pcs_destroy(s);
+	pcs_destroy(s);
 #ifdef CONFIG_PREEMPT_RT
 	if (s->cpu_slab)
 		lockdep_unregister_key(&s->lock_key);
@@ -7826,7 +7871,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
 			continue;
 		}
 
-		if (s->cpu_sheaves) {
+		if (cache_has_sheaves(s)) {
 			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
 
 			if (!barn)
@@ -8149,7 +8194,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
 	flush_all_cpus_locked(s);
 
 	/* we might have rcu sheaves in flight */
-	if (s->cpu_sheaves)
+	if (cache_has_sheaves(s))
 		rcu_barrier();
 
 	/* Attempt to free all objects */
@@ -8461,7 +8506,7 @@ static int slab_mem_going_online_callback(int nid)
 		if (get_node(s, nid))
 			continue;
 
-		if (s->cpu_sheaves) {
+		if (cache_has_sheaves(s)) {
 			barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
 
 			if (!barn) {
@@ -8669,12 +8714,10 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 
 	set_cpu_partial(s);
 
-	if (s->sheaf_capacity) {
-		s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
-		if (!s->cpu_sheaves) {
-			err = -ENOMEM;
-			goto out;
-		}
+	s->cpu_sheaves = alloc_percpu(struct slub_percpu_sheaves);
+	if (!s->cpu_sheaves) {
+		err = -ENOMEM;
+		goto out;
 	}
 
 #ifdef CONFIG_NUMA
@@ -8693,11 +8736,9 @@ int do_kmem_cache_create(struct kmem_cache *s, const char *name,
 	if (!alloc_kmem_cache_cpus(s))
 		goto out;
 
-	if (s->cpu_sheaves) {
-		err = init_percpu_sheaves(s);
-		if (err)
-			goto out;
-	}
+	err = init_percpu_sheaves(s);
+	if (err)
+		goto out;
 
 	err = 0;
 
-- 
2.52.0
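
For readers new to the sheaf code, the bootstrap trick above can be modelled
by a small, self-contained userspace sketch. None of the names below (struct
cache, cache_alloc, cache_free, alloc_slowpath) are kernel APIs; they only
illustrate how a shared, never-modified sentinel with size == 0 and
capacity == 0 makes the allocation fast path see "empty" and the free fast
path see "full", so both fall through to a slow path that checks the capacity.

/*
 * Standalone illustration (not kernel code) of the bootstrap-sheaf idea.
 */
#include <stdio.h>
#include <stdlib.h>

struct sheaf {
	unsigned int size;	/* objects currently held */
	void *objects[8];
};

struct cache {
	unsigned int capacity;	/* 0 means "no real sheaves (yet)" */
	struct sheaf *main;	/* never NULL thanks to the sentinel */
};

/* Shared sentinel: zero-sized, never written to, safe to share. */
static struct sheaf bootstrap_sheaf;

static int cache_has_sheaves(struct cache *c)
{
	return c->capacity != 0;
}

static void *alloc_slowpath(struct cache *c)
{
	/* A real implementation would refill c->main here. */
	(void)c;
	return malloc(16);
}

static void *cache_alloc(struct cache *c)
{
	/* Fast path: the sentinel has size == 0, so this is always skipped. */
	if (c->main->size > 0)
		return c->main->objects[--c->main->size];
	return alloc_slowpath(c);
}

static void cache_free(struct cache *c, void *obj)
{
	/* Fast path: the sentinel has size == capacity (0 == 0), so skipped. */
	if (c->main->size < c->capacity) {
		c->main->objects[c->main->size++] = obj;
		return;
	}
	/* Slow path recognizes the bootstrap case via the capacity. */
	if (!cache_has_sheaves(c)) {
		free(obj);
		return;
	}
	/* ... a real implementation would flush the full sheaf and retry ... */
	free(obj);
}

int main(void)
{
	struct cache boot = { .capacity = 0, .main = &bootstrap_sheaf };
	void *p = cache_alloc(&boot);	/* falls back, sentinel untouched */

	cache_free(&boot, p);		/* falls back, sentinel untouched */
	printf("sentinel size is still %u\n", bootstrap_sheaf.size);
	return 0;
}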