From nobody Mon May 25 08:11:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C93D83F9292 for ; Fri, 15 May 2026 16:24:41 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862281; cv=none; b=rFlg3LJLkF+wSr2BV/kc2PKZoCaOIKqoUmnCnf0/h7E7GGXgyt/SFzmU+LlDpxcNTSorqujvwdXTNgP7uvIcgBG0H+JrMe19cZo2VRN6JHpOwAJpax4vmua4ai9N43YKzj34gOjdAsL2k8pig/0xRkV7vj4HwDRrT42iWXiImvM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862281; c=relaxed/simple; bh=EYPxp3BS503vPYeqgCva7KZMCxYvgQlPQAcZTIN2fQA=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=dRQ+kddAM4ridlDK74kUOl0SG6pcrQ/HVzVk1Aj4IcFRBphDdoMKc/uQ07z2oKpBQ8qGj/Adli2p2H/qUl7ZoiZbIe0dQRjGQZE59fkMzxQu4ZNI+yV9ivRdRkyVMmiQ4mz4XT7CGIoU0cWCIYEAMOTnLgC9GVcF0jghTXefS6M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=NHj4PGGj; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="NHj4PGGj" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B05EFC2BCB3; Fri, 15 May 2026 16:24:39 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1778862281; bh=EYPxp3BS503vPYeqgCva7KZMCxYvgQlPQAcZTIN2fQA=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=NHj4PGGjWVqj4heBlOwpnCF8Lu2CxX7rp0GRoOUwVffhmaROBnA665SPrxppjDVyh S8tr9O1BkG1+pHvszW1CKnFNhQZFb70OkMgKicuWRsnEd/Egz24qMPdCku9T87mAcr iNGRZsUQJOhO/PuutCm9BSfBdJe8+Qo5Ek+VwMXMxpesqmVyGwZin7jvVJ3wRH1s/B 
bTtyL1I5hMwY+O8k9Vuk6JvZliEOre0/CnWysb1XkngTVhZHwmFSFA8gqDaS4qGlKg KLQNhFUCaZj0d5d9vWusw4b+jsMG8jP4flA+Nwg9+xP6ZzLcfpFAXF0Eqm0TOYyNdt 7FOGiwx/9iG+A== From: "Harry Yoo (Oracle)" Date: Sat, 16 May 2026 01:24:25 +0900 Subject: [PATCH RFC 1/8] mm/slab: do not store cache pointer in struct slab_sheaf Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260516-sheaves-tuning-v1-1-221aa3e1d829@kernel.org> References: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> In-Reply-To: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> To: Vlastimil Babka , Andrew Morton , Hao Li , Christoph Lameter , David Rientjes , Roman Gushchin Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Suren Baghdasaryan , "Liam R. Howlett" X-Mailer: b4 0.16-dev The `cache` field of struct slab_sheaf is only read on the slow path when freeing an RCU sheaf. Storing it in every sheaf is overkill. Drop the field. In rcu_free_sheaf() and rcu_free_sheaf_nobarn(), fetch the kmem_cache pointer via virt_to_slab(sheaf->objects[0])->slab_cache instead. As a sheaf is only attached to pcs->rcu_free once it holds at least one object, the lookup is safe. Add a WARN_ON_ONCE() in case an empty sheaf ever reaches the RCU free path. In that case, the cache is unknown, so free_empty_sheaf() now tolerates a NULL cache argument. However, this case is not expected to ever trigger. While at it, remove the stale comment in init_percpu_sheaves(). 
Signed-off-by: Harry Yoo (Oracle) Reviewed-by: Hao Li --- mm/slub.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 5ef54d546bc2..75281eb802de 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -422,7 +422,6 @@ struct slab_sheaf { bool pfmemalloc; }; }; - struct kmem_cache *cache; unsigned int size; int node; /* only used for rcu_sheaf */ void *objects[]; @@ -2781,8 +2780,6 @@ static struct slab_sheaf *__alloc_empty_sheaf(struct = kmem_cache *s, gfp_t gfp, if (unlikely(!sheaf)) return NULL; =20 - sheaf->cache =3D s; - stat(s, SHEAF_ALLOC); =20 return sheaf; @@ -2802,13 +2799,14 @@ static void free_empty_sheaf(struct kmem_cache *s, = struct slab_sheaf *sheaf) * warning, therefore replace NULL with CODETAG_EMPTY to indicate * that the extension for this sheaf is expected to be NULL. */ - if (s->flags & SLAB_KMALLOC) + if (s && (s->flags & SLAB_KMALLOC)) mark_obj_codetag_empty(sheaf); =20 VM_WARN_ON_ONCE(sheaf->size > 0); kfree(sheaf); =20 - stat(s, SHEAF_FREE); + if (s) + stat(s, SHEAF_FREE); } =20 static unsigned int @@ -2968,12 +2966,15 @@ static void rcu_free_sheaf_nobarn(struct rcu_head *= head) struct kmem_cache *s; =20 sheaf =3D container_of(head, struct slab_sheaf, rcu_head); - s =3D sheaf->cache; + if (WARN_ON_ONCE(!sheaf->size)) { + free_empty_sheaf(NULL, sheaf); + return; + } =20 + s =3D virt_to_slab(sheaf->objects[0])->slab_cache; __rcu_free_sheaf_prepare(s, sheaf); =20 sheaf_flush_unused(s, sheaf); - free_empty_sheaf(s, sheaf); } =20 @@ -5019,7 +5020,6 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t = gfp, unsigned int size) return NULL; =20 stat(s, SHEAF_PREFILL_OVERSIZE); - sheaf->cache =3D s; sheaf->capacity =3D size; =20 /* @@ -5873,8 +5873,12 @@ static void rcu_free_sheaf(struct rcu_head *head) struct kmem_cache *s; =20 sheaf =3D container_of(head, struct slab_sheaf, rcu_head); + if (WARN_ON_ONCE(!sheaf->size)) { + free_empty_sheaf(NULL, sheaf); + return; + } =20 - s =3D 
sheaf->cache; + s =3D virt_to_slab(sheaf->objects[0])->slab_cache; =20 /* * This may remove some objects due to slab_free_hook() returning false, @@ -7616,10 +7620,6 @@ static int init_percpu_sheaves(struct kmem_cache *s) * It's also safe to share the single static bootstrap_sheaf * with zero-sized objects array as it's never modified. * - * Bootstrap_sheaf also has NULL pointer to kmem_cache so we - * recognize it and not attempt to free it when destroying the - * cache. - * * We keep bootstrap_sheaf for kmem_cache and kmem_cache_node, * caches with debug enabled, and all caches with SLUB_TINY. * For kmalloc caches it's used temporarily during the initial --=20 2.43.0 From nobody Mon May 25 08:11:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 453D83FD94E for ; Fri, 15 May 2026 16:24:44 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862284; cv=none; b=GDbUXd//5PV4FBwaguidl/+oYMhzYASmoHqkYUJL2pHFylszJ9r/7/b81QcCOhiItVAz7TZ1PW7lG9Nkydlxxk3H0FihO51NEh6F3hRAKqpyaLqlOiR0KgBa3/BQbIy+6ssWeVdZs7IVg0ek+xXcjtUhqkJzZ/UCnSlfON6PvkM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862284; c=relaxed/simple; bh=mY8sogTT47E6WZnzv6R2mVXWjFcYPBLJkIVpwMtmkQE=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=GloqoAF2NLzYDUkZh8rslLxXFNmla3SusU8HoxohgvmCIJ3TBypSMaU4aAjndq5kvhCXpabaa4yX+llR41nP8LLaYqFfEs5R+RtiSgXAPYvRvst8wzUknhZjVwtf9jleTD6eGFHGnQ9CehMiYVnEKRXXzVt2fJ5W6jBece2SKy0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Wx/YP4Wt; arc=none smtp.client-ip=10.30.226.201 
Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Wx/YP4Wt" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 218F8C2BCB0; Fri, 15 May 2026 16:24:41 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1778862284; bh=mY8sogTT47E6WZnzv6R2mVXWjFcYPBLJkIVpwMtmkQE=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=Wx/YP4WtDdwRnvDlY399gmxLFZiDLoRGb/plBPW4wK5meiZ3U2FmWXf/1l302m3yl 41NDa29HVdmjV0DrZqnijBjdtLPkddwjXNeQeVcHYOc6HmNrWJYohoBELzDNfijtN6 zi8d/I27IaLcPG8sDhGZK4AvsKb1MtzPErOn67V8LKUQYkebeMWfISSkeaQ6/0SMMe wKIXJTA0vs6EqslcxBh0grtD3olCa1sMm5dDNcP1u3tRDiIJoVanqLWBZWwGS1DZEa dkzwTyupqSuiZ859K4uG90/zJ61fEVjO0ZGrSwwSklJa+cuW5ckXarf7pYApvO8baP W+MzbE81x+YIA== From: "Harry Yoo (Oracle)" Date: Sat, 16 May 2026 01:24:26 +0900 Subject: [PATCH RFC 2/8] mm/slab: change sheaf_capacity type to unsigned short Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260516-sheaves-tuning-v1-2-221aa3e1d829@kernel.org> References: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> In-Reply-To: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> To: Vlastimil Babka , Andrew Morton , Hao Li , Christoph Lameter , David Rientjes , Roman Gushchin Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Suren Baghdasaryan , "Liam R. Howlett" X-Mailer: b4 0.16-dev Change struct kmem_cache.sheaf_capacity and the matching kmem_cache_args field from unsigned int to unsigned short, so that we can add a new field later without growing the struct size. unsigned short is a reasonable size for any realistic configurations. 
Signed-off-by: Harry Yoo (Oracle) --- include/linux/slab.h | 8 ++++---- mm/slab.h | 2 +- mm/slub.c | 34 +++++++++++++++++----------------- tools/include/linux/slab.h | 14 +++++++------- tools/testing/shared/linux.c | 4 ++-- 5 files changed, 31 insertions(+), 31 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 2b5ab488e96b..6f023f04763a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -371,7 +371,7 @@ struct kmem_cache_args { * * %0 means no sheaves will be created. */ - unsigned int sheaf_capacity; + unsigned short sheaf_capacity; }; =20 struct kmem_cache *__kmem_cache_create_args(const char *name, @@ -828,10 +828,10 @@ void *kmem_cache_alloc_node_noprof(struct kmem_cache = *s, gfp_t flags, #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_nopro= f(__VA_ARGS__)) =20 struct slab_sheaf * -kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int siz= e); +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned short s= ize); =20 int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, - struct slab_sheaf **sheafp, unsigned int size); + struct slab_sheaf **sheafp, unsigned short size); =20 void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, struct slab_sheaf *sheaf); @@ -841,7 +841,7 @@ void *kmem_cache_alloc_from_sheaf_noprof(struct kmem_ca= che *cachep, gfp_t gfp, #define kmem_cache_alloc_from_sheaf(...) 
\ alloc_hooks(kmem_cache_alloc_from_sheaf_noprof(__VA_ARGS__)) =20 -unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf); +unsigned short kmem_cache_sheaf_size(struct slab_sheaf *sheaf); =20 /* * These macros allow declaring a kmem_buckets * parameter alongside size,= which diff --git a/mm/slab.h b/mm/slab.h index bf2f87acf5e3..dfbe73011cb8 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -204,7 +204,7 @@ struct kmem_cache { unsigned int object_size; /* Object size without metadata */ struct reciprocal_value reciprocal_size; unsigned int offset; /* Free pointer offset */ - unsigned int sheaf_capacity; + unsigned short sheaf_capacity; struct kmem_cache_order_objects oo; =20 /* Allocation and freeing of slabs */ diff --git a/mm/slub.c b/mm/slub.c index 75281eb802de..a1974523bba9 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -418,11 +418,11 @@ struct slab_sheaf { struct list_head barn_list; /* only used for prefilled sheafs */ struct { - unsigned int capacity; + unsigned short capacity; bool pfmemalloc; }; }; - unsigned int size; + unsigned short size; int node; /* only used for rcu_sheaf */ void *objects[]; }; @@ -2756,7 +2756,7 @@ static inline void *setup_object(struct kmem_cache *s= , void *object) } =20 static struct slab_sheaf *__alloc_empty_sheaf(struct kmem_cache *s, gfp_t = gfp, - unsigned int capacity) + unsigned short capacity) { struct slab_sheaf *sheaf; size_t sheaf_size; @@ -2854,10 +2854,10 @@ static void __kmem_cache_free_bulk(struct kmem_cach= e *s, size_t size, void **p); * * Returns how many objects are remaining to be flushed */ -static unsigned int __sheaf_flush_main_batch(struct kmem_cache *s) +static unsigned short __sheaf_flush_main_batch(struct kmem_cache *s) { struct slub_percpu_sheaves *pcs; - unsigned int batch, remaining; + unsigned short batch, remaining; void *objects[PCS_BATCH_MAX]; struct slab_sheaf *sheaf; =20 @@ -2884,7 +2884,7 @@ static unsigned int __sheaf_flush_main_batch(struct k= mem_cache *s) =20 static void 
sheaf_flush_main(struct kmem_cache *s) { - unsigned int remaining; + unsigned short remaining; =20 do { local_lock(&s->cpu_sheaves->lock); @@ -2899,7 +2899,7 @@ static void sheaf_flush_main(struct kmem_cache *s) */ static bool sheaf_try_flush_main(struct kmem_cache *s) { - unsigned int remaining; + unsigned short remaining; bool ret =3D false; =20 do { @@ -4849,7 +4849,7 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s= , gfp_t gfp, size_t size, do_alloc: =20 main =3D pcs->main; - batch =3D min(size, main->size); + batch =3D min_t(size_t, size, main->size); =20 main->size -=3D batch; memcpy(p, main->objects + main->size, batch * sizeof(void *)); @@ -5004,7 +5004,7 @@ static int __kmem_cache_alloc_bulk(struct kmem_cache = *s, gfp_t flags, * return NULL if sheaf allocation or prefilling failed */ struct slab_sheaf * -kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int siz= e) +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned short s= ize) { struct slub_percpu_sheaves *pcs; struct slab_sheaf *sheaf =3D NULL; @@ -5146,7 +5146,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gf= p_t gfp, * In practice we always refill to full sheaf's capacity. 
*/ int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, - struct slab_sheaf **sheafp, unsigned int size) + struct slab_sheaf **sheafp, unsigned short size) { struct slab_sheaf *sheaf; =20 @@ -5225,7 +5225,7 @@ kmem_cache_alloc_from_sheaf_noprof(struct kmem_cache = *s, gfp_t gfp, return ret; } =20 -unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) +unsigned short kmem_cache_sheaf_size(struct slab_sheaf *sheaf) { return sheaf->size; } @@ -6172,7 +6172,7 @@ static void free_to_pcs_bulk(struct kmem_cache *s, si= ze_t size, void **p) =20 do_free: main =3D pcs->main; - batch =3D min(size, s->sheaf_capacity - main->size); + batch =3D min_t(size_t, size, s->sheaf_capacity - main->size); =20 memcpy(main->objects + main->size, p, batch * sizeof(void *)); main->size +=3D batch; @@ -7759,11 +7759,11 @@ static int init_kmem_cache_nodes(struct kmem_cache = *s) return 1; } =20 -static unsigned int calculate_sheaf_capacity(struct kmem_cache *s, - struct kmem_cache_args *args) +static unsigned short calculate_sheaf_capacity(struct kmem_cache *s, + struct kmem_cache_args *args) =20 { - unsigned int capacity; + unsigned short capacity; size_t size; =20 =20 @@ -8466,7 +8466,7 @@ static struct kmem_cache * __init bootstrap(struct km= em_cache *static_cache) static void __init bootstrap_cache_sheaves(struct kmem_cache *s) { struct kmem_cache_args empty_args =3D {}; - unsigned int capacity; + unsigned short capacity; bool failed =3D false; int node, cpu; =20 @@ -9091,7 +9091,7 @@ SLAB_ATTR_RO(order); =20 static ssize_t sheaf_capacity_show(struct kmem_cache *s, char *buf) { - return sysfs_emit(buf, "%u\n", s->sheaf_capacity); + return sysfs_emit(buf, "%hu\n", s->sheaf_capacity); } SLAB_ATTR_RO(sheaf_capacity); =20 diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h index 6d8e9413d5a4..76d0b9da6cfe 100644 --- a/tools/include/linux/slab.h +++ b/tools/include/linux/slab.h @@ -47,7 +47,7 @@ struct kmem_cache { pthread_mutex_t lock; unsigned int size; 
unsigned int align; - unsigned int sheaf_capacity; + unsigned short sheaf_capacity; int nr_objs; void *objs; void (*ctor)(void *); @@ -70,7 +70,7 @@ struct kmem_cache_args { /** * @sheaf_capacity: The maximum size of the sheaf. */ - unsigned int sheaf_capacity; + unsigned short sheaf_capacity; /** * @useroffset: Usercopy region offset. * @@ -127,10 +127,10 @@ struct slab_sheaf { union { struct list_head barn_list; /* only used for prefilled sheafs */ - unsigned int capacity; + unsigned short capacity; }; struct kmem_cache *cache; - unsigned int size; + unsigned short size; int node; /* only used for rcu_sheaf */ void *objects[]; }; @@ -186,7 +186,7 @@ void kmem_cache_free_bulk(struct kmem_cache *cachep, si= ze_t size, void **list); int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t siz= e, void **list); struct slab_sheaf * -kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int siz= e); +kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned short s= ize); =20 void * kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp, @@ -195,9 +195,9 @@ kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t= gfp, void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp, struct slab_sheaf *sheaf); int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, - struct slab_sheaf **sheafp, unsigned int size); + struct slab_sheaf **sheafp, unsigned short size); =20 -static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf) +static inline unsigned short kmem_cache_sheaf_size(struct slab_sheaf *shea= f) { return sheaf->size; } diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 8c7257155958..2da3a6617d87 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -252,7 +252,7 @@ __kmem_cache_create_args(const char *name, unsigned int= size, } =20 struct slab_sheaf * -kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int siz= e) 
+kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned short s= ize) { struct slab_sheaf *sheaf; unsigned int capacity; @@ -281,7 +281,7 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gf= p, unsigned int size) } =20 int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp, - struct slab_sheaf **sheafp, unsigned int size) + struct slab_sheaf **sheafp, unsigned short size) { struct slab_sheaf *sheaf =3D *sheafp; int refill; --=20 2.43.0 From nobody Mon May 25 08:11:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A4EE73FF1D8 for ; Fri, 15 May 2026 16:24:46 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862286; cv=none; b=VImflEfJ2NT/WcfzTSGFdp/AWFd0WgnTjwXS7KZBhBiVPnO1hqkeCt8F6jk+6gUoC38hd98AeA4ESfVRD28uXWQhlhyvjoD7+Cf1Yi9iEjEsHyMyuyYcYvmyAq4tfOPReY+9vECffD2pk4iXliKlYGUlw9X+NyP5aW/y0oNTzLc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862286; c=relaxed/simple; bh=n8roWf7IiSB3Cp7Yv9UEETQTWbilYSL2KXuyP3eF33Y=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=rWZqRHWO0rUe/R2J6KTHDrbwcT/8XHBZrd0xPWZ2CPAPOkUeGRDFG0FsCeWUwmJI90IbSrV5Mwwz8iTosWutQKpdRH1Lomkl5dgVlZn6nlZumnwzKTlWjB9G6VK5NZl1je3dzsK2vsu/7H30/eC7zcNR6s8fTbfaf8xR+K1m9y0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=eJWAZeyo; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="eJWAZeyo" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 8423DC2BCB0; Fri, 15 May 2026 16:24:44 
+0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1778862286; bh=n8roWf7IiSB3Cp7Yv9UEETQTWbilYSL2KXuyP3eF33Y=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=eJWAZeyo/z3+nAssoq1IiCClb1Yh4ZM4vmqc0mW1Gep3eYOUhL8PFm77TbOUlgeG1 BG8DVRvBDu0hnBjONljT94jzp9iWd2R+y7pTPYdR3NoDAck08rwJ6SxvSbk6I0YKaf fiX7WKYgD3dVkOhyWqiCvnr9MbialyZydP2x67YjY+YX5bhJYGSCf726SKLN6AudvR lDE8VIS5iPd/YQOnHjtHhVy88qsQXjBw2WoGcsiqYuVMkX7HSHmtL4HdeK0IVfbNOW Ud+MfKejBmlxfwrGudKM+HAyLlgtqEgt/A7FQfOHePI+xnHbeDCkNP8XwlfBsIGCFX rM5Snaz55NGJw== From: "Harry Yoo (Oracle)" Date: Sat, 16 May 2026 01:24:27 +0900 Subject: [PATCH RFC 3/8] mm/slab: track capacity per sheaf Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260516-sheaves-tuning-v1-3-221aa3e1d829@kernel.org> References: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> In-Reply-To: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> To: Vlastimil Babka , Andrew Morton , Hao Li , Christoph Lameter , David Rientjes , Roman Gushchin Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Suren Baghdasaryan , "Liam R. Howlett" X-Mailer: b4 0.16-dev Currently, only prefilled sheaves have a capacity field, used to record the requested (possibly oversized) capacity. To allow changing sheaf capacity at runtime, track the capacity for each sheaf so that checking if a sheaf is full would work even when changing cache capacity concurrently. 
Signed-off-by: Harry Yoo (Oracle) --- mm/slub.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index a1974523bba9..44f36ae32570 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -417,11 +417,9 @@ struct slab_sheaf { struct rcu_head rcu_head; struct list_head barn_list; /* only used for prefilled sheafs */ - struct { - unsigned short capacity; - bool pfmemalloc; - }; + bool pfmemalloc; }; + unsigned short capacity; unsigned short size; int node; /* only used for rcu_sheaf */ void *objects[]; @@ -2780,6 +2778,8 @@ static struct slab_sheaf *__alloc_empty_sheaf(struct = kmem_cache *s, gfp_t gfp, if (unlikely(!sheaf)) return NULL; =20 + sheaf->capacity =3D capacity; + stat(s, SHEAF_ALLOC); =20 return sheaf; @@ -2816,7 +2816,7 @@ refill_objects(struct kmem_cache *s, void **p, gfp_t = gfp, unsigned int min, static int refill_sheaf(struct kmem_cache *s, struct slab_sheaf *sheaf, gfp_t gfp) { - int to_fill =3D s->sheaf_capacity - sheaf->size; + int to_fill =3D sheaf->capacity - sheaf->size; int filled; =20 if (!to_fill) @@ -5063,7 +5063,6 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t = gfp, unsigned short size) sheaf =3D alloc_empty_sheaf(s, gfp); =20 if (sheaf) { - sheaf->capacity =3D s->sheaf_capacity; sheaf->pfmemalloc =3D false; =20 if (sheaf->size < size && @@ -5688,13 +5687,13 @@ static void __pcs_install_empty_sheaf(struct kmem_c= ache *s, * Unlikely because if the main sheaf had space, we would have just * freed to it. Get rid of our empty sheaf. 
*/ - if (pcs->main->size < s->sheaf_capacity) { + if (pcs->main->size < pcs->main->capacity) { barn_put_empty_sheaf(barn, empty); return; } =20 /* Also unlikely for the same reason */ - if (pcs->spare->size < s->sheaf_capacity) { + if (pcs->spare->size < pcs->spare->capacity) { swap(pcs->main, pcs->spare); barn_put_empty_sheaf(barn, empty); return; @@ -5752,7 +5751,7 @@ __pcs_replace_full_main(struct kmem_cache *s, struct = slub_percpu_sheaves *pcs, goto alloc_empty; } =20 - if (pcs->spare->size < s->sheaf_capacity) { + if (pcs->spare->size < pcs->spare->capacity) { swap(pcs->main, pcs->spare); return pcs; } @@ -5819,7 +5818,7 @@ __pcs_replace_full_main(struct kmem_cache *s, struct = slub_percpu_sheaves *pcs, * but in case we got preempted or migrated, we need to * check again */ - if (pcs->main->size =3D=3D s->sheaf_capacity) + if (pcs->main->size =3D=3D pcs->main->capacity) goto restart; =20 return pcs; @@ -5850,7 +5849,7 @@ bool free_to_pcs(struct kmem_cache *s, void *object, = bool allow_spin) =20 pcs =3D this_cpu_ptr(s->cpu_sheaves); =20 - if (unlikely(pcs->main->size =3D=3D s->sheaf_capacity)) { + if (unlikely(pcs->main->size =3D=3D pcs->main->capacity)) { =20 pcs =3D __pcs_replace_full_main(s, pcs, allow_spin); if (unlikely(!pcs)) @@ -6015,7 +6014,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *ob= j) */ rcu_sheaf->objects[rcu_sheaf->size++] =3D obj; =20 - if (likely(rcu_sheaf->size < s->sheaf_capacity)) { + if (likely(rcu_sheaf->size < rcu_sheaf->capacity)) { rcu_sheaf =3D NULL; } else { pcs->rcu_free =3D NULL; @@ -6139,7 +6138,7 @@ static void free_to_pcs_bulk(struct kmem_cache *s, si= ze_t size, void **p) =20 pcs =3D this_cpu_ptr(s->cpu_sheaves); =20 - if (likely(pcs->main->size < s->sheaf_capacity)) + if (likely(pcs->main->size < pcs->main->capacity)) goto do_free; =20 barn =3D get_barn(s); @@ -6156,7 +6155,7 @@ static void free_to_pcs_bulk(struct kmem_cache *s, si= ze_t size, void **p) goto do_free; } =20 - if (pcs->spare->size < 
s->sheaf_capacity) { + if (pcs->spare->size < pcs->spare->capacity) { swap(pcs->main, pcs->spare); goto do_free; } @@ -6172,7 +6171,7 @@ static void free_to_pcs_bulk(struct kmem_cache *s, si= ze_t size, void **p) =20 do_free: main =3D pcs->main; - batch =3D min_t(size_t, size, s->sheaf_capacity - main->size); + batch =3D min_t(size_t, size, main->capacity - main->size); =20 memcpy(main->objects + main->size, p, batch * sizeof(void *)); main->size +=3D batch; @@ -7613,7 +7612,7 @@ static int init_percpu_sheaves(struct kmem_cache *s) =20 /* * Bootstrap sheaf has zero size so fast-path allocation fails. - * It has also size =3D=3D s->sheaf_capacity, so fast-path free + * It has also size =3D=3D sheaf->capacity, so fast-path free * fails. In the slow paths we recognize the situation by * checking s->sheaf_capacity. This allows fast paths to assume * s->cpu_sheaves and pcs->main always exists and are valid. --=20 2.43.0 From nobody Mon May 25 08:11:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1B49E3B9D91 for ; Fri, 15 May 2026 16:24:50 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862290; cv=none; b=STYxyCzz5S4iE6wLNFsszwDjv9LDCt1aWlcfl2LqztW1jJcdJvmauRiZQNQcZGEELoRiWC6fubvY1LwkPD3u+IYtzuMguc/T0uPzuvJ2hhvQpgfoXjnn1HTUCAMwcGGgkWZEP7YuI8Eb4olY4OHe4+EJj7U0GVbaTiAN9tgv7CA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862290; c=relaxed/simple; bh=p7E51ELkjQ3iEfoGMpm7cOapjWiciEbqUEy1lvQfStY=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; 
b=qpJ669Jrf9vpZ8g+sa65iCqlpl1XxRPaagV4AIRVOCgW8BwYp0KGZ9qHPBEhMM2iwTwQFJ+qC8sAZdd8xv1bo1DJC+eaoy6gLxk7m82ePoNsvTFqVN6KlfzPfNRrXqQxqq3l7AtZUl7tQx8ZAgXfAFBFli3nyu27i4Q2tlQ5I74= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Jdj/KuQR; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Jdj/KuQR" Received: by smtp.kernel.org (Postfix) with ESMTPSA id F31E1C2BCB0; Fri, 15 May 2026 16:24:46 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1778862289; bh=p7E51ELkjQ3iEfoGMpm7cOapjWiciEbqUEy1lvQfStY=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=Jdj/KuQRERrRriQ2EslwPMUFvOg+0uZhQtj1KSxohVCK8GzSivJVkseWGyo5e80cV ZLrOgfCArxxH15U6/ohQWFmKr4zW611h7XtwPEzcy4+yLxLhjFR5d0LXXNfCK85G/6 ng/+GdVfghISC2xEnU+pO9vS9rC4+XHFmT6CQlua/YL78ZFlR4FjH9izhhwCjPHfzs GIo4x/TuGKBYrAfS/rGwrrMjbDF3H03Z6cSruHE2PvTQnT5Yequs/F8EAc6EVFi/Yj nW3sk3Jq9AE0dbYOQ+6u2e3LDtxlKt4WD0V9BLOdVmr3YtAGChkf6gfnJeNzWiDqAw 9bHNO0t7hCizw== From: "Harry Yoo (Oracle)" Date: Sat, 16 May 2026 01:24:28 +0900 Subject: [PATCH RFC 4/8] mm/slab: allow bootstrap_cache_sheaves() to fail Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260516-sheaves-tuning-v1-4-221aa3e1d829@kernel.org> References: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> In-Reply-To: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> To: Vlastimil Babka , Andrew Morton , Hao Li , Christoph Lameter , David Rientjes , Roman Gushchin Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Suren Baghdasaryan , "Liam R. 
Howlett" X-Mailer: b4 0.16-dev Panicking on sheaf allocation failure is acceptable during boot, but to allow changing the sheaf capacity at runtime, the bootstrap path must be able to propagate errors instead. Return an error code from bootstrap_cache_sheaves() so callers can decide how to react. Change it to return an int (0 on success, negative errno on failure), accept capacity as a parameter, and drop __init. Callers without a user-specified capacity pass zero to use the default capacity calculated by the slab allocator. Failures are now handled by the caller. Signed-off-by: Harry Yoo (Oracle) --- mm/slub.c | 46 ++++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 44f36ae32570..fb98d0da5c78 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -8462,18 +8462,18 @@ static struct kmem_cache * __init bootstrap(struct = kmem_cache *static_cache) * init_kmem_cache_nodes(). For normal kmalloc caches we have to bootstrap= it * since sheaves and barns are allocated by kmalloc. 
*/ -static void __init bootstrap_cache_sheaves(struct kmem_cache *s) +static int bootstrap_cache_sheaves(struct kmem_cache *s, + unsigned short capacity) { struct kmem_cache_args empty_args =3D {}; - unsigned short capacity; - bool failed =3D false; - int node, cpu; + int node, cpu, err =3D 0; =20 - capacity =3D calculate_sheaf_capacity(s, &empty_args); + if (!capacity) + capacity =3D calculate_sheaf_capacity(s, &empty_args); =20 /* capacity can be 0 due to debugging or SLUB_TINY */ if (!capacity) - return; + return 0; =20 for_each_node_mask(node, slab_barn_nodes) { struct node_barn *barn; @@ -8481,7 +8481,7 @@ static void __init bootstrap_cache_sheaves(struct kme= m_cache *s) barn =3D kmalloc_node(sizeof(*barn), GFP_KERNEL, node); =20 if (!barn) { - failed =3D true; + err =3D -ENOMEM; goto out; } =20 @@ -8497,31 +8497,37 @@ static void __init bootstrap_cache_sheaves(struct k= mem_cache *s) pcs->main =3D __alloc_empty_sheaf(s, GFP_KERNEL, capacity); =20 if (!pcs->main) { - failed =3D true; + err =3D -ENOMEM; break; } } =20 out: - /* - * It's still early in boot so treat this like same as a failure to - * create the kmalloc cache in the first place - */ - if (failed) - panic("Out of memory when creating kmem_cache %s\n", s->name); + if (!err) + s->sheaf_capacity =3D capacity; =20 - s->sheaf_capacity =3D capacity; + return err; } =20 +#define for_each_normal_kmalloc_cache(s, type, idx) \ + for (type =3D KMALLOC_NORMAL; type <=3D KMALLOC_RANDOM_END; type++) \ + for (idx =3D 0; idx < KMALLOC_SHIFT_HIGH + 1; idx++) \ + if ((s =3D kmalloc_caches[type][idx])) + static void __init bootstrap_kmalloc_sheaves(void) { enum kmalloc_cache_type type; + struct kmem_cache *s; + int idx; =20 - for (type =3D KMALLOC_NORMAL; type <=3D KMALLOC_RANDOM_END; type++) { - for (int idx =3D 0; idx < KMALLOC_SHIFT_HIGH + 1; idx++) { - if (kmalloc_caches[type][idx]) - bootstrap_cache_sheaves(kmalloc_caches[type][idx]); - } + for_each_normal_kmalloc_cache(s, type, idx) { + /* + * It's still 
early in boot so treat this as a failure to + * create the kmalloc cache in the first place. + */ + if (bootstrap_cache_sheaves(s, 0)) + panic("Out of memory when creating kmem_cache %s\n", + s->name); } } =20 --=20 2.43.0 From nobody Mon May 25 08:11:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C77E74BCAA0 for ; Fri, 15 May 2026 16:24:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862292; cv=none; b=bmQUcJvn2KFcSXh3yfa2TltIExnkHg7BYupbLm8Jbl9gPzIF1/KlfQRcGBAI47smIBB4s6Oz0CjIkGBEdppaxAiAg0sVspwiXVZag+DcaSwtco2FQ15Ik9JiU0H3+5gPsMR/3fEvwPpfoiguuUSpJUlPIlLcbogtF20W0TKzOs4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862292; c=relaxed/simple; bh=p4LeJcvkw7gKvzZRYzQC1mqfqJ4paQ5EfhboaJpEGYM=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=FDI6JjngDKV1N8wW90B1Xnd9LaAmDPjhyXltmvK2AslRtYKvVccqMxUudeSIl1pgEMKdSWpESo4nG+LVDKF1Fe6tyW+I/DYM5IVaAkIL+HDcd5/Nmk+A8Ph8b1YpWiBM6glKv/0zRcftJUd85hAxpp3QGaVQK44AxI6+Hb8EsRQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=jwqc+K5k; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="jwqc+K5k" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5853AC2BCB3; Fri, 15 May 2026 16:24:50 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1778862292; bh=p4LeJcvkw7gKvzZRYzQC1mqfqJ4paQ5EfhboaJpEGYM=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; 
b=jwqc+K5knGquBFlk7v1m1bJK4fatuMgjkzh0Mv5oYdtk11w86GoCLWcWY+Ssu/KkC LR3u/wxo+eBipQiZEZZek9LgPSg5OM9KqUzo+tQ1vQSbSSD0n0BenPdarA0WwGbHSZ pT0oZJWcBbq1le8iB4+KsL2QGOwqJd9Rju/GQKTTXOHYH2Vr3HMIQmpCKe2XIdA/Ho 6x090sA4Wori7E59MMNTgD5APQ+fhdqt9RQYYdwXYtcZqmQYKVFZDkQC9FPO95aS0H pm5b7tE6rnHNOsfT1nxGPTY3b1RILgESS6RnukGMB0pVWkq90Y+SFpt3wgyZFfCOCv HMlSHLbNmf9iQ== From: "Harry Yoo (Oracle)" Date: Sat, 16 May 2026 01:24:29 +0900 Subject: [PATCH RFC 5/8] mm/slab: rework cache_has_sheaves() to check immutable properties only Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260516-sheaves-tuning-v1-5-221aa3e1d829@kernel.org> References: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> In-Reply-To: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> To: Vlastimil Babka , Andrew Morton , Hao Li , Christoph Lameter , David Rientjes , Roman Gushchin Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Suren Baghdasaryan , "Liam R. Howlett" X-Mailer: b4 0.16-dev Currently the sheaf capacity is determined when a cache is created and never changes, with normal kmalloc caches as the only exception. Checking whether s->sheaf_capacity is non-zero is therefore sufficient for cache_has_sheaves() to work correctly. However, once s->sheaf_capacity becomes mutable at runtime, both the name and the implementation become confusing and racy: a cache that currently has sheaves may have them disabled at runtime, or vice versa. Except for normal kmalloc caches, what callers of cache_has_sheaves() actually want to know depends only on properties that do not change: 1. Whether the cache has certain flags (SLAB_NO_OBJ_EXT, SLAB_NOLEAKTRACE, SLAB_DEBUG_FLAGS) 2. Whether a certain build option is enabled (CONFIG_SLUB_TINY) Since these never change at runtime, check them directly instead of going through s->sheaf_capacity. 
To avoid confusion, rename cache_has_sheaves() to cache_supports_sheaves(). Normal kmalloc caches need special handling. They don't have sheaves initially and only get them later via bootstrap_kmalloc_sheaves(). As a result, cache_supports_sheaves() can return true while a cache's percpu sheaves still point at the shared bootstrap_sheaf. This special handling might sound like it applies only to normal kmalloc caches, but the same handling is needed when sheaf capacity can change. The existing callers of cache_has_sheaves() fall into two categories. The first category performs operations on the whole cache: kvfree_rcu barrier, cache destruction, sheaf flushing, and CPU/memory hot(un)plug. These should not skip caches that support sheaves, no matter whether they actually have sheaves. If such an operation actually needs to access percpu sheaves, use the new pcs_has_sheaves() helper to skip CPUs whose pcs->main points to the bootstrap_sheaf. The second category allocates from or frees to percpu sheaves directly (in the slowpath). These should confirm pcs_has_sheaves() returns true before proceeding. In addition, init_kmem_cache_nodes() skips barn allocation for normal kmalloc caches. Their barns are set up later by bootstrap_kmalloc_sheaves(). Change calculate_sheaf_capacity() to call cache_supports_sheaves() directly instead of open-coding the same conditions. 
Signed-off-by: Harry Yoo (Oracle) --- mm/slab.h | 36 ++++++++++++++++++++++++ mm/slab_common.c | 2 +- mm/slub.c | 85 ++++++++++++++++++++++++++++++++--------------------= ---- 3 files changed, 86 insertions(+), 37 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index dfbe73011cb8..907a8207809c 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -481,6 +481,42 @@ static inline bool kmem_cache_debug_flags(struct kmem_= cache *s, slab_flags_t fla return false; } =20 +static inline bool kmem_cache_debug(struct kmem_cache *s) +{ + return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS); +} + +/* + * Every cache has !NULL s->cpu_sheaves but they may point to the + * bootstrap_sheaf temporarily during init, or permanently for the boot ca= ches + * and caches with debugging enabled, or all caches with CONFIG_SLUB_TINY.= This + * helper distinguishes whether cache supports real non-bootstrap sheaves. + * + * Return false when the cache does not support sheaves. + * + * When it returns true, the cache may or may not have sheaves. + * Callers who access percpu sheaves must verify that they actually have + * sheaves enabled. + */ +static inline bool cache_supports_sheaves(struct kmem_cache *s) +{ + if (IS_ENABLED(CONFIG_SLUB_TINY)) + return false; + + if (kmem_cache_debug(s)) + return false; + /* + * Bootstrap caches can't have sheaves for now (SLAB_NO_OBJ_EXT). + * SLAB_NOLEAKTRACE caches (e.g., kmemleak's object_cache) must not + * have sheaves to avoid recursion when sheaf allocation triggers + * kmemleak tracking. 
+ */ + if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE)) + return false; + + return true; +} + #if IS_ENABLED(CONFIG_SLUB_DEBUG) && IS_ENABLED(CONFIG_KUNIT) bool slab_in_kunit_test(void); #else diff --git a/mm/slab_common.c b/mm/slab_common.c index d5a70a831a2a..3092c1c3f284 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -2109,7 +2109,7 @@ EXPORT_SYMBOL_GPL(kvfree_rcu_barrier); */ void kvfree_rcu_barrier_on_cache(struct kmem_cache *s) { - if (cache_has_sheaves(s)) { + if (cache_supports_sheaves(s)) { flush_rcu_sheaves_on_cache(s); rcu_barrier(); } diff --git a/mm/slub.c b/mm/slub.c index fb98d0da5c78..c746c9b48728 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -238,11 +238,6 @@ struct slab_obj_iter { #endif }; =20 -static inline bool kmem_cache_debug(struct kmem_cache *s) -{ - return kmem_cache_debug_flags(s, SLAB_DEBUG_FLAGS); -} - void *fixup_red_left(struct kmem_cache *s, void *p) { if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) @@ -432,6 +427,23 @@ struct slub_percpu_sheaves { struct slab_sheaf *rcu_free; /* for batching kfree_rcu() */ }; =20 +static struct slab_sheaf bootstrap_sheaf =3D {}; + +static inline bool pcs_has_sheaves_unlocked(struct slub_percpu_sheaves *pc= s) +{ + /* Test CONFIG_SLUB_TINY for code elimination purposes */ + if (IS_ENABLED(CONFIG_SLUB_TINY)) + return false; + + return unlikely(pcs->main !=3D &bootstrap_sheaf); +} + +static inline bool pcs_has_sheaves(struct slub_percpu_sheaves *pcs) +{ + lockdep_assert_held(&pcs->lock); + return pcs_has_sheaves_unlocked(pcs); +} + /* * The slab lists for all objects. 
*/ @@ -3045,8 +3057,7 @@ static void pcs_destroy(struct kmem_cache *s) if (!s->cpu_sheaves) return; =20 - /* pcs->main can only point to the bootstrap sheaf, nothing to free */ - if (!cache_has_sheaves(s)) + if (!cache_supports_sheaves(s)) goto free_pcs; =20 for_each_possible_cpu(cpu) { @@ -3058,6 +3069,9 @@ static void pcs_destroy(struct kmem_cache *s) if (!pcs->main) continue; =20 + if (!pcs_has_sheaves_unlocked(pcs)) + continue; + /* * We have already passed __kmem_cache_shutdown() so everything * was flushed and there should be no objects allocated from @@ -3949,7 +3963,7 @@ static bool has_pcs_used(int cpu, struct kmem_cache *= s) { struct slub_percpu_sheaves *pcs; =20 - if (!cache_has_sheaves(s)) + if (!cache_supports_sheaves(s)) return false; =20 pcs =3D per_cpu_ptr(s->cpu_sheaves, cpu); @@ -3971,7 +3985,7 @@ static void flush_cpu_sheaves(struct work_struct *w) =20 s =3D sfw->s; =20 - if (cache_has_sheaves(s)) + if (cache_supports_sheaves(s)) pcs_flush_all(s); } =20 @@ -4074,7 +4088,7 @@ void flush_all_rcu_sheaves(void) mutex_lock(&slab_mutex); =20 list_for_each_entry(s, &slab_caches, list) { - if (!cache_has_sheaves(s)) + if (!cache_supports_sheaves(s)) continue; flush_rcu_sheaves_on_cache(s); } @@ -4109,7 +4123,7 @@ static int slub_cpu_setup(unsigned int cpu) /* * barn might already exist if a previous callback failed midway */ - if (!cache_has_sheaves(s) || get_barn_node(s, nid)) + if (!cache_supports_sheaves(s) || get_barn_node(s, nid)) continue; =20 barn =3D kmalloc_node(sizeof(*barn), GFP_KERNEL, nid); @@ -4140,7 +4154,7 @@ static int slub_cpu_dead(unsigned int cpu) =20 mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) { - if (cache_has_sheaves(s)) + if (cache_supports_sheaves(s)) __pcs_flush_all_cpu(s, cpu); } mutex_unlock(&slab_mutex); @@ -4612,8 +4626,8 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct= slub_percpu_sheaves *pcs, =20 lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); =20 - /* Bootstrap or debug cache, 
back off */ - if (unlikely(!cache_has_sheaves(s))) { + /* Sheaves are not supported or disabled for this cache */ + if (unlikely(!pcs_has_sheaves(pcs))) { local_unlock(&s->cpu_sheaves->lock); return NULL; } @@ -4809,7 +4823,7 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s= , gfp_t gfp, size_t size, struct slab_sheaf *full; struct node_barn *barn; =20 - if (unlikely(!cache_has_sheaves(s))) { + if (unlikely(!pcs_has_sheaves(pcs))) { local_unlock(&s->cpu_sheaves->lock); return allocated; } @@ -5727,8 +5741,8 @@ __pcs_replace_full_main(struct kmem_cache *s, struct = slub_percpu_sheaves *pcs, restart: lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); =20 - /* Bootstrap or debug cache, back off */ - if (unlikely(!cache_has_sheaves(s))) { + /* Sheaves are not supported or disabled for this cache */ + if (unlikely(!pcs_has_sheaves(pcs))) { local_unlock(&s->cpu_sheaves->lock); return NULL; } @@ -5959,8 +5973,8 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *ob= j) struct slab_sheaf *empty; struct node_barn *barn; =20 - /* Bootstrap or debug cache, fall back */ - if (unlikely(!cache_has_sheaves(s))) { + /* Sheaves are not supported or disabled for this cache */ + if (unlikely(!pcs_has_sheaves(pcs))) { local_unlock(&s->cpu_sheaves->lock); goto fail; } @@ -6138,6 +6152,11 @@ static void free_to_pcs_bulk(struct kmem_cache *s, s= ize_t size, void **p) =20 pcs =3D this_cpu_ptr(s->cpu_sheaves); =20 + if (unlikely(!pcs_has_sheaves(pcs))) { + local_unlock(&s->cpu_sheaves->lock); + goto fallback; + } + if (likely(pcs->main->size < pcs->main->capacity)) goto do_free; =20 @@ -7131,7 +7150,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_= t size, void **p) * freeing to sheaves is so incompatible with the detached freelist so * once we go that way, we have to do everything differently */ - if (s && cache_has_sheaves(s)) { + if (s && cache_supports_sheaves(s)) { free_to_pcs_bulk(s, size, p); return; } @@ -7600,7 +7619,6 @@ static inline int 
alloc_kmem_cache_stats(struct kmem_= cache *s) =20 static int init_percpu_sheaves(struct kmem_cache *s) { - static struct slab_sheaf bootstrap_sheaf =3D {}; int cpu; =20 for_each_possible_cpu(cpu) { @@ -7614,7 +7632,7 @@ static int init_percpu_sheaves(struct kmem_cache *s) * Bootstrap sheaf has zero size so fast-path allocation fails. * It has also size =3D=3D sheaf->capacity, so fast-path free * fails. In the slow paths we recognize the situation by - * checking s->sheaf_capacity. This allows fast paths to assume + * pcs_has_sheaves(). This allows fast paths to assume * s->cpu_sheaves and pcs->main always exists and are valid. * It's also safe to share the single static bootstrap_sheaf * with zero-sized objects array as it's never modified. @@ -7631,6 +7649,7 @@ static int init_percpu_sheaves(struct kmem_cache *s) =20 if (!pcs->main) return -ENOMEM; + } =20 return 0; @@ -7740,7 +7759,11 @@ static int init_kmem_cache_nodes(struct kmem_cache *= s) s->per_node[node].node =3D n; } =20 - if (slab_state =3D=3D DOWN || !cache_has_sheaves(s)) + if (slab_state =3D=3D DOWN || !cache_supports_sheaves(s)) + return 1; + + /* Enable sheaves later to avoid the chicken and egg problem */ + if (is_kmalloc_normal(s)) return 1; =20 for_each_node_mask(node, slab_barn_nodes) { @@ -7765,17 +7788,7 @@ static unsigned short calculate_sheaf_capacity(struc= t kmem_cache *s, unsigned short capacity; size_t size; =20 - - if (IS_ENABLED(CONFIG_SLUB_TINY) || s->flags & SLAB_DEBUG_FLAGS) - return 0; - - /* - * Bootstrap caches can't have sheaves for now (SLAB_NO_OBJ_EXT). - * SLAB_NOLEAKTRACE caches (e.g., kmemleak's object_cache) must not - * have sheaves to avoid recursion when sheaf allocation triggers - * kmemleak tracking. 
- */ - if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE)) + if (!cache_supports_sheaves(s)) return 0; =20 /* @@ -8040,7 +8053,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s) flush_all_cpus_locked(s); =20 /* we might have rcu sheaves in flight */ - if (cache_has_sheaves(s)) + if (cache_supports_sheaves(s)) rcu_barrier(); =20 for_each_node(node) { @@ -8361,7 +8374,7 @@ static int slab_mem_going_online_callback(int nid) if (get_node(s, nid)) continue; =20 - if (cache_has_sheaves(s) && !get_barn_node(s, nid)) { + if (cache_supports_sheaves(s) && !get_barn_node(s, nid)) { =20 barn =3D kmalloc_node(sizeof(*barn), GFP_KERNEL, nid); =20 --=20 2.43.0 From nobody Mon May 25 08:11:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CEF924BCACD for ; Fri, 15 May 2026 16:24:55 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862295; cv=none; b=QJkVIAuq8uPEyCAXYDBSV48OP2Ap431IN92zY4eZgDUrf24Ru2RuZsf8+fR9xRoryKGPpWPwz1Q9VvfFC7tQfO5LDp1/gfrsYBPH9gD1wgTWtHurtmGBsYkS7JhNBzj07hHnPDbdX850pieRgyifW9Ct0DR6gaDkYuVxWPyIHvo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862295; c=relaxed/simple; bh=1Hbg57nzucmqI4BlrwpvXWXGzZTX2ByPVlWDsSZQJgo=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=dGNdCoomvGxune+qZ16v5h8gYXNP2fRSXU4SkMRMMEg2o4LCaa7begP+uM3AcImqjgw/Er67NCg+ZeaThSCmdcoq6SjL5CRjr4jzH7aSiCH68/v+Pbpqr3LDnSux4YByRW4Jkegh7NRmstdHAPu4AWgegT+wT2wyhvHbly2ojNQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=kJVtU4pi; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: 
smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="kJVtU4pi" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 35013C2BCB0; Fri, 15 May 2026 16:24:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1778862295; bh=1Hbg57nzucmqI4BlrwpvXWXGzZTX2ByPVlWDsSZQJgo=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=kJVtU4piTNUdTCPHNEdhZhgAc+WwvlrtKm0hJLlZd8o+wftki5fasFOB/gjcHRavu S1aduNjRMDWIlIr5GRD22SuQfPmQrRAhJW9NI88x4VMVC1mvxmFrnusPJlPB95maoQ xOlwoc3R1Ys/4WeV1/A3UBzVJb2KqVkMjC1EQVPiQnhAdeyFfCi3VbZn7DGhM/myjo OdoSzesDFPGjjPZjqFBLsetPJ+h1nBA6BhL6kF87sYWAxRtau/rKgUCMBzS4YWJ9yK LUjFx8nXyLhLMb1dvDUzx3mM1+HD/EQ+Ync884RFSkjHj8DF481M3kP6ZuqUYrQht/ 4Bd+FvsGYBTIQ== From: "Harry Yoo (Oracle)" Date: Sat, 16 May 2026 01:24:30 +0900 Subject: [PATCH RFC 6/8] mm/slab: allow changing sheaf_capacity at runtime Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260516-sheaves-tuning-v1-6-221aa3e1d829@kernel.org> References: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> In-Reply-To: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> To: Vlastimil Babka , Andrew Morton , Hao Li , Christoph Lameter , David Rientjes , Roman Gushchin Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Suren Baghdasaryan , "Liam R. Howlett" X-Mailer: b4 0.16-dev Make the sheaf_capacity sysfs attribute writable so that the sheaf capacity can be tuned at runtime per-cache. The steps to change sheaf capacity: 1. Disable sheaves: make all online CPUs replace their main sheaves with the bootstrap sheaf under local_lock and wait for completion. For offline CPUs, update their pcs directly under cpu_hotplug_lock. 2. Wait for pre-existing RCU callbacks to complete so that RCU sheaves are returned to the barn. 3. 
Shrink the cache to flush and free all existing sheaves. 4. Re-enable sheaves with the new capacity by calling bootstrap_cache_sheaves(). If this fails, sheaves remain disabled for the cache. Use slab_mutex to serialize sheaf capacity updates. If sheaves of different capacities can coexist after several updates to the sheaf_capacity, performance becomes hard to predict. It is important to allow only a single capacity at any given point in time. To achieve that, it is required to check whether the sheaf capacity is stale. However, performing this check without an expensive synchronization mechanism (SRCU, atomics, etc.) is inevitably racy. Instead, a copy of sheaf capacity is stored in struct slub_percpu_sheaves and each CPU has its own copy of the capacity. With that, we can guarantee that this value remains stable under local_lock. If local_lock is acquired while the sheaf capacity update is in progress, then either: 1. Sheaves on the CPU have already been disabled (meaning pcs->main points at bootstrap_sheaf, falling back to slowpath), or 2. After releasing local_lock, percpu sheaves will be flushed and disabled for the CPU, and all sheaves in the barn will be flushed and freed. It is guaranteed that no sheaf with stale capacity remains after the process is complete as long as certain rules are followed. The new rules to avoid sheaves of a stale capacity: 1. Hold local_lock when getting/putting sheaves from/to the barn. 2. When allocating new sheaves, check whether pcs->capacity and sheaf->capacity match after re-acquiring local_lock. 3. If local_trylock fails, flush and free the sheaf. This should be rare. Per the above rules, rcu_free_sheaf() now tries to acquire the pcs lock and checks whether the capacities match before putting the sheaf back to the barn. 
Signed-off-by: Harry Yoo (Oracle) --- mm/slub.c | 386 +++++++++++++++++++++++++++++++++++++++++++++++++---------= ---- 1 file changed, 308 insertions(+), 78 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index c746c9b48728..7def24fdfae6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -425,6 +425,7 @@ struct slub_percpu_sheaves { struct slab_sheaf *main; /* never NULL when unlocked */ struct slab_sheaf *spare; /* empty or full, may be NULL */ struct slab_sheaf *rcu_free; /* for batching kfree_rcu() */ + unsigned short capacity; }; =20 static struct slab_sheaf bootstrap_sheaf =3D {}; @@ -492,6 +493,30 @@ static inline struct node_barn *get_barn(struct kmem_c= ache *s) */ static nodemask_t slab_nodes; =20 + +static inline +unsigned short get_pcs_capacity(struct kmem_cache *s, + struct slub_percpu_sheaves *pcs) +{ + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); + return pcs->capacity; +} + +static inline void set_pcs_capacity(struct kmem_cache *s, + struct slub_percpu_sheaves *pcs, + unsigned short capacity) +{ + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); + pcs->capacity =3D capacity; +} + +static inline bool pcs_capacity_match(struct kmem_cache *s, + struct slub_percpu_sheaves *pcs, + struct slab_sheaf *sheaf) +{ + return get_pcs_capacity(s, pcs) =3D=3D sheaf->capacity; +} + /* * Similar to slab_nodes but for where we have node_barn allocated. * Corresponds to N_ONLINE nodes. 
@@ -507,6 +532,11 @@ struct slub_flush_work { struct work_struct work; struct kmem_cache *s; bool skip; + /* for flushing sheaves */ + bool disable_sheaves; + /* for enabling sheaves */ + void **sheaves; + unsigned short capacity; }; =20 static DEFINE_MUTEX(flush_lock); @@ -2774,6 +2804,10 @@ static struct slab_sheaf *__alloc_empty_sheaf(struct= kmem_cache *s, gfp_t gfp, if (gfp & __GFP_NO_OBJ_EXT) return NULL; =20 + /* Sheaves have been disabled */ + if (!capacity) + return NULL; + gfp &=3D ~OBJCGS_CLEAR_MASK; =20 /* @@ -2800,7 +2834,7 @@ static struct slab_sheaf *__alloc_empty_sheaf(struct = kmem_cache *s, gfp_t gfp, static inline struct slab_sheaf *alloc_empty_sheaf(struct kmem_cache *s, gfp_t gfp) { - return __alloc_empty_sheaf(s, gfp, s->sheaf_capacity); + return __alloc_empty_sheaf(s, gfp, data_race(s->sheaf_capacity)); } =20 static void free_empty_sheaf(struct kmem_cache *s, struct slab_sheaf *shea= f) @@ -2999,10 +3033,10 @@ static void rcu_free_sheaf_nobarn(struct rcu_head *= head) * flushing operations are rare so let's keep it simple and flush to slabs * directly, skipping the barn */ -static void pcs_flush_all(struct kmem_cache *s) +static void pcs_flush_all(struct kmem_cache *s, bool disable_sheaves) { struct slub_percpu_sheaves *pcs; - struct slab_sheaf *spare, *rcu_free; + struct slab_sheaf *spare, *rcu_free, *main; =20 local_lock(&s->cpu_sheaves->lock); pcs =3D this_cpu_ptr(s->cpu_sheaves); @@ -3013,8 +3047,23 @@ static void pcs_flush_all(struct kmem_cache *s) rcu_free =3D pcs->rcu_free; pcs->rcu_free =3D NULL; =20 + if (disable_sheaves && pcs_has_sheaves(pcs)) { + main =3D pcs->main; + pcs->main =3D &bootstrap_sheaf; + set_pcs_capacity(s, pcs, 0); + } else { + main =3D NULL; + } + local_unlock(&s->cpu_sheaves->lock); =20 + if (main) { + sheaf_flush_unused(s, main); + free_empty_sheaf(s, main); + } else { + sheaf_flush_main(s); + } + if (spare) { sheaf_flush_unused(s, spare); free_empty_sheaf(s, spare); @@ -3022,8 +3071,6 @@ static void 
pcs_flush_all(struct kmem_cache *s) =20 if (rcu_free) call_rcu(&rcu_free->rcu_head, rcu_free_sheaf_nobarn); - - sheaf_flush_main(s); } =20 static void __pcs_flush_all_cpu(struct kmem_cache *s, unsigned int cpu) @@ -3986,10 +4033,34 @@ static void flush_cpu_sheaves(struct work_struct *w) s =3D sfw->s; =20 if (cache_supports_sheaves(s)) - pcs_flush_all(s); + pcs_flush_all(s, sfw->disable_sheaves); +} + +static void enable_cpu_sheaves(struct work_struct *w) +{ + struct kmem_cache *s; + struct slub_flush_work *sfw; + struct slab_sheaf *sheaf; + struct slub_percpu_sheaves *pcs; + + sfw =3D container_of(w, struct slub_flush_work, work); + + s =3D sfw->s; + + local_lock(&s->cpu_sheaves->lock); + pcs =3D this_cpu_ptr(s->cpu_sheaves); + sheaf =3D sfw->sheaves[smp_processor_id()]; + + VM_WARN_ON_ONCE(pcs->rcu_free); + VM_WARN_ON_ONCE(pcs->spare); + VM_WARN_ON_ONCE(pcs->main !=3D &bootstrap_sheaf); + + pcs->main =3D sheaf; + set_pcs_capacity(s, pcs, sfw->capacity); + local_unlock(&s->cpu_sheaves->lock); } =20 -static void flush_all_cpus_locked(struct kmem_cache *s) +static void flush_all_cpus_locked(struct kmem_cache *s, bool disable_sheav= es) { struct slub_flush_work *sfw; unsigned int cpu; @@ -3997,16 +4068,32 @@ static void flush_all_cpus_locked(struct kmem_cache= *s) lockdep_assert_cpus_held(); mutex_lock(&flush_lock); =20 - for_each_online_cpu(cpu) { + for_each_possible_cpu(cpu) { sfw =3D &per_cpu(slub_flush, cpu); - if (!has_pcs_used(cpu, s)) { + + if (cpu_online(cpu)) { + /* Do not skip empty sheaves when disabling them */ + if (!disable_sheaves && !has_pcs_used(cpu, s)) { + sfw->skip =3D true; + continue; + } + INIT_WORK(&sfw->work, flush_cpu_sheaves); + sfw->skip =3D false; + sfw->s =3D s; + sfw->disable_sheaves =3D disable_sheaves; + queue_work_on(cpu, flushwq, &sfw->work); + } else if (disable_sheaves) { + struct slub_percpu_sheaves *pcs; + sfw->skip =3D true; - continue; + pcs =3D per_cpu_ptr(s->cpu_sheaves, cpu); + if (pcs->main !=3D &bootstrap_sheaf) { + 
sheaf_flush_unused(s, pcs->main); + free_empty_sheaf(s, pcs->main); + pcs->main =3D &bootstrap_sheaf; + pcs->capacity =3D 0; + } } - INIT_WORK(&sfw->work, flush_cpu_sheaves); - sfw->skip =3D false; - sfw->s =3D s; - queue_work_on(cpu, flushwq, &sfw->work); } =20 for_each_online_cpu(cpu) { @@ -4019,10 +4106,10 @@ static void flush_all_cpus_locked(struct kmem_cache= *s) mutex_unlock(&flush_lock); } =20 -static void flush_all(struct kmem_cache *s) +static void flush_all(struct kmem_cache *s, bool disable_sheaves) { cpus_read_lock(); - flush_all_cpus_locked(s); + flush_all_cpus_locked(s, disable_sheaves); cpus_read_unlock(); } =20 @@ -4690,10 +4777,21 @@ __pcs_replace_empty_main(struct kmem_cache *s, stru= ct slub_percpu_sheaves *pcs, full =3D empty; empty =3D NULL; =20 - if (!local_trylock(&s->cpu_sheaves->lock)) - goto barn_put; + if (!local_trylock(&s->cpu_sheaves->lock)) { + sheaf_flush_unused(s, full); + free_empty_sheaf(s, full); + return NULL; + } + pcs =3D this_cpu_ptr(s->cpu_sheaves); =20 + if (unlikely(!pcs_capacity_match(s, pcs, full))) { + local_unlock(&s->cpu_sheaves->lock); + sheaf_flush_unused(s, full); + free_empty_sheaf(s, full); + return NULL; + } + /* * If we put any empty or full sheaf to the barn below, it's due to * racing or being migrated to a different cpu. 
Breaching the barn's @@ -4721,7 +4819,6 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct= slub_percpu_sheaves *pcs, return pcs; } =20 -barn_put: barn_put_full_sheaf(barn, full); stat(s, BARN_PUT); =20 @@ -5027,29 +5124,8 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t= gfp, unsigned short size) if (unlikely(!size)) return NULL; =20 - if (unlikely(size > s->sheaf_capacity)) { - - sheaf =3D kzalloc_flex(*sheaf, objects, size, gfp); - if (!sheaf) - return NULL; - - stat(s, SHEAF_PREFILL_OVERSIZE); - sheaf->capacity =3D size; - - /* - * we do not need to care about pfmemalloc here because oversize - * sheaves area always flushed and freed when returned - */ - if (!__kmem_cache_alloc_bulk(s, gfp, size, - &sheaf->objects[0])) { - kfree(sheaf); - return NULL; - } - - sheaf->size =3D size; - - return sheaf; - } + if (unlikely(size > data_race(s->sheaf_capacity))) + goto oversized; =20 local_lock(&s->cpu_sheaves->lock); pcs =3D this_cpu_ptr(s->cpu_sheaves); @@ -5072,11 +5148,16 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_= t gfp, unsigned short size) =20 local_unlock(&s->cpu_sheaves->lock); =20 - if (!sheaf) sheaf =3D alloc_empty_sheaf(s, gfp); =20 if (sheaf) { + if (size > sheaf->capacity) { + sheaf_flush_unused(s, sheaf); + free_empty_sheaf(s, sheaf); + sheaf =3D NULL; + goto oversized; + } sheaf->pfmemalloc =3D false; =20 if (sheaf->size < size && @@ -5087,6 +5168,28 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t= gfp, unsigned short size) } } =20 + return sheaf; + +oversized: + sheaf =3D kzalloc_flex(*sheaf, objects, size, gfp); + if (!sheaf) + return NULL; + + stat(s, SHEAF_PREFILL_OVERSIZE); + sheaf->capacity =3D size; + + /* + * we do not need to care about pfmemalloc here because oversize + * sheaves area always flushed and freed when returned + */ + if (!__kmem_cache_alloc_bulk(s, gfp, size, + &sheaf->objects[0])) { + kfree(sheaf); + return NULL; + } + + sheaf->size =3D size; + return sheaf; } =20 @@ -5106,27 +5209,25 @@ void 
kmem_cache_return_sheaf(struct kmem_cache *s, = gfp_t gfp, struct slub_percpu_sheaves *pcs; struct node_barn *barn; =20 - if (unlikely((sheaf->capacity !=3D s->sheaf_capacity) - || sheaf->pfmemalloc)) { - sheaf_flush_unused(s, sheaf); - kfree(sheaf); - return; - } + if (unlikely((sheaf->capacity !=3D data_race(s->sheaf_capacity)) + || sheaf->pfmemalloc)) + goto free_sheaf; =20 local_lock(&s->cpu_sheaves->lock); pcs =3D this_cpu_ptr(s->cpu_sheaves); barn =3D get_barn(s); =20 + if (!pcs_capacity_match(s, pcs, sheaf)) { + local_unlock(&s->cpu_sheaves->lock); + goto free_sheaf; + } + if (!pcs->spare) { pcs->spare =3D sheaf; - sheaf =3D NULL; stat(s, SHEAF_RETURN_FAST); - } - - local_unlock(&s->cpu_sheaves->lock); - - if (!sheaf) + local_unlock(&s->cpu_sheaves->lock); return; + } =20 stat(s, SHEAF_RETURN_SLOW); =20 @@ -5134,15 +5235,32 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, = gfp_t gfp, * If the barn has too many full sheaves or we fail to refill the sheaf, * simply flush and free it. 
*/ - if (!barn || data_race(barn->nr_full) >=3D MAX_FULL_SHEAVES || - refill_sheaf(s, sheaf, gfp)) { - sheaf_flush_unused(s, sheaf); - free_empty_sheaf(s, sheaf); - return; + if (!barn || data_race(barn->nr_full) >=3D MAX_FULL_SHEAVES) { + local_unlock(&s->cpu_sheaves->lock); + goto free_sheaf; + } + + local_unlock(&s->cpu_sheaves->lock); + + if (refill_sheaf(s, sheaf, gfp)) + goto free_sheaf; + + local_lock(&s->cpu_sheaves->lock); + pcs =3D this_cpu_ptr(s->cpu_sheaves); + + if (!pcs_capacity_match(s, pcs, sheaf)) { + local_unlock(&s->cpu_sheaves->lock); + goto free_sheaf; } =20 barn_put_full_sheaf(barn, sheaf); + local_unlock(&s->cpu_sheaves->lock); stat(s, BARN_PUT); + return; + +free_sheaf: + sheaf_flush_unused(s, sheaf); + free_empty_sheaf(s, sheaf); } =20 /* @@ -5839,11 +5957,18 @@ __pcs_replace_full_main(struct kmem_cache *s, struc= t slub_percpu_sheaves *pcs, =20 got_empty: if (!local_trylock(&s->cpu_sheaves->lock)) { - barn_put_empty_sheaf(barn, empty); + free_empty_sheaf(s, empty); return NULL; } =20 pcs =3D this_cpu_ptr(s->cpu_sheaves); + + if (unlikely(!pcs_capacity_match(s, pcs, empty))) { + local_unlock(&s->cpu_sheaves->lock); + free_empty_sheaf(s, empty); + return NULL; + } + __pcs_install_empty_sheaf(s, pcs, empty, barn); =20 return pcs; @@ -5884,6 +6009,7 @@ static void rcu_free_sheaf(struct rcu_head *head) struct slab_sheaf *sheaf; struct node_barn *barn =3D NULL; struct kmem_cache *s; + struct slub_percpu_sheaves *pcs; =20 sheaf =3D container_of(head, struct slab_sheaf, rcu_head); if (WARN_ON_ONCE(!sheaf->size)) { @@ -5905,11 +6031,20 @@ static void rcu_free_sheaf(struct rcu_head *head) * slab so simply flush everything. 
*/ if (__rcu_free_sheaf_prepare(s, sheaf)) - goto flush; + goto flush_unlocked; =20 barn =3D get_barn_node(s, sheaf->node); if (!barn) - goto flush; + goto flush_unlocked; + + if (!local_trylock(&s->cpu_sheaves->lock)) + goto flush_unlocked; + + pcs =3D this_cpu_ptr(s->cpu_sheaves); + if (!pcs_capacity_match(s, pcs, sheaf)) { + local_unlock(&s->cpu_sheaves->lock); + goto flush_unlocked; + } =20 /* due to slab_free_hook() */ if (unlikely(sheaf->size =3D=3D 0)) @@ -5924,19 +6059,27 @@ static void rcu_free_sheaf(struct rcu_head *head) if (data_race(barn->nr_full) < MAX_FULL_SHEAVES) { stat(s, BARN_PUT); barn_put_full_sheaf(barn, sheaf); + local_unlock(&s->cpu_sheaves->lock); return; } =20 -flush: stat(s, BARN_PUT_FAIL); sheaf_flush_unused(s, sheaf); =20 empty: if (barn && data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) { barn_put_empty_sheaf(barn, sheaf); + local_unlock(&s->cpu_sheaves->lock); return; } =20 + local_unlock(&s->cpu_sheaves->lock); + free_empty_sheaf(s, sheaf); + return; + +flush_unlocked: + stat(s, BARN_PUT_FAIL); + sheaf_flush_unused(s, sheaf); free_empty_sheaf(s, sheaf); } =20 @@ -6006,12 +6149,18 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *= obj) goto fail; =20 if (!local_trylock(&s->cpu_sheaves->lock)) { - barn_put_empty_sheaf(barn, empty); + free_empty_sheaf(s, empty); goto fail; } =20 pcs =3D this_cpu_ptr(s->cpu_sheaves); =20 + if (unlikely(!pcs_capacity_match(s, pcs, empty))) { + local_unlock(&s->cpu_sheaves->lock); + free_empty_sheaf(s, empty); + goto fail; + } + if (unlikely(pcs->rcu_free)) barn_put_empty_sheaf(barn, empty); else @@ -7650,6 +7799,7 @@ static int init_percpu_sheaves(struct kmem_cache *s) if (!pcs->main) return -ENOMEM; =20 + pcs->capacity =3D s->sheaf_capacity; } =20 return 0; @@ -8050,7 +8200,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s) int node; struct kmem_cache_node *n; =20 - flush_all_cpus_locked(s); + flush_all_cpus_locked(s, /* disable_sheaves =3D */false); =20 /* we might have rcu sheaves in flight */ if 
(cache_supports_sheaves(s)) @@ -8334,7 +8484,7 @@ static int __kmem_cache_do_shrink(struct kmem_cache *= s) =20 int __kmem_cache_shrink(struct kmem_cache *s) { - flush_all(s); + flush_all(s, /* disable_sheaves =3D */false); return __kmem_cache_do_shrink(s); } =20 @@ -8344,7 +8494,7 @@ static int slab_mem_going_offline_callback(void) =20 mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) { - flush_all_cpus_locked(s); + flush_all_cpus_locked(s, /* disable_sheaves =3D */false); __kmem_cache_do_shrink(s); } mutex_unlock(&slab_mutex); @@ -8479,7 +8629,11 @@ static int bootstrap_cache_sheaves(struct kmem_cache= *s, unsigned short capacity) { struct kmem_cache_args empty_args =3D {}; + struct slub_flush_work *sfw; int node, cpu, err =3D 0; + void **sheaves =3D NULL; + + lockdep_assert_cpus_held(); =20 if (!capacity) capacity =3D calculate_sheaf_capacity(s, &empty_args); @@ -8491,6 +8645,9 @@ static int bootstrap_cache_sheaves(struct kmem_cache = *s, for_each_node_mask(node, slab_barn_nodes) { struct node_barn *barn; =20 + if (s->per_node[node].barn) + continue; + barn =3D kmalloc_node(sizeof(*barn), GFP_KERNEL, node); =20 if (!barn) { @@ -8502,23 +8659,62 @@ static int bootstrap_cache_sheaves(struct kmem_cach= e *s, s->per_node[node].barn =3D barn; } =20 + sheaves =3D kmalloc_array(nr_cpu_ids, sizeof(*sheaves), + GFP_KERNEL | __GFP_ZERO); + if (!sheaves) { + err =3D -ENOMEM; + goto out; + } + + /* Do not queue the work if any of the allocations fails */ + for_each_possible_cpu(cpu) { + sheaves[cpu] =3D __alloc_empty_sheaf(s, GFP_KERNEL, capacity); + + if (!sheaves[cpu]) { + err =3D -ENOMEM; + goto out_free_sheaves; + } + } + + mutex_lock(&flush_lock); for_each_possible_cpu(cpu) { struct slub_percpu_sheaves *pcs; =20 pcs =3D per_cpu_ptr(s->cpu_sheaves, cpu); + sfw =3D &per_cpu(slub_flush, cpu); =20 - pcs->main =3D __alloc_empty_sheaf(s, GFP_KERNEL, capacity); - - if (!pcs->main) { - err =3D -ENOMEM; - break; + if (!cpu_online(cpu) || slab_state =3D=3D UP) 
{ + pcs->main =3D sheaves[cpu]; + pcs->capacity =3D capacity; + sfw->skip =3D true; + } else { + INIT_WORK(&sfw->work, enable_cpu_sheaves); + sfw->s =3D s; + sfw->sheaves =3D sheaves; + sfw->capacity =3D capacity; + sfw->skip =3D false; + queue_work_on(cpu, flushwq, &sfw->work); } } =20 -out: - if (!err) - s->sheaf_capacity =3D capacity; + for_each_online_cpu(cpu) { + sfw =3D &per_cpu(slub_flush, cpu); + if (sfw->skip) + continue; + flush_work(&sfw->work); + } + mutex_unlock(&flush_lock); =20 + kfree(sheaves); + s->sheaf_capacity =3D capacity; + return 0; + +out_free_sheaves: + for_each_possible_cpu(cpu) + if (sheaves[cpu]) + free_empty_sheaf(s, sheaves[cpu]); + kfree(sheaves); +out: return err; } =20 @@ -8533,6 +8729,7 @@ static void __init bootstrap_kmalloc_sheaves(void) struct kmem_cache *s; int idx; =20 + cpus_read_lock(); for_each_normal_kmalloc_cache(s, type, idx) { /* * It's still early in boot so treat this as a failure to @@ -8542,6 +8739,7 @@ static void __init bootstrap_kmalloc_sheaves(void) panic("Out of memory when creating kmem_cache %s\n", s->name); } + cpus_read_unlock(); } =20 void __init kmem_cache_init(void) @@ -8799,7 +8997,7 @@ long validate_slab_cache(struct kmem_cache *s) if (!obj_map) return -ENOMEM; =20 - flush_all(s); + flush_all(s, /* disable_sheaves =3D */false); for_each_kmem_cache_node(s, node, n) count +=3D validate_slab_node(s, n, obj_map); =20 @@ -9111,7 +9309,39 @@ static ssize_t sheaf_capacity_show(struct kmem_cache= *s, char *buf) { return sysfs_emit(buf, "%hu\n", s->sheaf_capacity); } -SLAB_ATTR_RO(sheaf_capacity); +static ssize_t sheaf_capacity_store(struct kmem_cache *s, + const char *buf, size_t length) +{ + unsigned short capacity; + int err; + + err =3D kstrtou16(buf, 10, &capacity); + if (err) + return err; + + if (!cache_supports_sheaves(s)) + return -EOPNOTSUPP; + + cpus_read_lock(); + mutex_lock(&slab_mutex); + flush_all_cpus_locked(s, /* disable_sheaves =3D */true); + rcu_barrier(); + __kmem_cache_do_shrink(s); + 
s->sheaf_capacity =3D 0; + if (capacity) { + err =3D bootstrap_cache_sheaves(s, capacity); + if (err) { + mutex_unlock(&slab_mutex); + cpus_read_unlock(); + return err; + } + } + mutex_unlock(&slab_mutex); + cpus_read_unlock(); + + return length; +} +SLAB_ATTR(sheaf_capacity); =20 static ssize_t min_partial_show(struct kmem_cache *s, char *buf) { --=20 2.43.0 From nobody Mon May 25 08:11:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2C2DE3EFFD8 for ; Fri, 15 May 2026 16:24:58 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862298; cv=none; b=rei83U7iQRgIbachyLbCrlcEvI3tu9hfFvq3ty1VrbnGMRKErvZHdo+ljRA+euM3vE7YVXuyuLyKEKUdztBjS9MkHhojdEA/dgqbp4kMZB7a2bWEmcElaC1UxxDNbswKhgUbsUsIN7PgspzyvaXsspX0F1tdLk58SKZKdzxVSjE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862298; c=relaxed/simple; bh=MxvKk95lrw+bUt79KYQQW9SxXe29lTK/BGOMUL7tjHI=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=AStJfwQZLWPqAISpl+Ndyn/jvcWQqB0ZHQ1Whz7jWfj8gpSZPW5QmD9go/uUNZg/j8IvazU1I7hunY7SbjoMFeRoteWOylsd6odqg7ijcaeUr5VzL44ZFubImIaVyY4xRRb9lHwMC+MUfuU8cHRUFd93bIOCAwNruCSUIUdq5nI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=bsV8Qbq+; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="bsV8Qbq+" Received: by smtp.kernel.org (Postfix) with ESMTPSA id D953BC2BCC7; Fri, 15 May 2026 16:24:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; 
t=1778862298; bh=MxvKk95lrw+bUt79KYQQW9SxXe29lTK/BGOMUL7tjHI=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=bsV8Qbq+Wzb4LJDaGTkoFbx9/YBMLEM/kXBs/QqYND7k1yxN9O7Uf+kOqDvL2xYPW VF1d9CLcjPR0BRFU1mYmVD1ap6Ncpj5e2nZDauBo9rH9wTqNWigE+i7t+Ru/Zabb/+ FoxDxOaNDgsIvmjamESQGfjXgYOlSnb8kFjcNrNoO0HIU78J13P8lP3uJC4oTOAw27 RJc/Mk+XyVGqpzqSRlT3V3KL3ZFo0LeaqzNsvwLGH8kjaLmyd6q9wKFZdb0yEaZYZ5 nXAZxoar3CJbEtH0bkb6AvKWjc6Tz3fYY7CDKsq1djM8zQvuCq7sRyvp1TTE68A3ss WSIpAVpCaDCgA== From: "Harry Yoo (Oracle)" Date: Sat, 16 May 2026 01:24:31 +0900 Subject: [PATCH RFC 7/8] mm/slab: add pcs->lock lockdep assert when accessing the barn Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260516-sheaves-tuning-v1-7-221aa3e1d829@kernel.org> References: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> In-Reply-To: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> To: Vlastimil Babka , Andrew Morton , Hao Li , Christoph Lameter , David Rientjes , Roman Gushchin Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Suren Baghdasaryan , "Liam R. Howlett" X-Mailer: b4 0.16-dev If the cache's capacity changes while a CPU is getting/putting a sheaf from/to the barn, the writer performing the capacity change is responsible for flushing and freeing those stale sheaves. However, that can be done only if CPUs hold pcs->lock when accessing the barn. Add lockdep_assert_held() on the pcs lock whenever moving a sheaf to/from the barn. Since struct slab_sheaf no longer has the cache pointer, add a new parameter for the cache pointer. When lockdep is disabled, the assert is a no-op and the compiler can optimize away the unused parameter (since these helpers are static). 
Signed-off-by: Harry Yoo (Oracle) --- mm/slub.c | 70 +++++++++++++++++++++++++++++++++++++++--------------------= ---- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/mm/slub.c b/mm/slub.c index 7def24fdfae6..856639d3d3f0 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3142,12 +3142,15 @@ static void pcs_destroy(struct kmem_cache *s) s->cpu_sheaves =3D NULL; } =20 -static struct slab_sheaf *barn_get_empty_sheaf(struct node_barn *barn, +static struct slab_sheaf *barn_get_empty_sheaf(struct kmem_cache *s, + struct node_barn *barn, bool allow_spin) { struct slab_sheaf *empty =3D NULL; unsigned long flags; =20 + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); + if (!data_race(barn->nr_empty)) return NULL; =20 @@ -3174,10 +3177,13 @@ static struct slab_sheaf *barn_get_empty_sheaf(stru= ct node_barn *barn, * empty or full sheaf limits for simplicity. */ =20 -static void barn_put_empty_sheaf(struct node_barn *barn, struct slab_sheaf= *sheaf) +static void barn_put_empty_sheaf(struct kmem_cache *s, struct node_barn *b= arn, + struct slab_sheaf *sheaf) { unsigned long flags; =20 + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); + spin_lock_irqsave(&barn->lock, flags); =20 list_add(&sheaf->barn_list, &barn->sheaves_empty); @@ -3186,10 +3192,13 @@ static void barn_put_empty_sheaf(struct node_barn *= barn, struct slab_sheaf *shea spin_unlock_irqrestore(&barn->lock, flags); } =20 -static void barn_put_full_sheaf(struct node_barn *barn, struct slab_sheaf = *sheaf) +static void barn_put_full_sheaf(struct kmem_cache *s, struct node_barn *ba= rn, + struct slab_sheaf *sheaf) { unsigned long flags; =20 + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); + spin_lock_irqsave(&barn->lock, flags); =20 list_add(&sheaf->barn_list, &barn->sheaves_full); @@ -3198,11 +3207,14 @@ static void barn_put_full_sheaf(struct node_barn *b= arn, struct slab_sheaf *sheaf spin_unlock_irqrestore(&barn->lock, flags); } =20 -static struct slab_sheaf 
*barn_get_full_or_empty_sheaf(struct node_barn *b= arn) +static struct slab_sheaf *barn_get_full_or_empty_sheaf(struct kmem_cache *= s, + struct node_barn *barn) { struct slab_sheaf *sheaf =3D NULL; unsigned long flags; =20 + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); + if (!data_race(barn->nr_full) && !data_race(barn->nr_empty)) return NULL; =20 @@ -3231,12 +3243,14 @@ static struct slab_sheaf *barn_get_full_or_empty_sh= eaf(struct node_barn *barn) * change. */ static struct slab_sheaf * -barn_replace_empty_sheaf(struct node_barn *barn, struct slab_sheaf *empty, - bool allow_spin) +barn_replace_empty_sheaf(struct kmem_cache *s, struct node_barn *barn, + struct slab_sheaf *empty, bool allow_spin) { struct slab_sheaf *full =3D NULL; unsigned long flags; =20 + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); + if (!data_race(barn->nr_full)) return NULL; =20 @@ -3264,12 +3278,14 @@ barn_replace_empty_sheaf(struct node_barn *barn, st= ruct slab_sheaf *empty, * barn. But if there are too many full sheaves, reject this with -E2BIG. 
*/ static struct slab_sheaf * -barn_replace_full_sheaf(struct node_barn *barn, struct slab_sheaf *full, - bool allow_spin) +barn_replace_full_sheaf(struct kmem_cache *s, struct node_barn *barn, + struct slab_sheaf *full, bool allow_spin) { struct slab_sheaf *empty; unsigned long flags; =20 + lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); + /* we don't repeat this check under barn->lock as it's not critical */ if (data_race(barn->nr_full) >=3D MAX_FULL_SHEAVES) return ERR_PTR(-E2BIG); @@ -4732,7 +4748,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct= slub_percpu_sheaves *pcs, =20 allow_spin =3D gfpflags_allow_spinning(gfp); =20 - full =3D barn_replace_empty_sheaf(barn, pcs->main, allow_spin); + full =3D barn_replace_empty_sheaf(s, barn, pcs->main, allow_spin); =20 if (full) { stat(s, BARN_GET); @@ -4747,7 +4763,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct= slub_percpu_sheaves *pcs, empty =3D pcs->spare; pcs->spare =3D NULL; } else { - empty =3D barn_get_empty_sheaf(barn, true); + empty =3D barn_get_empty_sheaf(s, barn, true); } } =20 @@ -4803,7 +4819,7 @@ __pcs_replace_empty_main(struct kmem_cache *s, struct= slub_percpu_sheaves *pcs, if (!pcs->spare) pcs->spare =3D pcs->main; else - barn_put_empty_sheaf(barn, pcs->main); + barn_put_empty_sheaf(s, barn, pcs->main); pcs->main =3D full; return pcs; } @@ -4814,12 +4830,12 @@ __pcs_replace_empty_main(struct kmem_cache *s, stru= ct slub_percpu_sheaves *pcs, } =20 if (pcs->spare->size =3D=3D 0) { - barn_put_empty_sheaf(barn, pcs->spare); + barn_put_empty_sheaf(s, barn, pcs->spare); pcs->spare =3D full; return pcs; } =20 - barn_put_full_sheaf(barn, full); + barn_put_full_sheaf(s, barn, full); stat(s, BARN_PUT); =20 return pcs; @@ -4936,7 +4952,7 @@ unsigned int alloc_from_pcs_bulk(struct kmem_cache *s= , gfp_t gfp, size_t size, return allocated; } =20 - full =3D barn_replace_empty_sheaf(barn, pcs->main, + full =3D barn_replace_empty_sheaf(s, barn, pcs->main, gfpflags_allow_spinning(gfp)); 
=20 if (full) { @@ -5139,7 +5155,7 @@ kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t = gfp, unsigned short size) =20 stat(s, SHEAF_PREFILL_SLOW); if (barn) - sheaf =3D barn_get_full_or_empty_sheaf(barn); + sheaf =3D barn_get_full_or_empty_sheaf(s, barn); if (sheaf && sheaf->size) stat(s, BARN_GET); else @@ -5253,7 +5269,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gf= p_t gfp, goto free_sheaf; } =20 - barn_put_full_sheaf(barn, sheaf); + barn_put_full_sheaf(s, barn, sheaf); local_unlock(&s->cpu_sheaves->lock); stat(s, BARN_PUT); return; @@ -5820,14 +5836,14 @@ static void __pcs_install_empty_sheaf(struct kmem_c= ache *s, * freed to it. Get rid of our empty sheaf. */ if (pcs->main->size < pcs->main->capacity) { - barn_put_empty_sheaf(barn, empty); + barn_put_empty_sheaf(s, barn, empty); return; } =20 /* Also unlikely for the same reason */ if (pcs->spare->size < pcs->spare->capacity) { swap(pcs->main, pcs->spare); - barn_put_empty_sheaf(barn, empty); + barn_put_empty_sheaf(s, barn, empty); return; } =20 @@ -5835,7 +5851,7 @@ static void __pcs_install_empty_sheaf(struct kmem_cac= he *s, * We probably failed barn_replace_full_sheaf() due to no empty sheaf * available there, but we allocated one, so finish the job. 
*/ - barn_put_full_sheaf(barn, pcs->main); + barn_put_full_sheaf(s, barn, pcs->main); stat(s, BARN_PUT); pcs->main =3D empty; } @@ -5874,7 +5890,7 @@ __pcs_replace_full_main(struct kmem_cache *s, struct = slub_percpu_sheaves *pcs, put_fail =3D false; =20 if (!pcs->spare) { - empty =3D barn_get_empty_sheaf(barn, allow_spin); + empty =3D barn_get_empty_sheaf(s, barn, allow_spin); if (empty) { pcs->spare =3D pcs->main; pcs->main =3D empty; @@ -5888,7 +5904,7 @@ __pcs_replace_full_main(struct kmem_cache *s, struct = slub_percpu_sheaves *pcs, return pcs; } =20 - empty =3D barn_replace_full_sheaf(barn, pcs->main, allow_spin); + empty =3D barn_replace_full_sheaf(s, barn, pcs->main, allow_spin); =20 if (!IS_ERR(empty)) { stat(s, BARN_PUT); @@ -6058,7 +6074,7 @@ static void rcu_free_sheaf(struct rcu_head *head) =20 if (data_race(barn->nr_full) < MAX_FULL_SHEAVES) { stat(s, BARN_PUT); - barn_put_full_sheaf(barn, sheaf); + barn_put_full_sheaf(s, barn, sheaf); local_unlock(&s->cpu_sheaves->lock); return; } @@ -6068,7 +6084,7 @@ static void rcu_free_sheaf(struct rcu_head *head) =20 empty: if (barn && data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) { - barn_put_empty_sheaf(barn, sheaf); + barn_put_empty_sheaf(s, barn, sheaf); local_unlock(&s->cpu_sheaves->lock); return; } @@ -6134,7 +6150,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *ob= j) goto fail; } =20 - empty =3D barn_get_empty_sheaf(barn, true); + empty =3D barn_get_empty_sheaf(s, barn, true); =20 if (empty) { pcs->rcu_free =3D empty; @@ -6162,7 +6178,7 @@ bool __kfree_rcu_sheaf(struct kmem_cache *s, void *ob= j) } =20 if (unlikely(pcs->rcu_free)) - barn_put_empty_sheaf(barn, empty); + barn_put_empty_sheaf(s, barn, empty); else pcs->rcu_free =3D empty; } @@ -6314,7 +6330,7 @@ static void free_to_pcs_bulk(struct kmem_cache *s, si= ze_t size, void **p) goto no_empty; =20 if (!pcs->spare) { - empty =3D barn_get_empty_sheaf(barn, true); + empty =3D barn_get_empty_sheaf(s, barn, true); if (!empty) goto no_empty; =20 
@@ -6328,7 +6344,7 @@ static void free_to_pcs_bulk(struct kmem_cache *s, si= ze_t size, void **p) goto do_free; } =20 - empty =3D barn_replace_full_sheaf(barn, pcs->main, true); + empty =3D barn_replace_full_sheaf(s, barn, pcs->main, true); if (IS_ERR(empty)) { stat(s, BARN_PUT_FAIL); goto no_empty; --=20 2.43.0 From nobody Mon May 25 08:11:40 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C7F823E0092 for ; Fri, 15 May 2026 16:25:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862300; cv=none; b=YWQofckAuxKmfxEIoycX24lz9nlIJG9/huNpwGnhI2qeZkeuUjTIdSsL9kCTkodtT+FL7iH9srLornB93t9k7uvWSe0qKkds6JbBHIOCdE3tVqYd4bJr44Cdeatlpq+MWQjPp4CDBnPoSIGfQm0eqRqklsRJCHG28GjSWEtpVeo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1778862300; c=relaxed/simple; bh=dvyaAGwaAEmKdhHRQ6Trfstm6Ieb9ZAn2zLUlooC8QE=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=rlyVLcEfdf9qdhRjBD8cF+Ac5+qLa254+5O9bRH/bNnrhBqTP2sVoIOvD2LWpOGR2BvuA+OLTsM4KsERa0nU7wG2PBK3zOLCDRfGu92+ISd3OwO1k+Ze/SQNbVkG2hT10azzDfi8bLjE1DR8TharcMa46yO9UJNT4yLf1NKNi4I= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ovGCot7F; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ovGCot7F" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7A7B1C2BCB3; Fri, 15 May 2026 16:24:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1778862300; 
bh=dvyaAGwaAEmKdhHRQ6Trfstm6Ieb9ZAn2zLUlooC8QE=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=ovGCot7Fct19IIkkgts4PSq3Hkjx4cZBUTmShXiE2+2xLMJTahWSp3ehfRUgXBkIo A51/OcvpuibLhpf2opV/nbf9sBldvUTY2TOoZ/0Wc/PXW+FPa/r3TrKBNmYshH2E56 YlCMq1kAjRUwTVlCzQtRwMwwv5GmErW3PXrFpUPsBQLCBVcMWPX5n9tpKqA+Kkngix 6driDpZcyRd/6uLimC5IunBP7dMk2WLNcACyRlUuYhhjTqERzFC1bCQhlqJkXF/3oV SlevGnQWcx21aAdwCiNKasLfDBwyfJTWwsno3MrOQvWQ7xnr/JiZdvj55mlGb+LHRk cs2np44xkpaSg== From: "Harry Yoo (Oracle)" Date: Sat, 16 May 2026 01:24:32 +0900 Subject: [PATCH RFC 8/8] mm/slab: allow changing max_{full,empty}_sheaves at runtime Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260516-sheaves-tuning-v1-8-221aa3e1d829@kernel.org> References: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> In-Reply-To: <20260516-sheaves-tuning-v1-0-221aa3e1d829@kernel.org> To: Vlastimil Babka , Andrew Morton , Hao Li , Christoph Lameter , David Rientjes , Roman Gushchin Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org, Suren Baghdasaryan , "Liam R. Howlett" X-Mailer: b4 0.16-dev Replace MAX_FULL_SHEAVES and MAX_EMPTY_SHEAVES with per-cache tunables, and expose them via sysfs attributes as max_{full,empty}_sheaves. Keep the default value of 10 to preserve the existing behavior. Let us measure the impact of these parameters and discuss whether they are actually needed before landing this in mainline. 
Signed-off-by: Harry Yoo (Oracle) --- mm/slab.h | 2 ++ mm/slub.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 50 insertions(+), 7 deletions(-) diff --git a/mm/slab.h b/mm/slab.h index 907a8207809c..22df364a2ef7 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -205,6 +205,8 @@ struct kmem_cache { struct reciprocal_value reciprocal_size; unsigned int offset; /* Free pointer offset */ unsigned short sheaf_capacity; + unsigned short max_full_sheaves; + unsigned short max_empty_sheaves; struct kmem_cache_order_objects oo; =20 /* Allocation and freeing of slabs */ diff --git a/mm/slub.c b/mm/slub.c index 856639d3d3f0..e9b33567d98c 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -396,9 +396,6 @@ void stat_add(const struct kmem_cache *s, enum stat_ite= m si, int v) #endif } =20 -#define MAX_FULL_SHEAVES 10 -#define MAX_EMPTY_SHEAVES 10 - struct node_barn { spinlock_t lock; struct list_head sheaves_full; @@ -3287,7 +3284,7 @@ barn_replace_full_sheaf(struct kmem_cache *s, struct = node_barn *barn, lockdep_assert_held(this_cpu_ptr(&s->cpu_sheaves->lock)); =20 /* we don't repeat this check under barn->lock as it's not critical */ - if (data_race(barn->nr_full) >=3D MAX_FULL_SHEAVES) + if (data_race(barn->nr_full) >=3D s->max_full_sheaves) return ERR_PTR(-E2BIG); if (!data_race(barn->nr_empty)) return ERR_PTR(-ENOMEM); @@ -5251,7 +5248,7 @@ void kmem_cache_return_sheaf(struct kmem_cache *s, gf= p_t gfp, * If the barn has too many full sheaves or we fail to refill the sheaf, * simply flush and free it. */ - if (!barn || data_race(barn->nr_full) >=3D MAX_FULL_SHEAVES) { + if (!barn || data_race(barn->nr_full) >=3D s->max_full_sheaves) { local_unlock(&s->cpu_sheaves->lock); goto free_sheaf; } @@ -6072,7 +6069,7 @@ static void rcu_free_sheaf(struct rcu_head *head) * limit but that should be rare and harmless. 
*/ =20 - if (data_race(barn->nr_full) < MAX_FULL_SHEAVES) { + if (data_race(barn->nr_full) < s->max_full_sheaves) { stat(s, BARN_PUT); barn_put_full_sheaf(s, barn, sheaf); local_unlock(&s->cpu_sheaves->lock); @@ -6083,7 +6080,7 @@ static void rcu_free_sheaf(struct rcu_head *head) sheaf_flush_unused(s, sheaf); =20 empty: - if (barn && data_race(barn->nr_empty) < MAX_EMPTY_SHEAVES) { + if (barn && data_race(barn->nr_empty) < s->max_empty_sheaves) { barn_put_empty_sheaf(s, barn, sheaf); local_unlock(&s->cpu_sheaves->lock); return; @@ -8843,6 +8840,8 @@ int do_kmem_cache_create(struct kmem_cache *s, const = char *name, #endif s->align =3D args->align; s->ctor =3D args->ctor; + s->max_full_sheaves =3D 10; + s->max_empty_sheaves =3D 10; #ifdef CONFIG_HARDENED_USERCOPY s->useroffset =3D args->useroffset; s->usersize =3D args->usersize; @@ -9359,6 +9358,46 @@ static ssize_t sheaf_capacity_store(struct kmem_cach= e *s, } SLAB_ATTR(sheaf_capacity); =20 +static ssize_t max_full_sheaves_show(struct kmem_cache *s, char *buf) +{ + return sysfs_emit(buf, "%hu\n", s->max_full_sheaves); +} + +static ssize_t max_full_sheaves_store(struct kmem_cache *s, const char *bu= f, + size_t length) +{ + unsigned short max_full_sheaves; + int err; + + err =3D kstrtou16(buf, 10, &max_full_sheaves); + if (err) + return err; + + s->max_full_sheaves =3D max_full_sheaves; + return length; +} +SLAB_ATTR(max_full_sheaves); + +static ssize_t max_empty_sheaves_show(struct kmem_cache *s, char *buf) +{ + return sysfs_emit(buf, "%hu\n", s->max_empty_sheaves); +} + +static ssize_t max_empty_sheaves_store(struct kmem_cache *s, const char *b= uf, + size_t length) +{ + unsigned short max_empty_sheaves; + int err; + + err =3D kstrtou16(buf, 10, &max_empty_sheaves); + if (err) + return err; + + s->max_empty_sheaves =3D max_empty_sheaves; + return length; +} +SLAB_ATTR(max_empty_sheaves); + static ssize_t min_partial_show(struct kmem_cache *s, char *buf) { return sysfs_emit(buf, "%lu\n", s->min_partial); @@ 
-9721,6 +9760,8 @@ static const struct attribute *const slab_attrs[] =3D= { &objs_per_slab_attr.attr, &order_attr.attr, &sheaf_capacity_attr.attr, + &max_full_sheaves_attr.attr, + &max_empty_sheaves_attr.attr, &min_partial_attr.attr, &cpu_partial_attr.attr, &objects_partial_attr.attr, --=20 2.43.0