From: chengming.zhou@linux.dev
To: cl@linux.com, penberg@kernel.org
Cc: rientjes@google.com, iamjoonsoo.kim@lge.com, akpm@linux-foundation.org,
    vbabka@suse.cz, roman.gushchin@linux.dev, 42.hyeyoo@gmail.com,
    linux-mm@kvack.org, linux-kernel@vger.kernel.org, chengming.zhou@linux.dev,
    Chengming Zhou
Subject: [RFC PATCH 1/5] slub: Introduce on_partial()
Date: Tue, 17 Oct 2023 15:44:35 +0000
Message-Id: <20231017154439.3036608-2-chengming.zhou@linux.dev>
In-Reply-To: <20231017154439.3036608-1-chengming.zhou@linux.dev>

From: Chengming Zhou

Change slab->__unused to slab->flags so it can hold SLUB flags, which for
now only include the SF_NODE_PARTIAL flag. It indicates whether or not the
slab is on the node partial list.

The following patches will stop freezing a slab when moving it from the
node partial list to the cpu partial list, so we will no longer be able to
rely on the frozen bit to decide whether we should manipulate
slab->slab_list. Instead we will rely on this SF_NODE_PARTIAL flag, which
is protected by the node list_lock.
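The idea can be pictured with a small userspace model: a per-slab flag bit
that is only read or written while the list lock is held, so list
membership no longer has to be inferred from the frozen bit. This is only
an illustrative sketch, not kernel code; a pthread mutex stands in for the
node list_lock, a singly linked list stands in for slab_list, and all
names besides SF_NODE_PARTIAL are invented for the example.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define SF_NODE_PARTIAL (1u << 0)

struct slab {
	unsigned int flags;
	struct slab *next;		/* stand-in for slab_list */
};

struct node {
	pthread_mutex_t list_lock;
	struct slab *partial;		/* head of the partial list */
	unsigned long nr_partial;
};

static void add_partial(struct node *n, struct slab *s)
{
	pthread_mutex_lock(&n->list_lock);
	s->flags |= SF_NODE_PARTIAL;	/* what ___add_partial() does */
	s->next = n->partial;
	n->partial = s;
	n->nr_partial++;
	pthread_mutex_unlock(&n->list_lock);
}

static bool on_partial(struct node *n, struct slab *s)
{
	/* caller must hold n->list_lock, as lockdep_assert_held() checks */
	return s->flags & SF_NODE_PARTIAL;
}

int main(void)
{
	struct node n = { .list_lock = PTHREAD_MUTEX_INITIALIZER };
	struct slab s = { 0 };

	add_partial(&n, &s);
	pthread_mutex_lock(&n.list_lock);
	printf("on partial list: %d\n", on_partial(&n, &s));
	pthread_mutex_unlock(&n.list_lock);
	return 0;
}

The invariant the patch relies on is the same as in the model: because the
flag only changes under the list lock, a reader that holds the lock gets an
answer that cannot change underneath it.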
Signed-off-by: Chengming Zhou
---
 mm/slab.h |  2 +-
 mm/slub.c | 28 ++++++++++++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/mm/slab.h b/mm/slab.h
index 8cd3294fedf5..11e9c9a0f648 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -89,7 +89,7 @@ struct slab {
 		};
 		struct rcu_head rcu_head;
 	};
-	unsigned int __unused;
+	unsigned int flags;
 
 #else
 #error "Unexpected slab allocator configured"
diff --git a/mm/slub.c b/mm/slub.c
index 63d281dfacdb..e5356ad14951 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1993,6 +1993,12 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
 }
 #endif /* CONFIG_SLAB_FREELIST_RANDOM */
 
+enum SLUB_FLAGS {
+	SF_INIT_VALUE = 0,
+	SF_EXIT_VALUE = -1,
+	SF_NODE_PARTIAL = 1 << 0,
+};
+
 static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
 	struct slab *slab;
@@ -2031,6 +2037,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	slab->objects = oo_objects(oo);
 	slab->inuse = 0;
 	slab->frozen = 0;
+	slab->flags = SF_INIT_VALUE;
 
 	account_slab(slab, oo_order(oo), s, flags);
 
@@ -2077,6 +2084,7 @@ static void __free_slab(struct kmem_cache *s, struct slab *slab)
 	int order = folio_order(folio);
 	int pages = 1 << order;
 
+	slab->flags = SF_EXIT_VALUE;
 	__slab_clear_pfmemalloc(slab);
 	folio->mapping = NULL;
 	/* Make the mapping reset visible before clearing the flag */
@@ -2119,9 +2127,28 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab)
 /*
  * Management of partially allocated slabs.
  */
+static void ___add_partial(struct kmem_cache_node *n, struct slab *slab)
+{
+	lockdep_assert_held(&n->list_lock);
+	slab->flags |= SF_NODE_PARTIAL;
+}
+
+static void ___remove_partial(struct kmem_cache_node *n, struct slab *slab)
+{
+	lockdep_assert_held(&n->list_lock);
+	slab->flags &= ~SF_NODE_PARTIAL;
+}
+
+static inline bool on_partial(struct kmem_cache_node *n, struct slab *slab)
+{
+	lockdep_assert_held(&n->list_lock);
+	return slab->flags & SF_NODE_PARTIAL;
+}
+
 static inline void
 __add_partial(struct kmem_cache_node *n, struct slab *slab, int tail)
 {
+	___add_partial(n, slab);
 	n->nr_partial++;
 	if (tail == DEACTIVATE_TO_TAIL)
 		list_add_tail(&slab->slab_list, &n->partial);
@@ -2142,6 +2169,7 @@ static inline void remove_partial(struct kmem_cache_node *n,
 	lockdep_assert_held(&n->list_lock);
 	list_del(&slab->slab_list);
 	n->nr_partial--;
+	___remove_partial(n, slab);
 }
 
 /*
-- 
2.40.1
From: chengming.zhou@linux.dev
Subject: [RFC PATCH 2/5] slub: Don't manipulate slab list when used by cpu
Date: Tue, 17 Oct 2023 15:44:36 +0000
Message-Id: <20231017154439.3036608-3-chengming.zhou@linux.dev>
In-Reply-To: <20231017154439.3036608-1-chengming.zhou@linux.dev>

From: Chengming Zhou

The following patch will stop freezing a slab when moving it out of the
node partial list, so we can no longer rely on the frozen bit to indicate
whether we should manipulate the slab list.

Use the newly introduced on_partial() helper instead: it checks
slab->flags, which is protected by the node list_lock, so we know whether
the slab is on the node partial list.

Signed-off-by: Chengming Zhou
---
 mm/slub.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/mm/slub.c b/mm/slub.c
index e5356ad14951..27eac93baa13 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3636,6 +3636,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 	unsigned long counters;
 	struct kmem_cache_node *n = NULL;
 	unsigned long flags;
+	bool on_node_partial;
 
 	stat(s, FREE_SLOWPATH);
 
@@ -3683,6 +3684,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 				 */
 				spin_lock_irqsave(&n->list_lock, flags);
 
+				on_node_partial = on_partial(n, slab);
 			}
 		}
 
@@ -3711,6 +3713,15 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 		return;
 	}
 
+	/*
+	 * This slab was not on node partial list and not full either,
+	 * in which case we shouldn't manipulate its list, early return.
+	 */
+	if (!on_node_partial && prior) {
+		spin_unlock_irqrestore(&n->list_lock, flags);
+		return;
+	}
+
 	if (unlikely(!new.inuse && n->nr_partial >= s->min_partial))
 		goto slab_empty;
 
-- 
2.40.1

From: chengming.zhou@linux.dev
Subject: [RFC PATCH 3/5] slub: Optimize deactivate_slab()
Date: Tue, 17 Oct 2023 15:44:37 +0000
Message-Id: <20231017154439.3036608-4-chengming.zhou@linux.dev>
In-Reply-To: <20231017154439.3036608-1-chengming.zhou@linux.dev>

From: Chengming Zhou

Since the introduction of unfrozen slabs on the cpu partial list, we don't
need to synchronize the slab frozen state under the node list_lock. The
caller of deactivate_slab() and the caller of __slab_free() won't
manipulate the slab list concurrently, so we only need to take the node
list_lock in stage three, and only when we have to manipulate the slab
list in this path.
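The resulting shape can be sketched in userspace C. This is only a model of
the control flow under invented field packing and helpers: C11 atomics
stand in for the cmpxchg_double on slab->counters, a pthread mutex stands
in for the node list_lock, and min_partial is passed explicitly. The point
is the ordering: the state transition retries lock-free first, and the lock
is taken afterwards, only on the branch that actually touches the partial
list.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define FROZEN_BIT	(1u << 31)
#define INUSE_MASK	0xffffu

struct slab {
	_Atomic unsigned int counters;	/* bit 31: frozen, low bits: inuse */
	unsigned int objects;		/* total objects in the slab */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long nr_partial;

/* Stages one and two: unfreeze with a retry loop, no list_lock held. */
static unsigned int unfreeze(struct slab *s)
{
	unsigned int old = atomic_load(&s->counters);
	unsigned int new;

	do {
		new = old & ~FROZEN_BIT;
	} while (!atomic_compare_exchange_weak(&s->counters, &old, new));

	return new & INUSE_MASK;
}

/* Stage three: only now decide whether any list needs to change. */
static void deactivate(struct slab *s, unsigned long min_partial)
{
	unsigned int inuse = unfreeze(s);

	if (!inuse && nr_partial >= min_partial) {
		printf("discard empty slab\n");		/* discard_slab() case */
	} else if (inuse < s->objects) {
		pthread_mutex_lock(&list_lock);		/* lock only for the list update */
		nr_partial++;				/* add_partial() case */
		pthread_mutex_unlock(&list_lock);
		printf("moved to node partial list\n");
	} else {
		printf("full slab stays off all lists\n");	/* DEACTIVATE_FULL case */
	}
}

int main(void)
{
	struct slab s = { .counters = FROZEN_BIT | 3, .objects = 8 };

	deactivate(&s, 5);
	return 0;
}

Unlike the old M_PARTIAL path, a failed update never has to drop and retake
the lock, because the lock is not held across the retry loop at all.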
Signed-off-by: Chengming Zhou
---
 mm/slub.c | 70 ++++++++++++++++++++-----------------------------------
 1 file changed, 25 insertions(+), 45 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 27eac93baa13..5a9711b35c74 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2520,10 +2520,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
 static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 			    void *freelist)
 {
-	enum slab_modes { M_NONE, M_PARTIAL, M_FREE, M_FULL_NOLIST };
 	struct kmem_cache_node *n = get_node(s, slab_nid(slab));
 	int free_delta = 0;
-	enum slab_modes mode = M_NONE;
 	void *nextfree, *freelist_iter, *freelist_tail;
 	int tail = DEACTIVATE_TO_HEAD;
 	unsigned long flags = 0;
@@ -2570,58 +2568,40 @@ static void deactivate_slab(struct kmem_cache *s, struct slab *slab,
 	 * unfrozen and number of objects in the slab may have changed.
 	 * Then release lock and retry cmpxchg again.
 	 */
-redo:
-
-	old.freelist = READ_ONCE(slab->freelist);
-	old.counters = READ_ONCE(slab->counters);
-	VM_BUG_ON(!old.frozen);
-
-	/* Determine target state of the slab */
-	new.counters = old.counters;
-	if (freelist_tail) {
-		new.inuse -= free_delta;
-		set_freepointer(s, freelist_tail, old.freelist);
-		new.freelist = freelist;
-	} else
-		new.freelist = old.freelist;
+	do {
+		old.freelist = READ_ONCE(slab->freelist);
+		old.counters = READ_ONCE(slab->counters);
+		VM_BUG_ON(!old.frozen);
+
+		/* Determine target state of the slab */
+		new.counters = old.counters;
+		new.frozen = 0;
+		if (freelist_tail) {
+			new.inuse -= free_delta;
+			set_freepointer(s, freelist_tail, old.freelist);
+			new.freelist = freelist;
+		} else
+			new.freelist = old.freelist;
 
-	new.frozen = 0;
+	} while (!slab_update_freelist(s, slab,
+				       old.freelist, old.counters,
+				       new.freelist, new.counters,
+				       "unfreezing slab"));
 
+	/*
+	 * Stage three: Manipulate the slab list based on the updated state.
+	 */
 	if (!new.inuse && n->nr_partial >= s->min_partial) {
-		mode = M_FREE;
+		stat(s, DEACTIVATE_EMPTY);
+		discard_slab(s, slab);
+		stat(s, FREE_SLAB);
 	} else if (new.freelist) {
-		mode = M_PARTIAL;
-		/*
-		 * Taking the spinlock removes the possibility that
-		 * acquire_slab() will see a slab that is frozen
-		 */
 		spin_lock_irqsave(&n->list_lock, flags);
-	} else {
-		mode = M_FULL_NOLIST;
-	}
-
-
-	if (!slab_update_freelist(s, slab,
-				old.freelist, old.counters,
-				new.freelist, new.counters,
-				"unfreezing slab")) {
-		if (mode == M_PARTIAL)
-			spin_unlock_irqrestore(&n->list_lock, flags);
-		goto redo;
-	}
-
-
-	if (mode == M_PARTIAL) {
 		add_partial(n, slab, tail);
 		spin_unlock_irqrestore(&n->list_lock, flags);
 		stat(s, tail);
-	} else if (mode == M_FREE) {
-		stat(s, DEACTIVATE_EMPTY);
-		discard_slab(s, slab);
-		stat(s, FREE_SLAB);
-	} else if (mode == M_FULL_NOLIST) {
+	} else
 		stat(s, DEACTIVATE_FULL);
-	}
 }
 
 #ifdef CONFIG_SLUB_CPU_PARTIAL
-- 
2.40.1

From: chengming.zhou@linux.dev
Subject: [RFC PATCH 4/5] slub: Don't freeze slabs for cpu partial
Date: Tue, 17 Oct 2023 15:44:38 +0000
Message-Id: <20231017154439.3036608-5-chengming.zhou@linux.dev>
In-Reply-To: <20231017154439.3036608-1-chengming.zhou@linux.dev>

From: Chengming Zhou

Currently we freeze a slab when moving it from the node partial list to
the cpu partial list. This needs two cmpxchg_double operations:

1. freeze the slab (acquire_slab()) under the node list_lock
2. get_freelist() when the slab is picked up and used in ___slab_alloc()

Actually we don't need to freeze the slab when moving it out of the node
partial list; we can delay the freeze until we grab the slab's freelist in
___slab_alloc(), which saves one cmpxchg_double(). There are other
benefits as well:

1. Moving slabs between the node partial list and the cpu partial list
   becomes simpler, since we don't need to freeze or unfreeze at all.

2. Node list_lock contention is reduced, since we only need to freeze one
   slab under the node list_lock. (In fact, we can first move slabs out of
   the node partial list without freezing any slab at all, so contention
   on the slab won't turn into node list_lock contention.)

We can do this because no concurrent path manipulates the partial slab
list except the __slab_free() path, which is serialized by the newly
introduced slab->flags.

Note this patch only changes the part that moves the partial slabs, to
keep the code review easy; the other parts are fixed in the following
patches.

Signed-off-by: Chengming Zhou
---
 mm/slub.c | 61 ++++++++++++++++----------------------------------
 1 file changed, 17 insertions(+), 44 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 5a9711b35c74..044235bd8a45 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2329,19 +2329,21 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 			continue;
 		}
 
-		t = acquire_slab(s, n, slab, object == NULL);
-		if (!t)
-			break;
-
 		if (!object) {
-			*pc->slab = slab;
-			stat(s, ALLOC_FROM_PARTIAL);
-			object = t;
-		} else {
-			put_cpu_partial(s, slab, 0);
-			stat(s, CPU_PARTIAL_NODE);
-			partial_slabs++;
+			t = acquire_slab(s, n, slab, object == NULL);
+			if (t) {
+				*pc->slab = slab;
+				stat(s, ALLOC_FROM_PARTIAL);
+				object = t;
+				continue;
+			}
 		}
+
+		remove_partial(n, slab);
+		put_cpu_partial(s, slab, 0);
+		stat(s, CPU_PARTIAL_NODE);
+		partial_slabs++;
+
 #ifdef CONFIG_SLUB_CPU_PARTIAL
 		if (!kmem_cache_has_cpu_partial(s)
 			|| partial_slabs > s->cpu_partial_slabs / 2)
@@ -2612,9 +2614,6 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
 	unsigned long flags = 0;
 
 	while (partial_slab) {
-		struct slab new;
-		struct slab old;
-
 		slab = partial_slab;
 		partial_slab = slab->next;
 
@@ -2627,23 +2626,7 @@ static void __unfreeze_partials(struct kmem_cache *s, struct slab *partial_slab)
 			spin_lock_irqsave(&n->list_lock, flags);
 		}
 
-		do {
-
-			old.freelist = slab->freelist;
-			old.counters = slab->counters;
-			VM_BUG_ON(!old.frozen);
-
-			new.counters = old.counters;
-			new.freelist = old.freelist;
-
-			new.frozen = 0;
-
-		} while (!__slab_update_freelist(s, slab,
-				old.freelist, old.counters,
-				new.freelist, new.counters,
-				"unfreezing slab"));
-
-		if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
+		if (unlikely(!slab->inuse && n->nr_partial >= s->min_partial)) {
 			slab->next = slab_to_discard;
 			slab_to_discard = slab;
 		} else {
@@ -3640,18 +3623,8 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 		was_frozen = new.frozen;
 		new.inuse -= cnt;
 		if ((!new.inuse || !prior) && !was_frozen) {
-
-			if (kmem_cache_has_cpu_partial(s) && !prior) {
-
-				/*
-				 * Slab was on no list before and will be
-				 * partially empty
-				 * We can defer the list move and instead
-				 * freeze it.
-				 */
-				new.frozen = 1;
-
-			} else { /* Needs to be taken off a list */
+			/* Needs to be taken off a list */
+			if (!kmem_cache_has_cpu_partial(s) || prior) {
 
 				n = get_node(s, slab_nid(slab));
 				/*
@@ -3681,7 +3654,7 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
 		 * activity can be necessary.
 		 */
 		stat(s, FREE_FROZEN);
-	} else if (new.frozen) {
+	} else if (kmem_cache_has_cpu_partial(s) && !prior) {
 		/*
 		 * If we just froze the slab then put it onto the
 		 * per cpu partial list.
-- 
2.40.1

From: chengming.zhou@linux.dev
Subject: [RFC PATCH 5/5] slub: Introduce get_cpu_partial()
Date: Tue, 17 Oct 2023 15:44:39 +0000
Message-Id: <20231017154439.3036608-6-chengming.zhou@linux.dev>
In-Reply-To: <20231017154439.3036608-1-chengming.zhou@linux.dev>

From: Chengming Zhou

Since the slabs on the cpu partial list are not frozen anymore, introduce
get_cpu_partial() to get a frozen slab together with its freelist from the
cpu partial list. It now works much like getting a frozen slab with its
freelist from the node partial list.

Another change is in get_partial(): it can return no frozen slab when
acquire_slab() fails on every slab, while still having put some unfrozen
slabs on its cpu partial list, so we need to check for this rare case to
avoid allocating a new slab.
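As a rough userspace sketch of the get_cpu_partial() idea (single-threaded,
with an atomic exchange standing in for the cmpxchg_double retry loop and
the per-cpu lock left out): pop a slab off the cpu partial list, then
freeze it by detaching its whole freelist in one atomic step, which is the
one cmpxchg_double that remains in the real code. All names below are
invented for the example.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct object {
	struct object *next;
};

struct slab {
	_Atomic(struct object *) freelist;
	struct slab *next;		/* cpu partial list linkage */
	int frozen;
};

static struct slab *cpu_partial;	/* per-cpu in the kernel, one list here */

/* Pop one slab off the cpu partial list and freeze it by taking its freelist. */
static struct object *get_cpu_partial(struct slab **out)
{
	while (cpu_partial) {
		struct slab *slab = cpu_partial;
		struct object *freelist;

		cpu_partial = slab->next;

		/* Detach the whole freelist atomically; this is the "freeze". */
		freelist = atomic_exchange(&slab->freelist, (struct object *)NULL);
		slab->frozen = 1;

		if (freelist) {
			*out = slab;
			return freelist;
		}
		/* No free objects in this slab: keep scanning the list. */
	}
	return NULL;
}

int main(void)
{
	struct object objs[2] = { { &objs[1] }, { NULL } };
	struct slab s = { .freelist = &objs[0] };
	struct slab *picked = NULL;
	struct object *fl;

	cpu_partial = &s;
	fl = get_cpu_partial(&picked);
	if (fl)
		printf("froze slab %p, got freelist %p\n", (void *)picked, (void *)fl);
	else
		printf("cpu partial list had no usable slab\n");
	return 0;
}

The failure handling in the real patch differs (a node or pfmemalloc
mismatch hands the slab to __unfreeze_partials() rather than skipping it),
but the ordering is the same: list manipulation first, freeze last.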
Signed-off-by: Chengming Zhou
---
 mm/slub.c | 87 +++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 68 insertions(+), 19 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 044235bd8a45..d58eaf8447fd 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3064,6 +3064,68 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
 	return freelist;
 }
 
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+
+static void *get_cpu_partial(struct kmem_cache *s, struct kmem_cache_cpu *c,
+			     struct slab **slabptr, int node, gfp_t gfpflags)
+{
+	unsigned long flags;
+	struct slab *slab;
+	struct slab new;
+	unsigned long counters;
+	void *freelist;
+
+	while (slub_percpu_partial(c)) {
+		local_lock_irqsave(&s->cpu_slab->lock, flags);
+		if (unlikely(!slub_percpu_partial(c))) {
+			local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+			/* we were preempted and partial list got empty */
+			return NULL;
+		}
+
+		slab = slub_percpu_partial(c);
+		slub_set_percpu_partial(c, slab);
+		local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+		stat(s, CPU_PARTIAL_ALLOC);
+
+		if (unlikely(!node_match(slab, node) ||
+			     !pfmemalloc_match(slab, gfpflags))) {
+			slab->next = NULL;
+			__unfreeze_partials(s, slab);
+			continue;
+		}
+
+		do {
+			freelist = slab->freelist;
+			counters = slab->counters;
+
+			new.counters = counters;
+			VM_BUG_ON(new.frozen);
+
+			new.inuse = slab->objects;
+			new.frozen = 1;
+		} while (!__slab_update_freelist(s, slab,
+						 freelist, counters,
+						 NULL, new.counters,
+						 "get_cpu_partial"));
+
+		*slabptr = slab;
+		return freelist;
+	}
+
+	return NULL;
+}
+
+#else /* CONFIG_SLUB_CPU_PARTIAL */
+
+static void *get_cpu_partial(struct kmem_cache *s, struct kmem_cache_cpu *c,
+			     struct slab **slabptr, int node, gfp_t gfpflags)
+{
+	return NULL;
+}
+
+#endif
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
@@ -3106,7 +3168,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 			node = NUMA_NO_NODE;
 		goto new_slab;
 	}
-redo:
 
 	if (unlikely(!node_match(slab, node))) {
 		/*
@@ -3182,24 +3243,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 
 new_slab:
 
-	if (slub_percpu_partial(c)) {
-		local_lock_irqsave(&s->cpu_slab->lock, flags);
-		if (unlikely(c->slab)) {
-			local_unlock_irqrestore(&s->cpu_slab->lock, flags);
-			goto reread_slab;
-		}
-		if (unlikely(!slub_percpu_partial(c))) {
-			local_unlock_irqrestore(&s->cpu_slab->lock, flags);
-			/* we were preempted and partial list got empty */
-			goto new_objects;
-		}
-
-		slab = c->slab = slub_percpu_partial(c);
-		slub_set_percpu_partial(c, slab);
-		local_unlock_irqrestore(&s->cpu_slab->lock, flags);
-		stat(s, CPU_PARTIAL_ALLOC);
-		goto redo;
-	}
+	freelist = get_cpu_partial(s, c, &slab, node, gfpflags);
+	if (freelist)
+		goto retry_load_slab;
 
 new_objects:
 
@@ -3210,6 +3256,9 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	if (freelist)
 		goto check_new_slab;
 
+	if (slub_percpu_partial(c))
+		goto new_slab;
+
 	slub_put_cpu_ptr(s->cpu_slab);
 	slab = new_slab(s, gfpflags, node);
 	c = slub_get_cpu_ptr(s->cpu_slab);
-- 
2.40.1