The pointer to barn currently exists in struct kmem_cache_node. That
struct is instantiated for every NUMA node with memory, but we want to
have a barn for every online node (including memoryless).
Thus decouple the two structures. In struct kmem_cache we have an array
for kmem_cache_node pointers that appears to be sized MAX_NUMNODES but
the actual size calculation in kmem_cache_init() uses nr_node_ids.
Therefore we can't just add another array of barn pointers. Instead
change the array's element type to a newly introduced struct
kmem_cache_per_node_ptrs holding both the kmem_cache_node and barn pointers.
Adjust barn accessor and allocation/initialization code accordingly. For
now no functional change intended, barns are created 1:1 together with
kmem_cache_nodes.
Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
---
mm/slab.h | 7 +++-
mm/slub.c | 128 +++++++++++++++++++++++++++++++++++---------------------------
2 files changed, 78 insertions(+), 57 deletions(-)
diff --git a/mm/slab.h b/mm/slab.h
index e9ab292acd22..c735e6b4dddb 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -191,6 +191,11 @@ struct kmem_cache_order_objects {
unsigned int x;
};
+struct kmem_cache_per_node_ptrs {
+ struct node_barn *barn;
+ struct kmem_cache_node *node;
+};
+
/*
* Slab cache management.
*/
@@ -247,7 +252,7 @@ struct kmem_cache {
struct kmem_cache_stats __percpu *cpu_stats;
#endif
- struct kmem_cache_node *node[MAX_NUMNODES];
+ struct kmem_cache_per_node_ptrs per_node[MAX_NUMNODES];
};
/*
diff --git a/mm/slub.c b/mm/slub.c
index 20cb4f3b636d..609a183f8533 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -59,7 +59,7 @@
* 0. cpu_hotplug_lock
* 1. slab_mutex (Global Mutex)
* 2a. kmem_cache->cpu_sheaves->lock (Local trylock)
- * 2b. node->barn->lock (Spinlock)
+ * 2b. barn->lock (Spinlock)
* 2c. node->list_lock (Spinlock)
* 3. slab_lock(slab) (Only on some arches)
* 4. object_map_lock (Only for debugging)
@@ -136,7 +136,7 @@
* or spare sheaf can handle the allocation or free, there is no other
* overhead.
*
- * node->barn->lock (spinlock)
+ * barn->lock (spinlock)
*
* This lock protects the operations on per-NUMA-node barn. It can quickly
* serve an empty or full sheaf if available, and avoid more expensive refill
@@ -436,26 +436,24 @@ struct kmem_cache_node {
atomic_long_t total_objects;
struct list_head full;
#endif
- struct node_barn *barn;
};
static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
{
- return s->node[node];
+ return s->per_node[node].node;
+}
+
+static inline struct node_barn *get_barn_node(struct kmem_cache *s, int node)
+{
+ return s->per_node[node].barn;
}
/*
- * Get the barn of the current cpu's closest memory node. It may not exist on
- * systems with memoryless nodes but without CONFIG_HAVE_MEMORYLESS_NODES
+ * Get the barn of the current cpu's memory node. It may be a memoryless node.
*/
static inline struct node_barn *get_barn(struct kmem_cache *s)
{
- struct kmem_cache_node *n = get_node(s, numa_mem_id());
-
- if (!n)
- return NULL;
-
- return n->barn;
+ return get_barn_node(s, numa_node_id());
}
/*
@@ -5791,7 +5789,6 @@ bool free_to_pcs(struct kmem_cache *s, void *object, bool allow_spin)
static void rcu_free_sheaf(struct rcu_head *head)
{
- struct kmem_cache_node *n;
struct slab_sheaf *sheaf;
struct node_barn *barn = NULL;
struct kmem_cache *s;
@@ -5814,12 +5811,10 @@ static void rcu_free_sheaf(struct rcu_head *head)
if (__rcu_free_sheaf_prepare(s, sheaf))
goto flush;
- n = get_node(s, sheaf->node);
- if (!n)
+ barn = get_barn_node(s, sheaf->node);
+ if (!barn)
goto flush;
- barn = n->barn;
-
/* due to slab_free_hook() */
if (unlikely(sheaf->size == 0))
goto empty;
@@ -7430,7 +7425,7 @@ static inline int calculate_order(unsigned int size)
}
static void
-init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn)
+init_kmem_cache_node(struct kmem_cache_node *n)
{
n->nr_partial = 0;
spin_lock_init(&n->list_lock);
@@ -7440,9 +7435,6 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct node_barn *barn)
atomic_long_set(&n->total_objects, 0);
INIT_LIST_HEAD(&n->full);
#endif
- n->barn = barn;
- if (barn)
- barn_init(barn);
}
#ifdef CONFIG_SLUB_STATS
@@ -7537,8 +7529,8 @@ static void early_kmem_cache_node_alloc(int node)
n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL, false);
slab->freelist = get_freepointer(kmem_cache_node, n);
slab->inuse = 1;
- kmem_cache_node->node[node] = n;
- init_kmem_cache_node(n, NULL);
+ kmem_cache_node->per_node[node].node = n;
+ init_kmem_cache_node(n);
inc_slabs_node(kmem_cache_node, node, slab->objects);
/*
@@ -7553,15 +7545,20 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
int node;
struct kmem_cache_node *n;
- for_each_kmem_cache_node(s, node, n) {
- if (n->barn) {
- WARN_ON(n->barn->nr_full);
- WARN_ON(n->barn->nr_empty);
- kfree(n->barn);
- n->barn = NULL;
- }
+ for_each_node(node) {
+ struct node_barn *barn = get_barn_node(s, node);
- s->node[node] = NULL;
+ if (!barn)
+ continue;
+
+ WARN_ON(barn->nr_full);
+ WARN_ON(barn->nr_empty);
+ kfree(barn);
+ s->per_node[node].barn = NULL;
+ }
+
+ for_each_kmem_cache_node(s, node, n) {
+ s->per_node[node].node = NULL;
kmem_cache_free(kmem_cache_node, n);
}
}
@@ -7582,31 +7579,36 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
for_each_node_mask(node, slab_nodes) {
struct kmem_cache_node *n;
- struct node_barn *barn = NULL;
if (slab_state == DOWN) {
early_kmem_cache_node_alloc(node);
continue;
}
- if (cache_has_sheaves(s)) {
- barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
-
- if (!barn)
- return 0;
- }
-
n = kmem_cache_alloc_node(kmem_cache_node,
GFP_KERNEL, node);
- if (!n) {
- kfree(barn);
+ if (!n)
return 0;
- }
- init_kmem_cache_node(n, barn);
+ init_kmem_cache_node(n);
+ s->per_node[node].node = n;
+ }
+
+ if (slab_state == DOWN || !cache_has_sheaves(s))
+ return 1;
+
+ for_each_node_mask(node, slab_nodes) {
+ struct node_barn *barn;
+
+ barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, node);
+
+ if (!barn)
+ return 0;
- s->node[node] = n;
+ barn_init(barn);
+ s->per_node[node].barn = barn;
}
+
return 1;
}
@@ -7895,10 +7897,15 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
if (cache_has_sheaves(s))
rcu_barrier();
+ for_each_node(node) {
+ struct node_barn *barn = get_barn_node(s, node);
+
+ if (barn)
+ barn_shrink(s, barn);
+ }
+
/* Attempt to free all objects */
for_each_kmem_cache_node(s, node, n) {
- if (n->barn)
- barn_shrink(s, n->barn);
free_partial(s, n);
if (n->nr_partial || node_nr_slabs(n))
return 1;
@@ -8108,14 +8115,18 @@ static int __kmem_cache_do_shrink(struct kmem_cache *s)
unsigned long flags;
int ret = 0;
+ for_each_node(node) {
+ struct node_barn *barn = get_barn_node(s, node);
+
+ if (barn)
+ barn_shrink(s, barn);
+ }
+
for_each_kmem_cache_node(s, node, n) {
INIT_LIST_HEAD(&discard);
for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
INIT_LIST_HEAD(promote + i);
- if (n->barn)
- barn_shrink(s, n->barn);
-
spin_lock_irqsave(&n->list_lock, flags);
/*
@@ -8204,7 +8215,8 @@ static int slab_mem_going_online_callback(int nid)
if (get_node(s, nid))
continue;
- if (cache_has_sheaves(s)) {
+ if (cache_has_sheaves(s) && !get_barn_node(s, nid)) {
+
barn = kmalloc_node(sizeof(*barn), GFP_KERNEL, nid);
if (!barn) {
@@ -8225,13 +8237,17 @@ static int slab_mem_going_online_callback(int nid)
goto out;
}
- init_kmem_cache_node(n, barn);
+ init_kmem_cache_node(n);
+ s->per_node[nid].node = n;
- s->node[nid] = n;
+ if (barn) {
+ barn_init(barn);
+ s->per_node[nid].barn = barn;
+ }
}
/*
* Any cache created after this point will also have kmem_cache_node
- * initialized for the new node.
+ * and barn initialized for the new node.
*/
node_set(nid, slab_nodes);
out:
@@ -8323,7 +8339,7 @@ static void __init bootstrap_cache_sheaves(struct kmem_cache *s)
}
barn_init(barn);
- get_node(s, node)->barn = barn;
+ s->per_node[node].barn = barn;
}
for_each_possible_cpu(cpu) {
@@ -8394,8 +8410,8 @@ void __init kmem_cache_init(void)
slab_state = PARTIAL;
create_boot_cache(kmem_cache, "kmem_cache",
- offsetof(struct kmem_cache, node) +
- nr_node_ids * sizeof(struct kmem_cache_node *),
+ offsetof(struct kmem_cache, per_node) +
+ nr_node_ids * sizeof(struct kmem_cache_per_node_ptrs),
SLAB_HWCACHE_ALIGN | SLAB_NO_OBJ_EXT, 0, 0);
kmem_cache = bootstrap(&boot_kmem_cache);
--
2.53.0
On Wed, Mar 11, 2026 at 09:25:55AM +0100, Vlastimil Babka (SUSE) wrote:
> The pointer to barn currently exists in struct kmem_cache_node. That
> struct is instantiated for every NUMA node with memory, but we want to
> have a barn for every online node (including memoryless).
>
> Thus decouple the two structures. In struct kmem_cache we have an array
> for kmem_cache_node pointers that appears to be sized MAX_NUMNODES but
> the actual size calculation in kmem_cache_init() uses nr_node_ids.
> Therefore we can't just add another array of barn pointers. Instead
> change the array to newly introduced struct kmem_cache_per_node_ptrs
> holding both kmem_cache_node and barn pointer.
>
> Adjust barn accessor and allocation/initialization code accordingly. For
> now no functional change intended, barns are created 1:1 together with
> kmem_cache_nodes.
>
> Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
> ---
> mm/slab.h | 7 +++-
> mm/slub.c | 128 +++++++++++++++++++++++++++++++++++---------------------------
> 2 files changed, 78 insertions(+), 57 deletions(-)
>
> diff --git a/mm/slab.h b/mm/slab.h
> index e9ab292acd22..c735e6b4dddb 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -247,7 +252,7 @@ struct kmem_cache {
> struct kmem_cache_stats __percpu *cpu_stats;
> #endif
>
> - struct kmem_cache_node *node[MAX_NUMNODES];
> + struct kmem_cache_per_node_ptrs per_node[MAX_NUMNODES];
> };
We should probably turn this into a true flexible array at some point,
but that's out of scope for this patchset.
> diff --git a/mm/slub.c b/mm/slub.c
> index 20cb4f3b636d..609a183f8533 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -436,26 +436,24 @@ struct kmem_cache_node {
> /*
> - * Get the barn of the current cpu's closest memory node. It may not exist on
> - * systems with memoryless nodes but without CONFIG_HAVE_MEMORYLESS_NODES
> + * Get the barn of the current cpu's memory node. It may be a memoryless node.
> */
> static inline struct node_barn *get_barn(struct kmem_cache *s)
> {
> - struct kmem_cache_node *n = get_node(s, numa_mem_id());
> -
> - if (!n)
> - return NULL;
> -
> - return n->barn;
> + return get_barn_node(s, numa_node_id());
> }
Previously, memoryless nodes on architectures w/ CONFIG_HAVE_MEMORYLESS_NODES
shared the barn of the nearest NUMA node with memory.
But now memoryless nodes will have their own barns (after patch 2)
regardless of CONFIG_HAVE_MEMORYLESS_NODES, and that's intentional, right?
Otherwise LGTM!
--
Cheers,
Harry / Hyeonggon
On 3/13/26 10:27, Harry Yoo wrote:
> On Wed, Mar 11, 2026 at 09:25:55AM +0100, Vlastimil Babka (SUSE) wrote:
>> The pointer to barn currently exists in struct kmem_cache_node. That
>> struct is instantiated for every NUMA node with memory, but we want to
>> have a barn for every online node (including memoryless).
>>
>> Thus decouple the two structures. In struct kmem_cache we have an array
>> for kmem_cache_node pointers that appears to be sized MAX_NUMNODES but
>> the actual size calculation in kmem_cache_init() uses nr_node_ids.
>> Therefore we can't just add another array of barn pointers. Instead
>> change the array to newly introduced struct kmem_cache_per_node_ptrs
>> holding both kmem_cache_node and barn pointer.
>>
>> Adjust barn accessor and allocation/initialization code accordingly. For
>> now no functional change intended, barns are created 1:1 together with
>> kmem_cache_nodes.
>>
>> Signed-off-by: Vlastimil Babka (SUSE) <vbabka@kernel.org>
>> ---
>> mm/slab.h | 7 +++-
>> mm/slub.c | 128 +++++++++++++++++++++++++++++++++++---------------------------
>> 2 files changed, 78 insertions(+), 57 deletions(-)
>>
>> diff --git a/mm/slab.h b/mm/slab.h
>> index e9ab292acd22..c735e6b4dddb 100644
>> --- a/mm/slab.h
>> +++ b/mm/slab.h
>> @@ -247,7 +252,7 @@ struct kmem_cache {
>> struct kmem_cache_stats __percpu *cpu_stats;
>> #endif
>>
>> - struct kmem_cache_node *node[MAX_NUMNODES];
>> + struct kmem_cache_per_node_ptrs per_node[MAX_NUMNODES];
>> };
>
> We should probably turn this into a true flexible array at some point,
> but that's out of scope for this patchset.
Right.
>> diff --git a/mm/slub.c b/mm/slub.c
>> index 20cb4f3b636d..609a183f8533 100644
>> --- a/mm/slub.c
>> +++ b/mm/slub.c
>> @@ -436,26 +436,24 @@ struct kmem_cache_node {
>> /*
>> - * Get the barn of the current cpu's closest memory node. It may not exist on
>> - * systems with memoryless nodes but without CONFIG_HAVE_MEMORYLESS_NODES
>> + * Get the barn of the current cpu's memory node. It may be a memoryless node.
>> */
>> static inline struct node_barn *get_barn(struct kmem_cache *s)
>> {
>> - struct kmem_cache_node *n = get_node(s, numa_mem_id());
>> -
>> - if (!n)
>> - return NULL;
>> -
>> - return n->barn;
>> + return get_barn_node(s, numa_node_id());
>> }
>
> Previously, memoryless nodes on architectures w/ CONFIG_HAVE_MEMORYLESS_NODES
> shared the barn of the nearest NUMA node with memory.
>
> But now memoryless nodes will have their own barns (after patch 2)
> regardless of CONFIG_HAVE_MEMORYLESS_NODES, and that's intentional, right?
Yeah it improves their caching capacity, but good point, will mention it in
the changelog.
> Otherwise LGTM!
>
On Fri, Mar 13, 2026 at 10:46:15AM +0100, Vlastimil Babka (SUSE) wrote:
> On 3/13/26 10:27, Harry Yoo wrote:
> > On Wed, Mar 11, 2026 at 09:25:55AM +0100, Vlastimil Babka (SUSE) wrote:
> >> diff --git a/mm/slub.c b/mm/slub.c
> >> index 20cb4f3b636d..609a183f8533 100644
> >> --- a/mm/slub.c
> >> +++ b/mm/slub.c
> >> @@ -436,26 +436,24 @@ struct kmem_cache_node {
> >> /*
> >> - * Get the barn of the current cpu's closest memory node. It may not exist on
> >> - * systems with memoryless nodes but without CONFIG_HAVE_MEMORYLESS_NODES
> >> + * Get the barn of the current cpu's memory node. It may be a memoryless node.
> >> */
> >> static inline struct node_barn *get_barn(struct kmem_cache *s)
> >> {
> >> - struct kmem_cache_node *n = get_node(s, numa_mem_id());
> >> -
> >> - if (!n)
> >> - return NULL;
> >> -
> >> - return n->barn;
> >> + return get_barn_node(s, numa_node_id());
> >> }
> >
> > Previously, memoryless nodes on architectures w/ CONFIG_HAVE_MEMORYLESS_NODES
> > shared the barn of the nearest NUMA node with memory.
> >
> > But now memoryless nodes will have their own barns (after patch 2)
> > regardless of CONFIG_HAVE_MEMORYLESS_NODES, and that's intentional, right?
>
> Yeah it improves their caching capacity, but good point, will mention it in
> the changelog.
Thanks! just wanted to check that it was intentional.
with that, please feel free to add:
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
--
Cheers,
Harry / Hyeonggon
On 3/13/26 12:48, Harry Yoo wrote:
> On Fri, Mar 13, 2026 at 10:46:15AM +0100, Vlastimil Babka (SUSE) wrote:
>> On 3/13/26 10:27, Harry Yoo wrote:
>> > On Wed, Mar 11, 2026 at 09:25:55AM +0100, Vlastimil Babka (SUSE) wrote:
>> >> diff --git a/mm/slub.c b/mm/slub.c
>> >> index 20cb4f3b636d..609a183f8533 100644
>> >> --- a/mm/slub.c
>> >> +++ b/mm/slub.c
>> >> @@ -436,26 +436,24 @@ struct kmem_cache_node {
>> >> /*
>> >> - * Get the barn of the current cpu's closest memory node. It may not exist on
>> >> - * systems with memoryless nodes but without CONFIG_HAVE_MEMORYLESS_NODES
>> >> + * Get the barn of the current cpu's memory node. It may be a memoryless node.
>> >> */
>> >> static inline struct node_barn *get_barn(struct kmem_cache *s)
>> >> {
>> >> - struct kmem_cache_node *n = get_node(s, numa_mem_id());
>> >> -
>> >> - if (!n)
>> >> - return NULL;
>> >> -
>> >> - return n->barn;
>> >> + return get_barn_node(s, numa_node_id());
>> >> }
>> >
>> > Previously, memoryless nodes on architectures w/ CONFIG_HAVE_MEMORYLESS_NODES
>> > shared the barn of the nearest NUMA node with memory.
>> >
>> > But now memoryless nodes will have their own barns (after patch 2)
>> > regardless of CONFIG_HAVE_MEMORYLESS_NODES, and that's intentional, right?
>>
>> Yeah it improves their caching capacity, but good point, will mention it in
>> the changelog.
>
> Thanks! just wanted to check that it was intentional.
I wanted to update the changelog as promised, but realized that the change
from numa_mem_id() to numa_node_id() in get_barn() should actually be done
only in patch 2, so I will move it there. In patch 1 that would mean no barns
for CPUs on memoryless nodes with CONFIG_HAVE_MEMORYLESS_NODES and thus a performance bisection hazard.
> with that, please feel free to add:
> Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Thanks!
© 2016 - 2026 Red Hat, Inc.