[PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover

Posted by Harry Yoo 1 month, 2 weeks ago
The leftover space in a slab is always smaller than s->size, and
kmem caches for large objects that are not power-of-two sizes tend to have
a greater amount of leftover space per slab. In some cases, the leftover
space is larger than the size of the slabobj_ext array for the slab.

An excellent example of such a cache is ext4_inode_cache. On my system,
the object size is 1144 bytes, with a preferred order of 3, 28 objects
per slab, and 736 bytes of leftover space per slab.

Since the size of the slabobj_ext array is only 224 bytes (w/o mem
profiling) or 448 bytes (w/ mem profiling) per slab, the entire array
fits within the leftover space.
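
A quick back-of-the-envelope check of these numbers (an illustrative
sketch, not part of the patch; the per-entry sizes of 8 bytes without
and 16 bytes with memory allocation profiling are assumptions consistent
with the 224/448-byte totals above):

#include <stdio.h>

int main(void)
{
        unsigned long slab_bytes = 8UL * 4096;  /* order-3 slab: 32768 bytes */
        unsigned long objects = 28;
        unsigned long obj_size = 1144;          /* ext4_inode_cache s->size  */

        /* 32768 - 28 * 1144 = 736 bytes of leftover per slab */
        printf("leftover    : %lu bytes\n", slab_bytes - objects * obj_size);
        /* 28 * 8 = 224 bytes (w/o mem profiling), 28 * 16 = 448 bytes (w/) */
        printf("array  (8 B): %lu bytes\n", objects * 8);
        printf("array (16 B): %lu bytes\n", objects * 16);
        return 0;
}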

Allocate the slabobj_ext array from this unused space instead of using
kcalloc() when the leftover is large enough. The array is placed in the
unused space only when a new slab is created; alloc_slab_obj_exts()
calls made after slab creation do not try to use the unused space,
because implementing such lazy allocation requires more expensive
synchronization.

The implementation and evaluation of lazy allocation from unused space
are left as future work. As pointed out by Vlastimil Babka [1], it could
be beneficial when a slab cache is created without SLAB_ACCOUNT but some
of the allocations from that cache use __GFP_ACCOUNT; xarray does that,
for example.

To avoid unnecessary overhead when neither MEMCG (with SLAB_ACCOUNT) nor
MEM_ALLOC_PROFILING is used for the cache, allocate the slabobj_ext
array from the leftover only when at least one of them is enabled.

[ MEMCG=y, MEM_ALLOC_PROFILING=n ]

Before patch (creating ~2.64M directories on ext4):
  Slab:            4747880 kB
  SReclaimable:    4169652 kB
  SUnreclaim:       578228 kB

After patch (creating ~2.64M directories on ext4):
  Slab:            4724020 kB
  SReclaimable:    4169188 kB
  SUnreclaim:       554832 kB (-22.84 MiB)

Enjoy the memory savings!

Link: https://lore.kernel.org/linux-mm/48029aab-20ea-4d90-bfd1-255592b2018e@suse.cz [1]
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
---
 mm/slub.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 151 insertions(+), 5 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 39c381cc1b2c..3fc3d2ca42e7 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -886,6 +886,99 @@ static inline unsigned long get_orig_size(struct kmem_cache *s, void *object)
 	return *(unsigned long *)p;
 }
 
+#ifdef CONFIG_SLAB_OBJ_EXT
+
+/*
+ * Check if memory cgroup or memory allocation profiling is enabled.
+ * If enabled, SLUB tries to reduce memory overhead of accounting
+ * slab objects. If neither is enabled when this function is called,
+ * the optimization is simply skipped to avoid affecting caches that do not
+ * need slabobj_ext metadata.
+ *
+ * However, this may disable optimization when memory cgroup or memory
+ * allocation profiling is used, but slabs are created too early
+ * even before those subsystems are initialized.
+ */
+static inline bool need_slab_obj_exts(struct kmem_cache *s)
+{
+	if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
+		return true;
+
+	if (mem_alloc_profiling_enabled())
+		return true;
+
+	return false;
+}
+
+static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
+{
+	return sizeof(struct slabobj_ext) * slab->objects;
+}
+
+static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
+						    struct slab *slab)
+{
+	unsigned long objext_offset;
+
+	objext_offset = s->red_left_pad + s->size * slab->objects;
+	objext_offset = ALIGN(objext_offset, sizeof(struct slabobj_ext));
+	return objext_offset;
+}
+
+static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
+						     struct slab *slab)
+{
+	unsigned long objext_offset = obj_exts_offset_in_slab(s, slab);
+	unsigned long objext_size = obj_exts_size_in_slab(slab);
+
+	return objext_offset + objext_size <= slab_size(slab);
+}
+
+static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
+{
+	unsigned long expected;
+	unsigned long obj_exts;
+
+	obj_exts = slab_obj_exts(slab);
+	if (!obj_exts)
+		return false;
+
+	if (!obj_exts_fit_within_slab_leftover(s, slab))
+		return false;
+
+	expected = (unsigned long)slab_address(slab);
+	expected += obj_exts_offset_in_slab(s, slab);
+	return obj_exts == expected;
+}
+#else
+static inline bool need_slab_obj_exts(struct kmem_cache *s)
+{
+	return false;
+}
+
+static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
+{
+	return 0;
+}
+
+static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
+						    struct slab *slab)
+{
+	return 0;
+}
+
+static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
+						     struct slab *slab)
+{
+	return false;
+}
+
+static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_SLUB_DEBUG
 
 /*
@@ -1405,7 +1498,15 @@ slab_pad_check(struct kmem_cache *s, struct slab *slab)
 	start = slab_address(slab);
 	length = slab_size(slab);
 	end = start + length;
-	remainder = length % s->size;
+
+	if (obj_exts_in_slab(s, slab)) {
+		remainder = length;
+		remainder -= obj_exts_offset_in_slab(s, slab);
+		remainder -= obj_exts_size_in_slab(slab);
+	} else {
+		remainder = length % s->size;
+	}
+
 	if (!remainder)
 		return;
 
@@ -2179,6 +2280,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
 		return;
 	}
 
+	if (obj_exts_in_slab(slab->slab_cache, slab)) {
+		slab->obj_exts = 0;
+		return;
+	}
+
 	/*
 	 * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
 	 * corresponding extension will be NULL. alloc_tag_sub() will throw a
@@ -2194,6 +2300,35 @@ static inline void free_slab_obj_exts(struct slab *slab)
 	slab->obj_exts = 0;
 }
 
+/*
+ * Try to allocate slabobj_ext array from unused space.
+ * This function must be called on a freshly allocated slab to prevent
+ * concurrency problems.
+ */
+static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
+{
+	void *addr;
+	unsigned long obj_exts;
+
+	if (!need_slab_obj_exts(s))
+		return;
+
+	if (obj_exts_fit_within_slab_leftover(s, slab)) {
+		addr = slab_address(slab) + obj_exts_offset_in_slab(s, slab);
+		addr = kasan_reset_tag(addr);
+		obj_exts = (unsigned long)addr;
+
+		get_slab_obj_exts(obj_exts);
+		memset(addr, 0, obj_exts_size_in_slab(slab));
+		put_slab_obj_exts(obj_exts);
+
+		if (IS_ENABLED(CONFIG_MEMCG))
+			obj_exts |= MEMCG_DATA_OBJEXTS;
+		slab->obj_exts = obj_exts;
+		slab_set_stride(slab, sizeof(struct slabobj_ext));
+	}
+}
+
 #else /* CONFIG_SLAB_OBJ_EXT */
 
 static inline void init_slab_obj_exts(struct slab *slab)
@@ -2210,6 +2345,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
 {
 }
 
+static inline void alloc_slab_obj_exts_early(struct kmem_cache *s,
+						       struct slab *slab)
+{
+}
+
 #endif /* CONFIG_SLAB_OBJ_EXT */
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING
@@ -3206,7 +3346,9 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
 static __always_inline void account_slab(struct slab *slab, int order,
 					 struct kmem_cache *s, gfp_t gfp)
 {
-	if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
+	if (memcg_kmem_online() &&
+			(s->flags & SLAB_ACCOUNT) &&
+			!slab_obj_exts(slab))
 		alloc_slab_obj_exts(slab, s, gfp, true);
 
 	mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
@@ -3270,9 +3412,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	slab->objects = oo_objects(oo);
 	slab->inuse = 0;
 	slab->frozen = 0;
-	init_slab_obj_exts(slab);
-
-	account_slab(slab, oo_order(oo), s, flags);
 
 	slab->slab_cache = s;
 
@@ -3281,6 +3420,13 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	start = slab_address(slab);
 
 	setup_slab_debug(s, slab, start);
+	init_slab_obj_exts(slab);
+	/*
+	 * Poison the slab before initializing the slabobj_ext array
+	 * to prevent the array from being overwritten.
+	 */
+	alloc_slab_obj_exts_early(s, slab);
+	account_slab(slab, oo_order(oo), s, flags);
 
 	shuffle = shuffle_freelist(s, slab);
 
-- 
2.43.0
Re: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover
Posted by Hao Li 1 month, 2 weeks ago
On Mon, Dec 22, 2025 at 08:08:42PM +0900, Harry Yoo wrote:
> [...]
> +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> +						    struct slab *slab)
> +{
> +	unsigned long objext_offset;
> +
> +	objext_offset = s->red_left_pad + s->size * slab->objects;

Hi Harry,

As s->size already includes s->red_left_pad, do we still need
s->red_left_pad here?

> +	objext_offset = ALIGN(objext_offset, sizeof(struct slabobj_ext));
> +	return objext_offset;
> +}
Re: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover
Posted by Harry Yoo 1 month, 2 weeks ago
On Tue, Dec 23, 2025 at 11:08:32PM +0800, Hao Li wrote:
> On Mon, Dec 22, 2025 at 08:08:42PM +0900, Harry Yoo wrote:
> > [...]
> > +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> > +						    struct slab *slab)
> > +{
> > +	unsigned long objext_offset;
> > +
> > +	objext_offset = s->red_left_pad + s->size * slab->objects;
> 
> Hi Harry,

Hi Hao, thanks for the review!
Hope you're doing well.

> As s->size already includes s->red_left_pad

Great question. It's true that s->size includes s->red_left_pad,
but we also have a redzone right before the first object:

  [ redzone ] [ obj 1 | redzone ] [ obj 2 | redzone ] [ ... ]

So we have (slab->objects + 1) red zones and so

> do we still need s->red_left_pad here?

I think this is still needed.

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover
Posted by Hao Li 1 month, 2 weeks ago
On Wed, Dec 24, 2025 at 12:31:19AM +0900, Harry Yoo wrote:
> On Tue, Dec 23, 2025 at 11:08:32PM +0800, Hao Li wrote:
> > On Mon, Dec 22, 2025 at 08:08:42PM +0900, Harry Yoo wrote:
> > > [...]
> > > +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> > > +						    struct slab *slab)
> > > +{
> > > +	unsigned long objext_offset;
> > > +
> > > +	objext_offset = s->red_left_pad + s->size * slab->objects;
> > 
> > Hi Harry,
> 
> Hi Hao, thanks for the review!
> Hope you're doing well.

Thanks Harry. Hope you are too!

> 
> > As s->size already includes s->red_left_pad
> 
> Great question. It's true that s->size includes s->red_left_pad,
> but we have also a redzone right before the first object:
> 
>   [ redzone ] [ obj 1 | redzone ] [ obj 2| redzone ] [ ... ]
> 
> So we have (slab->objects + 1) red zones and so

I have a follow-up question regarding the redzones. Unless I'm missing
some detail, it seems the left redzone should apply to each object as
well. If so, I would expect the memory layout to be:

[left redzone | obj 1 | right redzone], [left redzone | obj 2 | right redzone], [ ... ]

In `calculate_sizes()`, I see:

if ((flags & SLAB_RED_ZONE) && size == s->object_size)
    size += sizeof(void *);
...
...
if (flags & SLAB_RED_ZONE) {
    size += s->red_left_pad;
}

Could you please confirm whether my understanding is correct, or point
out what I'm missing?

> 
> > do we still need s->red_left_pad here?
> 
> I think this is still needed.
> 
> -- 
> Cheers,
> Harry / Hyeonggon
Re: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover
Posted by Harry Yoo 1 month, 2 weeks ago
On Wed, Dec 24, 2025 at 12:08:36AM +0800, Hao Li wrote:
> On Wed, Dec 24, 2025 at 12:31:19AM +0900, Harry Yoo wrote:
> > On Tue, Dec 23, 2025 at 11:08:32PM +0800, Hao Li wrote:
> > > On Mon, Dec 22, 2025 at 08:08:42PM +0900, Harry Yoo wrote:
> > > > [...]
> > > > +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> > > > +						    struct slab *slab)
> > > > +{
> > > > +	unsigned long objext_offset;
> > > > +
> > > > +	objext_offset = s->red_left_pad + s->size * slab->objects;
> > > 
> > > Hi Harry,
> > 
> > Hi Hao, thanks for the review!
> > Hope you're doing well.
> 
> Thanks Harry. Hope you are too!
> 
> > 
> > > As s->size already includes s->red_left_pad
> > 
> > Great question. It's true that s->size includes s->red_left_pad,
> > but we have also a redzone right before the first object:
> > 
> >   [ redzone ] [ obj 1 | redzone ] [ obj 2| redzone ] [ ... ]
> > 
> > So we have (slab->objects + 1) red zones and so
> 
> I have a follow-up question regarding the redzones. Unless I'm missing
> some detail, it seems the left redzone should apply to each object as
> well. If so, I would expect the memory layout to be:
> 
> [left redzone | obj 1 | right redzone], [left redzone | obj 2 | right redzone], [ ... ]
> 
> In `calculate_sizes()`, I see:
> 
> if ((flags & SLAB_RED_ZONE) && size == s->object_size)
>     size += sizeof(void *);

Yes, this is the right redzone,

> ...
> ...
> if (flags & SLAB_RED_ZONE) {
>     size += s->red_left_pad;
> }

This is the left red zone.
Both of them are included in the size...

Oh god, I was confused, thanks for the correction!

> Could you please confirm whether my understanding is correct, or point
> out what I'm missing?

I think your understanding is correct.

Hmm, perhaps we should update the "Object layout:" comment above
check_pad_bytes() to avoid future confusion?

> > > do we still need s->red_left_pad here?
> > 
> > I think this is still needed.
> > 
> > -- 
> > Cheers,
> > Harry / Hyeonggon

-- 
Cheers,
Harry / Hyeonggon
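
For reference, the size accounting discussed above can be illustrated
with a standalone sketch (based on the calculate_sizes() excerpts quoted
in this thread and simplified; the free pointer, tracking data,
poisoning and alignment handling are omitted). It shows that both the
left and the right redzone are folded into s->size:

#include <stdio.h>

#define RED_ZONE 0x1u   /* stand-in for SLAB_RED_ZONE */

static unsigned int size_with_redzones(unsigned int object_size,
                                       unsigned int red_left_pad,
                                       unsigned int flags)
{
        unsigned int size = object_size;

        if (flags & RED_ZONE)
                size += sizeof(void *); /* right redzone after the object */

        /* ... free pointer, tracking data and alignment elided ... */

        if (flags & RED_ZONE)
                size += red_left_pad;   /* left redzone before the object */

        return size;    /* both redzones are already part of s->size */
}

int main(void)
{
        /* e.g. a 64-byte object with a 16-byte left redzone */
        printf("s->size = %u\n", size_with_redzones(64, 16, RED_ZONE));
        return 0;
}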
Re: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover
Posted by Hao Li 1 month, 2 weeks ago
On Wed, Dec 24, 2025 at 01:25:01AM +0900, Harry Yoo wrote:
> On Wed, Dec 24, 2025 at 12:08:36AM +0800, Hao Li wrote:
> > On Wed, Dec 24, 2025 at 12:31:19AM +0900, Harry Yoo wrote:
> > > On Tue, Dec 23, 2025 at 11:08:32PM +0800, Hao Li wrote:
> > > > On Mon, Dec 22, 2025 at 08:08:42PM +0900, Harry Yoo wrote:
> > > > > [...]
> > > > > +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> > > > > +						    struct slab *slab)
> > > > > +{
> > > > > +	unsigned long objext_offset;
> > > > > +
> > > > > +	objext_offset = s->red_left_pad + s->size * slab->objects;
> > > > 
> > > > Hi Harry,
> > > 
> > > Hi Hao, thanks for the review!
> > > Hope you're doing well.
> > 
> > Thanks Harry. Hope you are too!
> > 
> > > 
> > > > As s->size already includes s->red_left_pad
> > > 
> > > Great question. It's true that s->size includes s->red_left_pad,
> > > but we have also a redzone right before the first object:
> > > 
> > >   [ redzone ] [ obj 1 | redzone ] [ obj 2| redzone ] [ ... ]
> > > 
> > > So we have (slab->objects + 1) red zones and so
> > 
> > I have a follow-up question regarding the redzones. Unless I'm missing
> > some detail, it seems the left redzone should apply to each object as
> > well. If so, I would expect the memory layout to be:
> > 
> > [left redzone | obj 1 | right redzone], [left redzone | obj 2 | right redzone], [ ... ]
> > 
> > In `calculate_sizes()`, I see:
> > 
> > if ((flags & SLAB_RED_ZONE) && size == s->object_size)
> >     size += sizeof(void *);
> 
> Yes, this is the right redzone,
> 
> > ...
> > ...
> > if (flags & SLAB_RED_ZONE) {
> >     size += s->red_left_pad;
> > }
> 
> This is the left red zone.
> Both of them are included in the size...
> 
> Oh god, I was confused, thanks for the correction!

Glad it helped!

> 
> > Could you please confirm whether my understanding is correct, or point
> > out what I'm missing?
> 
> I think your understanding is correct.
> 
> Hmm, perhaps we should update the "Object layout:" comment above
> check_pad_bytes() to avoid future confusion?

Yes, exactly. That’s a good idea. Also, I feel the layout description in
the check_pad_bytes() comment isn’t very intuitive and can be a bit hard
to follow. I think it might be clearer if we explicitly list out each
field. What do you think about that?

> 
> > > > do we still need s->red_left_pad here?
> > > 
> > > I think this is still needed.
> > > 
> > > -- 
> > > Cheers,
> > > Harry / Hyeonggon
> 
> -- 
> Cheers,
> Harry / Hyeonggon
Re: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover
Posted by Harry Yoo 1 month, 2 weeks ago
On Wed, Dec 24, 2025 at 11:18:56AM +0800, Hao Li wrote:
> On Wed, Dec 24, 2025 at 01:25:01AM +0900, Harry Yoo wrote:
> > On Wed, Dec 24, 2025 at 12:08:36AM +0800, Hao Li wrote:
> > > On Wed, Dec 24, 2025 at 12:31:19AM +0900, Harry Yoo wrote:
> > > > On Tue, Dec 23, 2025 at 11:08:32PM +0800, Hao Li wrote:
> > > > > On Mon, Dec 22, 2025 at 08:08:42PM +0900, Harry Yoo wrote:
> > > > > > [...]
> > > > > > +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> > > > > > +						    struct slab *slab)
> > > > > > +{
> > > > > > +	unsigned long objext_offset;
> > > > > > +
> > > > > > +	objext_offset = s->red_left_pad + s->size * slab->objects;
> > > > > 
> > > > > Hi Harry,
> > > > 
> > > > Hi Hao, thanks for the review!
> > > > Hope you're doing well.
> > > 
> > > Thanks Harry. Hope you are too!
> > > 
> > > > 
> > > > > As s->size already includes s->red_left_pad
> > > > 
> > > > Great question. It's true that s->size includes s->red_left_pad,
> > > > but we have also a redzone right before the first object:
> > > > 
> > > >   [ redzone ] [ obj 1 | redzone ] [ obj 2| redzone ] [ ... ]
> > > > 
> > > > So we have (slab->objects + 1) red zones and so
> > > 
> > > I have a follow-up question regarding the redzones. Unless I'm missing
> > > some detail, it seems the left redzone should apply to each object as
> > > well. If so, I would expect the memory layout to be:
> > > 
> > > [left redzone | obj 1 | right redzone], [left redzone | obj 2 | right redzone], [ ... ]
> > > 
> > > In `calculate_sizes()`, I see:
> > > 
> > > if ((flags & SLAB_RED_ZONE) && size == s->object_size)
> > >     size += sizeof(void *);
> > 
> > Yes, this is the right redzone,
> > 
> > > ...
> > > ...
> > > if (flags & SLAB_RED_ZONE) {
> > >     size += s->red_left_pad;
> > > }
> > 
> > This is the left red zone.
> > Both of them are included in the size...
> > 
> > Oh god, I was confused, thanks for the correction!
> 
> Glad it helped!
> 
> > > Could you please confirm whether my understanding is correct, or point
> > > out what I'm missing?
> > 
> > I think your understanding is correct.
> > 
> > Hmm, perhaps we should update the "Object layout:" comment above
> > check_pad_bytes() to avoid future confusion?
> 
> Yes, exactly. That’s a good idea.
>
> Also, I feel the layout description in the check_pad_bytes() comment
> isn’t very intuitive and can be a bit hard to follow. I think it might be
> clearer if we explicitly list out each field. What do you think about that?

Yeah it's confusing, but from your description
I'm not sure what the end result would look like.

Could you please do a patch that does it? (and also adding left redzone
to the object layout comment, if you are willing to!)

As long as it makes it more understandable/intuitive,
it'd be nice to have!

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover
Posted by Hao Li 1 month, 2 weeks ago
On Wed, Dec 24, 2025 at 02:53:26PM +0900, Harry Yoo wrote:
> On Wed, Dec 24, 2025 at 11:18:56AM +0800, Hao Li wrote:
> > On Wed, Dec 24, 2025 at 01:25:01AM +0900, Harry Yoo wrote:
> > > On Wed, Dec 24, 2025 at 12:08:36AM +0800, Hao Li wrote:
> > > > On Wed, Dec 24, 2025 at 12:31:19AM +0900, Harry Yoo wrote:
> > > > > On Tue, Dec 23, 2025 at 11:08:32PM +0800, Hao Li wrote:
> > > > > > On Mon, Dec 22, 2025 at 08:08:42PM +0900, Harry Yoo wrote:
> > > > > > > [...]
> > > > > > > +static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
> > > > > > > +						    struct slab *slab)
> > > > > > > +{
> > > > > > > +	unsigned long objext_offset;
> > > > > > > +
> > > > > > > +	objext_offset = s->red_left_pad + s->size * slab->objects;
> > > > > > 
> > > > > > Hi Harry,
> > > > > 
> > > > > Hi Hao, thanks for the review!
> > > > > Hope you're doing well.
> > > > 
> > > > Thanks Harry. Hope you are too!
> > > > 
> > > > > 
> > > > > > As s->size already includes s->red_left_pad
> > > > > 
> > > > > Great question. It's true that s->size includes s->red_left_pad,
> > > > > but we have also a redzone right before the first object:
> > > > > 
> > > > >   [ redzone ] [ obj 1 | redzone ] [ obj 2| redzone ] [ ... ]
> > > > > 
> > > > > So we have (slab->objects + 1) red zones and so
> > > > 
> > > > I have a follow-up question regarding the redzones. Unless I'm missing
> > > > some detail, it seems the left redzone should apply to each object as
> > > > well. If so, I would expect the memory layout to be:
> > > > 
> > > > [left redzone | obj 1 | right redzone], [left redzone | obj 2 | right redzone], [ ... ]
> > > > 
> > > > In `calculate_sizes()`, I see:
> > > > 
> > > > if ((flags & SLAB_RED_ZONE) && size == s->object_size)
> > > >     size += sizeof(void *);
> > > 
> > > Yes, this is the right redzone,
> > > 
> > > > ...
> > > > ...
> > > > if (flags & SLAB_RED_ZONE) {
> > > >     size += s->red_left_pad;
> > > > }
> > > 
> > > This is the left red zone.
> > > Both of them are included in the size...
> > > 
> > > Oh god, I was confused, thanks for the correction!
> > 
> > Glad it helped!
> > 
> > > > Could you please confirm whether my understanding is correct, or point
> > > > out what I'm missing?
> > > 
> > > I think your understanding is correct.
> > > 
> > > Hmm, perhaps we should update the "Object layout:" comment above
> > > check_pad_bytes() to avoid future confusion?
> > 
> > Yes, exactly. That’s a good idea.
> >
> > Also, I feel the layout description in the check_pad_bytes() comment
> > isn’t very intuitive and can be a bit hard to follow. I think it might be
> > clearer if we explicitly list out each field. What do you think about that?
> 
> Yeah it's confusing, but from your description
> I'm not sure what the end result would look like.
> 
> Could you please do a patch that does it? (and also adding left redzone
> to the object layout comment, if you are willing to!)

Sure — I'd be happy to!

> 
> As long as it makes it more understandable/intuitive,
> it'd be nice to have!

I'll send a patch for review soon.

-- 
Thanks,
Hao
> 
> -- 
> Cheers,
> Harry / Hyeonggon
[PATCH] slub: clarify object field layout comments
Posted by Hao Li 1 month, 2 weeks ago
The comments above check_pad_bytes() document the field layout of a
single object. Rewrite them to improve clarity and precision.

Also update an outdated comment in calculate_sizes().

Suggested-by: Harry Yoo <harry.yoo@oracle.com>
Signed-off-by: Hao Li <hao.li@linux.dev>
---
Hi Harry, this patch adds more detailed object layout documentation. Let
me know if you have any comments.

 mm/slub.c | 92 ++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 53 insertions(+), 39 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index a94c64f56504..138e9d13540d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1211,44 +1211,58 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
 }
 
 /*
- * Object layout:
- *
- * object address
- * 	Bytes of the object to be managed.
- * 	If the freepointer may overlay the object then the free
- *	pointer is at the middle of the object.
- *
- * 	Poisoning uses 0x6b (POISON_FREE) and the last byte is
- * 	0xa5 (POISON_END)
- *
- * object + s->object_size
- * 	Padding to reach word boundary. This is also used for Redzoning.
- * 	Padding is extended by another word if Redzoning is enabled and
- * 	object_size == inuse.
- *
- * 	We fill with 0xbb (SLUB_RED_INACTIVE) for inactive objects and with
- * 	0xcc (SLUB_RED_ACTIVE) for objects in use.
- *
- * object + s->inuse
- * 	Meta data starts here.
- *
- * 	A. Free pointer (if we cannot overwrite object on free)
- * 	B. Tracking data for SLAB_STORE_USER
- *	C. Original request size for kmalloc object (SLAB_STORE_USER enabled)
- *	D. Padding to reach required alignment boundary or at minimum
- * 		one word if debugging is on to be able to detect writes
- * 		before the word boundary.
- *
- *	Padding is done using 0x5a (POISON_INUSE)
- *
- * object + s->size
- * 	Nothing is used beyond s->size.
- *
- * If slabcaches are merged then the object_size and inuse boundaries are mostly
- * ignored. And therefore no slab options that rely on these boundaries
+ * Object field layout:
+ *
+ * [Left redzone padding] (if SLAB_RED_ZONE)
+ *   - Field size: s->red_left_pad
+ *   - Filled with 0xbb (SLUB_RED_INACTIVE) for inactive objects and
+ *     0xcc (SLUB_RED_ACTIVE) for objects in use when SLAB_RED_ZONE.
+ *
+ * [Object bytes]
+ *   - Field size: s->object_size
+ *   - Object payload bytes.
+ *   - If the freepointer may overlap the object, it is stored inside
+ *     the object (typically near the middle).
+ *   - Poisoning uses 0x6b (POISON_FREE) and the last byte is
+ *     0xa5 (POISON_END) when __OBJECT_POISON is enabled.
+ *
+ * [Word-align padding] (right redzone when SLAB_RED_ZONE is set)
+ *   - Field size: s->inuse - s->object_size
+ *   - If redzoning is enabled and ALIGN(size, sizeof(void *)) adds no
+ *     padding, explicitly extend by one word so the right redzone is
+ *     non-empty.
+ *   - Filled with 0xbb (SLUB_RED_INACTIVE) for inactive objects and
+ *     0xcc (SLUB_RED_ACTIVE) for objects in use when SLAB_RED_ZONE.
+ *
+ * [Metadata starts at object + s->inuse]
+ *   - A. freelist pointer (if freeptr_outside_object)
+ *   - B. alloc tracking (SLAB_STORE_USER)
+ *   - C. free tracking (SLAB_STORE_USER)
+ *   - D. original request size (SLAB_KMALLOC && SLAB_STORE_USER)
+ *   - E. KASAN metadata (if enabled)
+ *
+ * [Mandatory padding] (if CONFIG_SLUB_DEBUG && SLAB_RED_ZONE)
+ *   - One mandatory debug word to guarantee a minimum poisoned gap
+ *     between metadata and the next object, independent of alignment.
+ *   - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
+ * [Final alignment padding]
+ *   - Any bytes added by ALIGN(size, s->align) to reach s->size.
+ *   - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
+ *
+ * Notes:
+ * - Redzones are filled by init_object() with SLUB_RED_ACTIVE/INACTIVE.
+ * - Object contents are poisoned with POISON_FREE/END when __OBJECT_POISON.
+ * - The trailing padding is pre-filled with POISON_INUSE by
+ *   setup_slab_debug() when SLAB_POISON is set, and is validated by
+ *   check_pad_bytes().
+ * - The first object pointer is slab_address(slab) +
+ *   (s->red_left_pad if redzoning); subsequent objects are reached by
+ *   adding s->size each time.
+ *
+ * If slabcaches are merged then the object_size and inuse boundaries are
+ * mostly ignored. Therefore no slab options that rely on these boundaries
  * may be used with merged slabcaches.
  */
-
 static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
 {
 	unsigned long off = get_info_end(s);	/* The end of info */
@@ -7103,9 +7117,9 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
 
 
 	/*
-	 * If we are Redzoning then check if there is some space between the
-	 * end of the object and the free pointer. If not then add an
-	 * additional word to have some bytes to store Redzone information.
+	 * If we are Redzoning and there is no space between the end of the
+	 * object and the following fields, add one word so the right Redzone
+	 * is non-empty.
 	 */
 	if ((flags & SLAB_RED_ZONE) && size == s->object_size)
 		size += sizeof(void *);
-- 
2.50.1
Re: [PATCH] slub: clarify object field layout comments
Posted by Harry Yoo 1 month, 1 week ago
On Wed, Dec 24, 2025 at 08:51:14PM +0800, Hao Li wrote:
> The comments above check_pad_bytes() document the field layout of a
> single object. Rewrite them to improve clarity and precision.
> 
> Also update an outdated comment in calculate_sizes().
> 
> Suggested-by: Harry Yoo <harry.yoo@oracle.com>
> Signed-off-by: Hao Li <hao.li@linux.dev>
> ---
> Hi Harry, this patch adds more detailed object layout documentation. Let
> me know if you have any comments.

Hi Hao, thanks for improving it!
It looks much clearer now.

few nits below.

> + * Object field layout:
> + *
> + * [Left redzone padding] (if SLAB_RED_ZONE)
> + *   - Field size: s->red_left_pad
> + *   - Filled with 0xbb (SLUB_RED_INACTIVE) for inactive objects and
> + *     0xcc (SLUB_RED_ACTIVE) for objects in use when SLAB_RED_ZONE.

nit: although it becomes clear after reading the Notes: section,
I would like to make it explicit that the object address starts right
after the left redzone, i.e. the left redzone sits immediately before
each object.
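
Something like this, just to illustrate (a sketch, not the exact code):

	void *p = slab_address(slab);

	if (kmem_cache_debug_flags(s, SLAB_RED_ZONE))
		p += s->red_left_pad;	/* skip the left redzone */

	/*
	 * p is now the first object address; its left redzone is the
	 * s->red_left_pad bytes immediately before it, and the next
	 * object starts at p + s->size.
	 */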

> + * [Object bytes]
> + *   - Field size: s->object_size
> + *   - Object payload bytes.
> + *   - If the freepointer may overlap the object, it is stored inside
> + *     the object (typically near the middle).
> + *   - Poisoning uses 0x6b (POISON_FREE) and the last byte is
> + *     0xa5 (POISON_END) when __OBJECT_POISON is enabled.
> + *
> + * [Word-align padding] (right redzone when SLAB_RED_ZONE is set)
> + *   - Field size: s->inuse - s->object_size
> + *   - If redzoning is enabled and ALIGN(size, sizeof(void *)) adds no
> + *     padding, explicitly extend by one word so the right redzone is
> + *     non-empty.
> + *   - Filled with 0xbb (SLUB_RED_INACTIVE) for inactive objects and
> + *     0xcc (SLUB_RED_ACTIVE) for objects in use when SLAB_RED_ZONE.
> + *
> + * [Metadata starts at object + s->inuse]
> + *   - A. freelist pointer (if freeptr_outside_object)
> + *   - B. alloc tracking (SLAB_STORE_USER)
> + *   - C. free tracking (SLAB_STORE_USER)
> + *   - D. original request size (SLAB_KMALLOC && SLAB_STORE_USER)
> + *   - E. KASAN metadata (if enabled)
> + *
> + * [Mandatory padding] (if CONFIG_SLUB_DEBUG && SLAB_RED_ZONE)
> + *   - One mandatory debug word to guarantee a minimum poisoned gap
> + *     between metadata and the next object, independent of alignment.
> + *   - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
>
> + * [Final alignment padding]
> + *   - Any bytes added by ALIGN(size, s->align) to reach s->size.
> + *   - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
> + *
> + * Notes:
> + * - Redzones are filled by init_object() with SLUB_RED_ACTIVE/INACTIVE.
> + * - Object contents are poisoned with POISON_FREE/END when __OBJECT_POISON.
> + * - The trailing padding is pre-filled with POISON_INUSE by
> + *   setup_slab_debug() when SLAB_POISON is set, and is validated by
> + *   check_pad_bytes().
> + * - The first object pointer is slab_address(slab) +
> + *   (s->red_left_pad if redzoning); subsequent objects are reached by
> + *   adding s->size each time.
> + *
> + * If slabcaches are merged then the object_size and inuse boundaries are
> + * mostly ignored. Therefore no slab options that rely on these boundaries
>   * may be used with merged slabcaches.

For the last paragraph, perhaps it'll be clearer to say:

  "If a slab cache flag relies on specific metadata to exist at a fixed
   offset, the flag must be included in SLAB_NEVER_MERGE to prevent
   merging. Otherwise, the cache would misbehave as s->object_size and
   s->inuse are adjusted during cache merging"

Otherwise looks great to me, so please feel free to add:
Acked-by: Harry Yoo <harry.yoo@oracle.com>

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH] slub: clarify object field layout comments
Posted by Hao Li 1 month, 1 week ago
On Mon, Dec 29, 2025 at 04:07:54PM +0900, Harry Yoo wrote:
> On Wed, Dec 24, 2025 at 08:51:14PM +0800, Hao Li wrote:
> > The comments above check_pad_bytes() document the field layout of a
> > single object. Rewrite them to improve clarity and precision.
> > 
> > Also update an outdated comment in calculate_sizes().
> > 
> > Suggested-by: Harry Yoo <harry.yoo@oracle.com>
> > Signed-off-by: Hao Li <hao.li@linux.dev>
> > ---
> > Hi Harry, this patch adds more detailed object layout documentation. Let
> > me know if you have any comments.
> 
> Hi Hao, thanks for improving it!
> It looks much clearer now.

Hi Harry,

Thanks for the review and the Acked-by!

> 
> few nits below.
> 
> > + * Object field layout:
> > + *
> > + * [Left redzone padding] (if SLAB_RED_ZONE)
> > + *   - Field size: s->red_left_pad
> > + *   - Filled with 0xbb (SLUB_RED_INACTIVE) for inactive objects and
> > + *     0xcc (SLUB_RED_ACTIVE) for objects in use when SLAB_RED_ZONE.
> 
> nit: although it becomes clear after reading the Notes: section,
> I would like to make it clear that object address starts here (after
> the left redzone) and the left redzone is right before each object.

Good point. I’ll make this explicit in v2.
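
Something along these lines (exact wording to be polished in v2):

 * [Left redzone padding] (if SLAB_RED_ZONE)
 *   - Sits immediately before each object; the object address (the
 *     pointer handed out to callers) starts right after this field.
 *   - Field size: s->red_left_pad
 *   - ...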

> 
> > + * [Object bytes]
> > + *   - Field size: s->object_size
> > + *   - Object payload bytes.
> > + *   - If the freepointer may overlap the object, it is stored inside
> > + *     the object (typically near the middle).
> > + *   - Poisoning uses 0x6b (POISON_FREE) and the last byte is
> > + *     0xa5 (POISON_END) when __OBJECT_POISON is enabled.
> > + *
> > + * [Word-align padding] (right redzone when SLAB_RED_ZONE is set)
> > + *   - Field size: s->inuse - s->object_size
> > + *   - If redzoning is enabled and ALIGN(size, sizeof(void *)) adds no
> > + *     padding, explicitly extend by one word so the right redzone is
> > + *     non-empty.
> > + *   - Filled with 0xbb (SLUB_RED_INACTIVE) for inactive objects and
> > + *     0xcc (SLUB_RED_ACTIVE) for objects in use when SLAB_RED_ZONE.
> > + *
> > + * [Metadata starts at object + s->inuse]
> > + *   - A. freelist pointer (if freeptr_outside_object)
> > + *   - B. alloc tracking (SLAB_STORE_USER)
> > + *   - C. free tracking (SLAB_STORE_USER)
> > + *   - D. original request size (SLAB_KMALLOC && SLAB_STORE_USER)
> > + *   - E. KASAN metadata (if enabled)
> > + *
> > + * [Mandatory padding] (if CONFIG_SLUB_DEBUG && SLAB_RED_ZONE)
> > + *   - One mandatory debug word to guarantee a minimum poisoned gap
> > + *     between metadata and the next object, independent of alignment.
> > + *   - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
> >
> > + * [Final alignment padding]
> > + *   - Any bytes added by ALIGN(size, s->align) to reach s->size.
> > + *   - Filled with 0x5a (POISON_INUSE) when SLAB_POISON is set.
> > + *
> > + * Notes:
> > + * - Redzones are filled by init_object() with SLUB_RED_ACTIVE/INACTIVE.
> > + * - Object contents are poisoned with POISON_FREE/END when __OBJECT_POISON.
> > + * - The trailing padding is pre-filled with POISON_INUSE by
> > + *   setup_slab_debug() when SLAB_POISON is set, and is validated by
> > + *   check_pad_bytes().
> > + * - The first object pointer is slab_address(slab) +
> > + *   (s->red_left_pad if redzoning); subsequent objects are reached by
> > + *   adding s->size each time.
> > + *
> > + * If slabcaches are merged then the object_size and inuse boundaries are
> > + * mostly ignored. Therefore no slab options that rely on these boundaries
> >   * may be used with merged slabcaches.
> 
> For the last paragraph, perhaps it'll be clearer to say:
> 
>   "If a slab cache flag relies on specific metadata to exist at a fixed
>    offset, the flag must be included in SLAB_NEVER_MERGE to prevent
>    merging. Otherwise, the cache would misbehave as s->object_size and
>    s->inuse are adjusted during cache merging"

Agreed. I’ll reword that paragraph along the lines of your suggestion to
emphasize the fixed-offset metadata requirement.

> 
> Otherwise looks great to me, so please feel free to add:
> Acked-by: Harry Yoo <harry.yoo@oracle.com>

I'll include this Acked-by in v2. Thanks!

-- 
Thanks
Hao
> 
> -- 
> Cheers,
> Harry / Hyeonggon
Re: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover
Posted by kernel test robot 1 month, 2 weeks ago
Hi Harry,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Harry-Yoo/mm-slab-use-unsigned-long-for-orig_size-to-ensure-proper-metadata-align/20251222-191144
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20251222110843.980347-8-harry.yoo%40oracle.com
patch subject: [PATCH V4 7/8] mm/slab: save memory by allocating slabobj_ext array from leftover
config: x86_64-buildonly-randconfig-001-20251223 (https://download.01.org/0day-ci/archive/20251223/202512231042.EEBUajQY-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251223/202512231042.EEBUajQY-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512231042.EEBUajQY-lkp@intel.com/

All errors (new ones prefixed by >>):

   mm/slub.c:2140:33: error: passing 'union codetag_ref' to parameter of incompatible type 'union codetag_ref *'; take the address with &
    2140 |                 if (unlikely(is_codetag_empty(ext->ref))) {
         |                                               ^~~~~~~~
         |                                               &
   include/linux/compiler.h:47:41: note: expanded from macro 'unlikely'
      47 | #  define unlikely(x)   (__branch_check__(x, 0, __builtin_constant_p(x)))
         |                                           ^
   include/linux/compiler.h:32:34: note: expanded from macro '__branch_check__'
      32 |                         ______r = __builtin_expect(!!(x), expect);      \
         |                                                       ^
   include/linux/alloc_tag.h:52:56: note: passing argument to parameter 'ref' here
      52 | static inline bool is_codetag_empty(union codetag_ref *ref)
         |                                                        ^
   mm/slub.c:2140:33: error: passing 'union codetag_ref' to parameter of incompatible type 'union codetag_ref *'; take the address with &
    2140 |                 if (unlikely(is_codetag_empty(ext->ref))) {
         |                                               ^~~~~~~~
         |                                               &
   include/linux/compiler.h:47:68: note: expanded from macro 'unlikely'
      47 | #  define unlikely(x)   (__branch_check__(x, 0, __builtin_constant_p(x)))
         |                                                                      ^
   include/linux/compiler.h:34:19: note: expanded from macro '__branch_check__'
      34 |                                              expect, is_constant);      \
         |                                                      ^~~~~~~~~~~
   include/linux/alloc_tag.h:52:56: note: passing argument to parameter 'ref' here
      52 | static inline bool is_codetag_empty(union codetag_ref *ref)
         |                                                        ^
>> mm/slub.c:2326:16: error: use of undeclared identifier 'MEMCG_DATA_OBJEXTS'
    2326 |                         obj_exts |= MEMCG_DATA_OBJEXTS;
         |                                     ^
   3 errors generated.


vim +/MEMCG_DATA_OBJEXTS +2326 mm/slub.c

  2302	
  2303	/*
  2304	 * Try to allocate slabobj_ext array from unused space.
  2305	 * This function must be called on a freshly allocated slab to prevent
  2306	 * concurrency problems.
  2307	 */
  2308	static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
  2309	{
  2310		void *addr;
  2311		unsigned long obj_exts;
  2312	
  2313		if (!need_slab_obj_exts(s))
  2314			return;
  2315	
  2316		if (obj_exts_fit_within_slab_leftover(s, slab)) {
  2317			addr = slab_address(slab) + obj_exts_offset_in_slab(s, slab);
  2318			addr = kasan_reset_tag(addr);
  2319			obj_exts = (unsigned long)addr;
  2320	
  2321			get_slab_obj_exts(obj_exts);
  2322			memset(addr, 0, obj_exts_size_in_slab(slab));
  2323			put_slab_obj_exts(obj_exts);
  2324	
  2325			if (IS_ENABLED(CONFIG_MEMCG))
> 2326				obj_exts |= MEMCG_DATA_OBJEXTS;
  2327			slab->obj_exts = obj_exts;
  2328			slab_set_stride(slab, sizeof(struct slabobj_ext));
  2329		}
  2330	}
  2331	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
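
Both errors look mechanical. A possible shape of the fixes, shown only as
a sketch against the lines quoted above (the change actually folded into
the next version of the series may differ; the #ifdef assumes the
identifier MEMCG_DATA_OBJEXTS is only visible with CONFIG_MEMCG, which is
what this randconfig suggests):

	/* mm/slub.c:2140 -- is_codetag_empty() takes a pointer */
	-		if (unlikely(is_codetag_empty(ext->ref))) {
	+		if (unlikely(is_codetag_empty(&ext->ref))) {

	/* mm/slub.c:2326 -- IS_ENABLED() alone does not hide the identifier
	 * from the compiler when CONFIG_MEMCG=n, a preprocessor guard does */
	-		if (IS_ENABLED(CONFIG_MEMCG))
	-			obj_exts |= MEMCG_DATA_OBJEXTS;
	+#ifdef CONFIG_MEMCG
	+		obj_exts |= MEMCG_DATA_OBJEXTS;
	+#endif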