[PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size

Harry Yoo posted 8 patches 1 month ago
There is a newer version of this series
[PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Harry Yoo 1 month ago
When a cache has high s->align value and s->object_size is not aligned
to it, each object ends up with some unused space because of alignment.
If this wasted space is big enough, we can use it to store the
slabobj_ext metadata instead of wasting it.

On my system, this happens with caches like kmem_cache, mm_struct, pid,
task_struct, sighand_cache, xfs_inode, and others.

To place the slabobj_ext metadata within each object, the existing
slab_obj_ext() logic can still be used by setting:

  - slab->obj_exts = slab_address(slab) + s->red_left_pad +
                     (slabobj_ext offset)
  - stride = s->size

slab_obj_ext() doesn't need to know where the metadata is stored,
so this method works without adding extra overhead to slab_obj_ext().

A good example benefiting from this optimization is xfs_inode
(object_size: 992, align: 64). To measure memory savings, about 2 million
files were created on XFS.

[ MEMCG=y, MEM_ALLOC_PROFILING=n ]

Before patch (creating ~2.64M directories on xfs):
  Slab:            5175976 kB
  SReclaimable:    3837524 kB
  SUnreclaim:      1338452 kB

After patch (creating ~2.64M directories on xfs):
  Slab:            5152912 kB
  SReclaimable:    3838568 kB
  SUnreclaim:      1314344 kB (-23.54 MiB)

Enjoy the memory savings!

Suggested-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
---
 include/linux/slab.h |  9 ++++++
 mm/slab_common.c     |  6 ++--
 mm/slub.c            | 73 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 83 insertions(+), 5 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 4554c04a9bd7..da512d9ab1a0 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -59,6 +59,9 @@ enum _slab_flag_bits {
 	_SLAB_CMPXCHG_DOUBLE,
 #ifdef CONFIG_SLAB_OBJ_EXT
 	_SLAB_NO_OBJ_EXT,
+#endif
+#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
+	_SLAB_OBJ_EXT_IN_OBJ,
 #endif
 	_SLAB_FLAGS_LAST_BIT
 };
@@ -244,6 +247,12 @@ enum _slab_flag_bits {
 #define SLAB_NO_OBJ_EXT		__SLAB_FLAG_UNUSED
 #endif
 
+#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
+#define SLAB_OBJ_EXT_IN_OBJ	__SLAB_FLAG_BIT(_SLAB_OBJ_EXT_IN_OBJ)
+#else
+#define SLAB_OBJ_EXT_IN_OBJ	__SLAB_FLAG_UNUSED
+#endif
+
 /*
  * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
  *
diff --git a/mm/slab_common.c b/mm/slab_common.c
index c4cf9ed2ec92..f0a6db20d7ea 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -43,11 +43,13 @@ DEFINE_MUTEX(slab_mutex);
 struct kmem_cache *kmem_cache;
 
 /*
- * Set of flags that will prevent slab merging
+ * Set of flags that will prevent slab merging.
+ * Any flag that adds per-object metadata should be included,
+ * since slab merging can update s->inuse that affects the metadata layout.
  */
 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
 		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
-		SLAB_FAILSLAB | SLAB_NO_MERGE)
+		SLAB_FAILSLAB | SLAB_NO_MERGE | SLAB_OBJ_EXT_IN_OBJ)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
 			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
diff --git a/mm/slub.c b/mm/slub.c
index 50b74324e550..43fdbff9d09b 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -977,6 +977,39 @@ static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
 {
 	return false;
 }
+
+#endif
+
+#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
+static bool obj_exts_in_object(struct kmem_cache *s)
+{
+	return s->flags & SLAB_OBJ_EXT_IN_OBJ;
+}
+
+static unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
+{
+	unsigned int offset = get_info_end(s);
+
+	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
+		offset += sizeof(struct track) * 2;
+
+	if (slub_debug_orig_size(s))
+		offset += sizeof(unsigned long);
+
+	offset += kasan_metadata_size(s, false);
+
+	return offset;
+}
+#else
+static inline bool obj_exts_in_object(struct kmem_cache *s)
+{
+	return false;
+}
+
+static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_SLUB_DEBUG
@@ -1277,6 +1310,9 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
 
 	off += kasan_metadata_size(s, false);
 
+	if (obj_exts_in_object(s))
+		off += sizeof(struct slabobj_ext);
+
 	if (off != size_from_object(s))
 		/* Beginning of the filler is the free pointer */
 		print_section(KERN_ERR, "Padding  ", p + off,
@@ -1446,7 +1482,10 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
  * 	A. Free pointer (if we cannot overwrite object on free)
  * 	B. Tracking data for SLAB_STORE_USER
  *	C. Original request size for kmalloc object (SLAB_STORE_USER enabled)
- *	D. Padding to reach required alignment boundary or at minimum
+ *	D. KASAN alloc metadata (KASAN enabled)
+ *	E. struct slabobj_ext to store accounting metadata
+ *	   (SLAB_OBJ_EXT_IN_OBJ enabled)
+ *	F. Padding to reach required alignment boundary or at minimum
  * 		one word if debugging is on to be able to detect writes
  * 		before the word boundary.
  *
@@ -1474,6 +1513,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
 
 	off += kasan_metadata_size(s, false);
 
+	if (obj_exts_in_object(s))
+		off += sizeof(struct slabobj_ext);
+
 	if (size_from_object(s) == off)
 		return 1;
 
@@ -2280,7 +2322,8 @@ static inline void free_slab_obj_exts(struct slab *slab)
 		return;
 	}
 
-	if (obj_exts_in_slab(slab->slab_cache, slab)) {
+	if (obj_exts_in_slab(slab->slab_cache, slab) ||
+			obj_exts_in_object(slab->slab_cache)) {
 		slab->obj_exts = 0;
 		return;
 	}
@@ -2326,6 +2369,23 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
 			obj_exts |= MEMCG_DATA_OBJEXTS;
 		slab->obj_exts = obj_exts;
 		slab_set_stride(slab, sizeof(struct slabobj_ext));
+	} else if (obj_exts_in_object(s)) {
+		unsigned int offset = obj_exts_offset_in_object(s);
+
+		obj_exts = (unsigned long)slab_address(slab);
+		obj_exts += s->red_left_pad;
+		obj_exts += offset;
+
+		get_slab_obj_exts(obj_exts);
+		for_each_object(addr, s, slab_address(slab), slab->objects)
+			memset(kasan_reset_tag(addr) + offset, 0,
+			       sizeof(struct slabobj_ext));
+		put_slab_obj_exts(obj_exts);
+
+		if (IS_ENABLED(CONFIG_MEMCG))
+			obj_exts |= MEMCG_DATA_OBJEXTS;
+		slab->obj_exts = obj_exts;
+		slab_set_stride(slab, s->size);
 	}
 }
 
@@ -8023,6 +8083,7 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
 {
 	slab_flags_t flags = s->flags;
 	unsigned int size = s->object_size;
+	unsigned int aligned_size;
 	unsigned int order;
 
 	/*
@@ -8132,7 +8193,13 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
 	 * offset 0. In order to align the objects we have to simply size
 	 * each object to conform to the alignment.
 	 */
-	size = ALIGN(size, s->align);
+	aligned_size = ALIGN(size, s->align);
+#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
+	if (aligned_size - size >= sizeof(struct slabobj_ext))
+		s->flags |= SLAB_OBJ_EXT_IN_OBJ;
+#endif
+	size = aligned_size;
+
 	s->size = size;
 	s->reciprocal_size = reciprocal_value(size);
 	order = calculate_order(size);
-- 
2.43.0
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Hao Li 1 month ago
On Mon, Jan 05, 2026 at 05:02:30PM +0900, Harry Yoo wrote:
> When a cache has high s->align value and s->object_size is not aligned
> to it, each object ends up with some unused space because of alignment.
> If this wasted space is big enough, we can use it to store the
> slabobj_ext metadata instead of wasting it.

Hi, Harry,

When we save obj_ext in s->size space, it seems that slab_ksize() might
be missing the corresponding handling. It still returns s->size, which
could cause callers of slab_ksize() to see unexpected data (i.e.
obj_ext), or even overwrite the obj_ext data.

-- 
Thanks,
Hao

> 
> On my system, this happens with caches like kmem_cache, mm_struct, pid,
> task_struct, sighand_cache, xfs_inode, and others.
> 
> To place the slabobj_ext metadata within each object, the existing
> slab_obj_ext() logic can still be used by setting:
> 
>   - slab->obj_exts = slab_address(slab) + s->red_left_zone +
>                      (slabobj_ext offset)
>   - stride = s->size
> 
> slab_obj_ext() doesn't need know where the metadata is stored,
> so this method works without adding extra overhead to slab_obj_ext().
> 
> A good example benefiting from this optimization is xfs_inode
> (object_size: 992, align: 64). To measure memory savings, 2 millions of
> files were created on XFS.
> 
> [ MEMCG=y, MEM_ALLOC_PROFILING=n ]
> 
> Before patch (creating ~2.64M directories on xfs):
>   Slab:            5175976 kB
>   SReclaimable:    3837524 kB
>   SUnreclaim:      1338452 kB
> 
> After patch (creating ~2.64M directories on xfs):
>   Slab:            5152912 kB
>   SReclaimable:    3838568 kB
>   SUnreclaim:      1314344 kB (-23.54 MiB)
> 
> Enjoy the memory savings!
> 
> Suggested-by: Vlastimil Babka <vbabka@suse.cz>
> Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
> ---
>  include/linux/slab.h |  9 ++++++
>  mm/slab_common.c     |  6 ++--
>  mm/slub.c            | 73 ++++++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 83 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 4554c04a9bd7..da512d9ab1a0 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -59,6 +59,9 @@ enum _slab_flag_bits {
>  	_SLAB_CMPXCHG_DOUBLE,
>  #ifdef CONFIG_SLAB_OBJ_EXT
>  	_SLAB_NO_OBJ_EXT,
> +#endif
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +	_SLAB_OBJ_EXT_IN_OBJ,
>  #endif
>  	_SLAB_FLAGS_LAST_BIT
>  };
> @@ -244,6 +247,12 @@ enum _slab_flag_bits {
>  #define SLAB_NO_OBJ_EXT		__SLAB_FLAG_UNUSED
>  #endif
>  
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +#define SLAB_OBJ_EXT_IN_OBJ	__SLAB_FLAG_BIT(_SLAB_OBJ_EXT_IN_OBJ)
> +#else
> +#define SLAB_OBJ_EXT_IN_OBJ	__SLAB_FLAG_UNUSED
> +#endif
> +
>  /*
>   * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
>   *
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index c4cf9ed2ec92..f0a6db20d7ea 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -43,11 +43,13 @@ DEFINE_MUTEX(slab_mutex);
>  struct kmem_cache *kmem_cache;
>  
>  /*
> - * Set of flags that will prevent slab merging
> + * Set of flags that will prevent slab merging.
> + * Any flag that adds per-object metadata should be included,
> + * since slab merging can update s->inuse that affects the metadata layout.
>   */
>  #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
>  		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
> -		SLAB_FAILSLAB | SLAB_NO_MERGE)
> +		SLAB_FAILSLAB | SLAB_NO_MERGE | SLAB_OBJ_EXT_IN_OBJ)
>  
>  #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
>  			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
> diff --git a/mm/slub.c b/mm/slub.c
> index 50b74324e550..43fdbff9d09b 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -977,6 +977,39 @@ static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
>  {
>  	return false;
>  }
> +
> +#endif
> +
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +static bool obj_exts_in_object(struct kmem_cache *s)
> +{
> +	return s->flags & SLAB_OBJ_EXT_IN_OBJ;
> +}
> +
> +static unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
> +{
> +	unsigned int offset = get_info_end(s);
> +
> +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
> +		offset += sizeof(struct track) * 2;
> +
> +	if (slub_debug_orig_size(s))
> +		offset += sizeof(unsigned long);
> +
> +	offset += kasan_metadata_size(s, false);
> +
> +	return offset;
> +}
> +#else
> +static inline bool obj_exts_in_object(struct kmem_cache *s)
> +{
> +	return false;
> +}
> +
> +static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
> +{
> +	return 0;
> +}
>  #endif
>  
>  #ifdef CONFIG_SLUB_DEBUG
> @@ -1277,6 +1310,9 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
>  
>  	off += kasan_metadata_size(s, false);
>  
> +	if (obj_exts_in_object(s))
> +		off += sizeof(struct slabobj_ext);
> +
>  	if (off != size_from_object(s))
>  		/* Beginning of the filler is the free pointer */
>  		print_section(KERN_ERR, "Padding  ", p + off,
> @@ -1446,7 +1482,10 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
>   * 	A. Free pointer (if we cannot overwrite object on free)
>   * 	B. Tracking data for SLAB_STORE_USER
>   *	C. Original request size for kmalloc object (SLAB_STORE_USER enabled)
> - *	D. Padding to reach required alignment boundary or at minimum
> + *	D. KASAN alloc metadata (KASAN enabled)
> + *	E. struct slabobj_ext to store accounting metadata
> + *	   (SLAB_OBJ_EXT_IN_OBJ enabled)
> + *	F. Padding to reach required alignment boundary or at minimum
>   * 		one word if debugging is on to be able to detect writes
>   * 		before the word boundary.
>   *
> @@ -1474,6 +1513,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
>  
>  	off += kasan_metadata_size(s, false);
>  
> +	if (obj_exts_in_object(s))
> +		off += sizeof(struct slabobj_ext);
> +
>  	if (size_from_object(s) == off)
>  		return 1;
>  
> @@ -2280,7 +2322,8 @@ static inline void free_slab_obj_exts(struct slab *slab)
>  		return;
>  	}
>  
> -	if (obj_exts_in_slab(slab->slab_cache, slab)) {
> +	if (obj_exts_in_slab(slab->slab_cache, slab) ||
> +			obj_exts_in_object(slab->slab_cache)) {
>  		slab->obj_exts = 0;
>  		return;
>  	}
> @@ -2326,6 +2369,23 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
>  			obj_exts |= MEMCG_DATA_OBJEXTS;
>  		slab->obj_exts = obj_exts;
>  		slab_set_stride(slab, sizeof(struct slabobj_ext));
> +	} else if (obj_exts_in_object(s)) {
> +		unsigned int offset = obj_exts_offset_in_object(s);
> +
> +		obj_exts = (unsigned long)slab_address(slab);
> +		obj_exts += s->red_left_pad;
> +		obj_exts += offset;
> +
> +		get_slab_obj_exts(obj_exts);
> +		for_each_object(addr, s, slab_address(slab), slab->objects)
> +			memset(kasan_reset_tag(addr) + offset, 0,
> +			       sizeof(struct slabobj_ext));
> +		put_slab_obj_exts(obj_exts);
> +
> +		if (IS_ENABLED(CONFIG_MEMCG))
> +			obj_exts |= MEMCG_DATA_OBJEXTS;
> +		slab->obj_exts = obj_exts;
> +		slab_set_stride(slab, s->size);
>  	}
>  }
>  
> @@ -8023,6 +8083,7 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
>  {
>  	slab_flags_t flags = s->flags;
>  	unsigned int size = s->object_size;
> +	unsigned int aligned_size;
>  	unsigned int order;
>  
>  	/*
> @@ -8132,7 +8193,13 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
>  	 * offset 0. In order to align the objects we have to simply size
>  	 * each object to conform to the alignment.
>  	 */
> -	size = ALIGN(size, s->align);
> +	aligned_size = ALIGN(size, s->align);
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +	if (aligned_size - size >= sizeof(struct slabobj_ext))
> +		s->flags |= SLAB_OBJ_EXT_IN_OBJ;
> +#endif
> +	size = aligned_size;
> +
>  	s->size = size;
>  	s->reciprocal_size = reciprocal_value(size);
>  	order = calculate_order(size);
> -- 
> 2.43.0
>
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Harry Yoo 1 month ago
On Thu, Jan 08, 2026 at 01:52:09PM +0800, Hao Li wrote:
> On Mon, Jan 05, 2026 at 05:02:30PM +0900, Harry Yoo wrote:
> > When a cache has high s->align value and s->object_size is not aligned
> > to it, each object ends up with some unused space because of alignment.
> > If this wasted space is big enough, we can use it to store the
> > slabobj_ext metadata instead of wasting it.
> 
> Hi, Harry,

Hi Hao,

> When we save obj_ext in s->size space, it seems that slab_ksize() might
> be missing the corresponding handling.

Oops.

> It still returns s->size, which could cause callers of slab_ksize()
> to see unexpected data (i.e. obj_ext), or even overwrite the obj_ext data.

Yes indeed.
Great point, thanks!

I'll fix it by checking if the slab has obj_exts within the object
layout and returning s->object_size if so.

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Hao Li 1 month ago
On Thu, Jan 08, 2026 at 05:41:00PM +0900, Harry Yoo wrote:
> On Thu, Jan 08, 2026 at 01:52:09PM +0800, Hao Li wrote:
> > On Mon, Jan 05, 2026 at 05:02:30PM +0900, Harry Yoo wrote:
> > > When a cache has high s->align value and s->object_size is not aligned
> > > to it, each object ends up with some unused space because of alignment.
> > > If this wasted space is big enough, we can use it to store the
> > > slabobj_ext metadata instead of wasting it.
> > 
> > Hi, Harry,
> 
> Hi Hao,
> 
> > When we save obj_ext in s->size space, it seems that slab_ksize() might
> > be missing the corresponding handling.
> 
> Oops.
> 
> > It still returns s->size, which could cause callers of slab_ksize()
> > to see unexpected data (i.e. obj_ext), or even overwrite the obj_ext data.
> 
> Yes indeed.
> Great point, thanks!
> 
> I'll fix it by checking if the slab has obj_exts within the object
> layout and returning s->object_size if so.

Makes sense - I think there's one more nuance worth capturing.
slab_ksize() seems to compute the maximum safe size by applying layout
constraints from most-restrictive to least-restrictive:
redzones/poison/KASAN clamp it to object_size, tail metadata
(SLAB_TYPESAFE_BY_RCU / SLAB_STORE_USER) clamps it to inuse, and only
when no metadata is present does it return s->size.

With that ordering in mind, SLAB_OBJ_EXT_IN_OBJ should behave like
another "tail metadata" cap: put the check right before `return s->size`,
and if it's set, return s->inuse instead. Curious what you think.

-- 
Thanks,
Hao

> 
> -- 
> Cheers,
> Harry / Hyeonggon
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Harry Yoo 1 month ago
On Thu, Jan 08, 2026 at 05:52:27PM +0800, Hao Li wrote:
> On Thu, Jan 08, 2026 at 05:41:00PM +0900, Harry Yoo wrote:
> > On Thu, Jan 08, 2026 at 01:52:09PM +0800, Hao Li wrote:
> > > On Mon, Jan 05, 2026 at 05:02:30PM +0900, Harry Yoo wrote:
> > > > When a cache has high s->align value and s->object_size is not aligned
> > > > to it, each object ends up with some unused space because of alignment.
> > > > If this wasted space is big enough, we can use it to store the
> > > > slabobj_ext metadata instead of wasting it.
> > > 
> > > Hi, Harry,
> > 
> > Hi Hao,
> > 
> > > When we save obj_ext in s->size space, it seems that slab_ksize() might
> > > be missing the corresponding handling.
> > 
> > Oops.
> > 
> > > It still returns s->size, which could cause callers of slab_ksize()
> > > to see unexpected data (i.e. obj_ext), or even overwrite the obj_ext data.
> > 
> > Yes indeed.
> > Great point, thanks!
> > 
> > I'll fix it by checking if the slab has obj_exts within the object
> > layout and returning s->object_size if so.
> 
> Makes sense - I think there's one more nuance worth capturing.
> slab_ksize() seems to compute the maximum safe size by applying layout
> constraints from most-restrictive to least-restrictive:
> redzones/poison/KASAN clamp it to object_size, tail metadata
> (SLAB_TYPESAFE_BY_RCU / SLAB_STORE_USER) clamps it to inuse, and only
> when nothing metadata lives does it return s->size.

Waaaait, SLAB_TYPESAFE_BY_RCU isn't the only case where we put the
freelist pointer after the object.

What about caches with a constructor?
We do place it after the object, but slab_ksize() may still return s->size?

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Hao Li 1 month ago
On Thu, Jan 08, 2026 at 07:44:47PM +0900, Harry Yoo wrote:
> On Thu, Jan 08, 2026 at 05:52:27PM +0800, Hao Li wrote:
> > On Thu, Jan 08, 2026 at 05:41:00PM +0900, Harry Yoo wrote:
> > > On Thu, Jan 08, 2026 at 01:52:09PM +0800, Hao Li wrote:
> > > > On Mon, Jan 05, 2026 at 05:02:30PM +0900, Harry Yoo wrote:
> > > > > When a cache has high s->align value and s->object_size is not aligned
> > > > > to it, each object ends up with some unused space because of alignment.
> > > > > If this wasted space is big enough, we can use it to store the
> > > > > slabobj_ext metadata instead of wasting it.
> > > > 
> > > > Hi, Harry,
> > > 
> > > Hi Hao,
> > > 
> > > > When we save obj_ext in s->size space, it seems that slab_ksize() might
> > > > be missing the corresponding handling.
> > > 
> > > Oops.
> > > 
> > > > It still returns s->size, which could cause callers of slab_ksize()
> > > > to see unexpected data (i.e. obj_ext), or even overwrite the obj_ext data.
> > > 
> > > Yes indeed.
> > > Great point, thanks!
> > > 
> > > I'll fix it by checking if the slab has obj_exts within the object
> > > layout and returning s->object_size if so.
> > 
> > Makes sense - I think there's one more nuance worth capturing.
> > slab_ksize() seems to compute the maximum safe size by applying layout
> > constraints from most-restrictive to least-restrictive:
> > redzones/poison/KASAN clamp it to object_size, tail metadata
> > (SLAB_TYPESAFE_BY_RCU / SLAB_STORE_USER) clamps it to inuse, and only
> > when nothing metadata lives does it return s->size.
> 
> Waaaait, SLAB_TYPESAFE_BY_RCU isn't the only case where we put freelist
> pointer after the object.
> 
> What about caches with constructor?
> We do place it after object, but slab_ksize() may return s->size? 

That's a really good question - thanks for calling it out. I took
another look at the code, and the comment for ksize() notes that it's
only meant to be used with kmalloc()-family allocations; those objects
don't have a ctor pointer. So as long as callers stick to that contract,
I think we should be fine and don't need to worry too much about this
case.

-- 
Thanks,
Hao
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Vlastimil Babka 1 month ago
On 1/8/26 11:44, Harry Yoo wrote:
> On Thu, Jan 08, 2026 at 05:52:27PM +0800, Hao Li wrote:
>> On Thu, Jan 08, 2026 at 05:41:00PM +0900, Harry Yoo wrote:
>> > On Thu, Jan 08, 2026 at 01:52:09PM +0800, Hao Li wrote:
>> > > On Mon, Jan 05, 2026 at 05:02:30PM +0900, Harry Yoo wrote:
>> > > > When a cache has high s->align value and s->object_size is not aligned
>> > > > to it, each object ends up with some unused space because of alignment.
>> > > > If this wasted space is big enough, we can use it to store the
>> > > > slabobj_ext metadata instead of wasting it.
>> > > 
>> > > Hi, Harry,
>> > 
>> > Hi Hao,
>> > 
>> > > When we save obj_ext in s->size space, it seems that slab_ksize() might
>> > > be missing the corresponding handling.
>> > 
>> > Oops.
>> > 
>> > > It still returns s->size, which could cause callers of slab_ksize()
>> > > to see unexpected data (i.e. obj_ext), or even overwrite the obj_ext data.
>> > 
>> > Yes indeed.
>> > Great point, thanks!
>> > 
>> > I'll fix it by checking if the slab has obj_exts within the object
>> > layout and returning s->object_size if so.
>> 
>> Makes sense - I think there's one more nuance worth capturing.
>> slab_ksize() seems to compute the maximum safe size by applying layout
>> constraints from most-restrictive to least-restrictive:
>> redzones/poison/KASAN clamp it to object_size, tail metadata
>> (SLAB_TYPESAFE_BY_RCU / SLAB_STORE_USER) clamps it to inuse, and only
>> when nothing metadata lives does it return s->size.
> 
> Waaaait, SLAB_TYPESAFE_BY_RCU isn't the only case where we put freelist
> pointer after the object.
> 
> What about caches with constructor?
> We do place it after object, but slab_ksize() may return s->size? 

I think the freelist pointer is fine because it's not used by allocated objects?
Also ksize() should no longer be used to fill more of the object than that
was requested in the first place.
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Harry Yoo 1 month ago
On Thu, Jan 08, 2026 at 11:52:36AM +0100, Vlastimil Babka wrote:
> On 1/8/26 11:44, Harry Yoo wrote:
> > On Thu, Jan 08, 2026 at 05:52:27PM +0800, Hao Li wrote:
> >> On Thu, Jan 08, 2026 at 05:41:00PM +0900, Harry Yoo wrote:
> >> > On Thu, Jan 08, 2026 at 01:52:09PM +0800, Hao Li wrote:
> >> > > On Mon, Jan 05, 2026 at 05:02:30PM +0900, Harry Yoo wrote:
> >> > > > When a cache has high s->align value and s->object_size is not aligned
> >> > > > to it, each object ends up with some unused space because of alignment.
> >> > > > If this wasted space is big enough, we can use it to store the
> >> > > > slabobj_ext metadata instead of wasting it.
> >> > > 
> >> > > Hi, Harry,
> >> > 
> >> > Hi Hao,
> >> > 
> >> > > When we save obj_ext in s->size space, it seems that slab_ksize() might
> >> > > be missing the corresponding handling.
> >> > 
> >> > Oops.
> >> > 
> >> > > It still returns s->size, which could cause callers of slab_ksize()
> >> > > to see unexpected data (i.e. obj_ext), or even overwrite the obj_ext data.
> >> > 
> >> > Yes indeed.
> >> > Great point, thanks!
> >> > 
> >> > I'll fix it by checking if the slab has obj_exts within the object
> >> > layout and returning s->object_size if so.
> >> 
> >> Makes sense - I think there's one more nuance worth capturing.
> >> slab_ksize() seems to compute the maximum safe size by applying layout
> >> constraints from most-restrictive to least-restrictive:
> >> redzones/poison/KASAN clamp it to object_size, tail metadata
> >> (SLAB_TYPESAFE_BY_RCU / SLAB_STORE_USER) clamps it to inuse, and only
> >> when nothing metadata lives does it return s->size.
> > 
> > Waaaait, SLAB_TYPESAFE_BY_RCU isn't the only case where we put freelist
> > pointer after the object.
> > 
> > What about caches with constructor?
> > We do place it after object, but slab_ksize() may return s->size? 
> 
> I think the freelist pointer is fine because it's not used by allocated objects?

Ah, right.

Nevermind. I was just confused while reading the code/comment.

> Also ksize() should no longer be used to fill more of the object than that
> was requested in the first place.

since v6.1, yeah.

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Hao Li 1 month ago
On Thu, Jan 08, 2026 at 11:52:36AM +0100, Vlastimil Babka wrote:
> On 1/8/26 11:44, Harry Yoo wrote:
> > On Thu, Jan 08, 2026 at 05:52:27PM +0800, Hao Li wrote:
> >> On Thu, Jan 08, 2026 at 05:41:00PM +0900, Harry Yoo wrote:
> >> > On Thu, Jan 08, 2026 at 01:52:09PM +0800, Hao Li wrote:
> >> > > On Mon, Jan 05, 2026 at 05:02:30PM +0900, Harry Yoo wrote:
> >> > > > When a cache has high s->align value and s->object_size is not aligned
> >> > > > to it, each object ends up with some unused space because of alignment.
> >> > > > If this wasted space is big enough, we can use it to store the
> >> > > > slabobj_ext metadata instead of wasting it.
> >> > > 
> >> > > Hi, Harry,
> >> > 
> >> > Hi Hao,
> >> > 
> >> > > When we save obj_ext in s->size space, it seems that slab_ksize() might
> >> > > be missing the corresponding handling.
> >> > 
> >> > Oops.
> >> > 
> >> > > It still returns s->size, which could cause callers of slab_ksize()
> >> > > to see unexpected data (i.e. obj_ext), or even overwrite the obj_ext data.
> >> > 
> >> > Yes indeed.
> >> > Great point, thanks!
> >> > 
> >> > I'll fix it by checking if the slab has obj_exts within the object
> >> > layout and returning s->object_size if so.
> >> 
> >> Makes sense - I think there's one more nuance worth capturing.
> >> slab_ksize() seems to compute the maximum safe size by applying layout
> >> constraints from most-restrictive to least-restrictive:
> >> redzones/poison/KASAN clamp it to object_size, tail metadata
> >> (SLAB_TYPESAFE_BY_RCU / SLAB_STORE_USER) clamps it to inuse, and only
> >> when nothing metadata lives does it return s->size.
> > 
> > Waaaait, SLAB_TYPESAFE_BY_RCU isn't the only case where we put freelist
> > pointer after the object.
> > 
> > What about caches with constructor?
> > We do place it after object, but slab_ksize() may return s->size? 
> 
> I think the freelist pointer is fine because it's not used by allocated objects?
> Also ksize() should no longer be used to fill more of the object than that
> was requested in the first place.

Yes - being conservative here seems safest. Exposing extra bytes that
callers don't expect could easily break assumptions and lead to subtle
bugs.

-- 
Thanks,
Hao

> 
> 
>
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Harry Yoo 1 month ago
On Thu, Jan 08, 2026 at 05:52:27PM +0800, Hao Li wrote:
> On Thu, Jan 08, 2026 at 05:41:00PM +0900, Harry Yoo wrote:
> > On Thu, Jan 08, 2026 at 01:52:09PM +0800, Hao Li wrote:
> > > On Mon, Jan 05, 2026 at 05:02:30PM +0900, Harry Yoo wrote:
> > > > When a cache has high s->align value and s->object_size is not aligned
> > > > to it, each object ends up with some unused space because of alignment.
> > > > If this wasted space is big enough, we can use it to store the
> > > > slabobj_ext metadata instead of wasting it.
> > > 
> > > Hi, Harry,
> > 
> > Hi Hao,
> > 
> > > When we save obj_ext in s->size space, it seems that slab_ksize() might
> > > be missing the corresponding handling.
> > 
> > Oops.
> > 
> > > It still returns s->size, which could cause callers of slab_ksize()
> > > to see unexpected data (i.e. obj_ext), or even overwrite the obj_ext data.
> > 
> > Yes indeed.
> > Great point, thanks!
> > 
> > I'll fix it by checking if the slab has obj_exts within the object
> > layout and returning s->object_size if so.
> 
> Makes sense - I think there's one more nuance worth capturing.
> slab_ksize() seems to compute the maximum safe size by applying layout
> constraints from most-restrictive to least-restrictive:
> redzones/poison/KASAN clamp it to object_size, tail metadata
> (SLAB_TYPESAFE_BY_RCU / SLAB_STORE_USER) clamps it to inuse, and only
> when nothing metadata lives does it return s->size.

Hmm, you're right.
s->object_size is more restrictive than it should be.

> With that ordering in mind, SLAB_OBJ_EXT_IN_OBJ should behave like
> another "tail metadata" cap: put the check right before `return s->size`,
> and if it's set, return s->inuse instead. Curious what you think.

Good point, and that will work. Will do.

Thanks!

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Vlastimil Babka 1 month ago
On 1/5/26 09:02, Harry Yoo wrote:
> When a cache has high s->align value and s->object_size is not aligned
> to it, each object ends up with some unused space because of alignment.
> If this wasted space is big enough, we can use it to store the
> slabobj_ext metadata instead of wasting it.
> 
> On my system, this happens with caches like kmem_cache, mm_struct, pid,
> task_struct, sighand_cache, xfs_inode, and others.
> 
> To place the slabobj_ext metadata within each object, the existing
> slab_obj_ext() logic can still be used by setting:
> 
>   - slab->obj_exts = slab_address(slab) + s->red_left_zone +
>                      (slabobj_ext offset)
>   - stride = s->size
> 
> slab_obj_ext() doesn't need know where the metadata is stored,
> so this method works without adding extra overhead to slab_obj_ext().
> 
> A good example benefiting from this optimization is xfs_inode
> (object_size: 992, align: 64). To measure memory savings, 2 millions of
> files were created on XFS.
> 
> [ MEMCG=y, MEM_ALLOC_PROFILING=n ]
> 
> Before patch (creating ~2.64M directories on xfs):
>   Slab:            5175976 kB
>   SReclaimable:    3837524 kB
>   SUnreclaim:      1338452 kB
> 
> After patch (creating ~2.64M directories on xfs):
>   Slab:            5152912 kB
>   SReclaimable:    3838568 kB
>   SUnreclaim:      1314344 kB (-23.54 MiB)
> 
> Enjoy the memory savings!
> 
> Suggested-by: Vlastimil Babka <vbabka@suse.cz>
> Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
> ---
>  include/linux/slab.h |  9 ++++++
>  mm/slab_common.c     |  6 ++--
>  mm/slub.c            | 73 ++++++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 83 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index 4554c04a9bd7..da512d9ab1a0 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -59,6 +59,9 @@ enum _slab_flag_bits {
>  	_SLAB_CMPXCHG_DOUBLE,
>  #ifdef CONFIG_SLAB_OBJ_EXT
>  	_SLAB_NO_OBJ_EXT,
> +#endif
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +	_SLAB_OBJ_EXT_IN_OBJ,
>  #endif
>  	_SLAB_FLAGS_LAST_BIT
>  };
> @@ -244,6 +247,12 @@ enum _slab_flag_bits {
>  #define SLAB_NO_OBJ_EXT		__SLAB_FLAG_UNUSED
>  #endif
>  
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +#define SLAB_OBJ_EXT_IN_OBJ	__SLAB_FLAG_BIT(_SLAB_OBJ_EXT_IN_OBJ)
> +#else
> +#define SLAB_OBJ_EXT_IN_OBJ	__SLAB_FLAG_UNUSED
> +#endif
> +
>  /*
>   * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests.
>   *
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index c4cf9ed2ec92..f0a6db20d7ea 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -43,11 +43,13 @@ DEFINE_MUTEX(slab_mutex);
>  struct kmem_cache *kmem_cache;
>  
>  /*
> - * Set of flags that will prevent slab merging
> + * Set of flags that will prevent slab merging.
> + * Any flag that adds per-object metadata should be included,
> + * since slab merging can update s->inuse that affects the metadata layout.
>   */
>  #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
>  		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
> -		SLAB_FAILSLAB | SLAB_NO_MERGE)
> +		SLAB_FAILSLAB | SLAB_NO_MERGE | SLAB_OBJ_EXT_IN_OBJ)
>  
>  #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
>  			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
> diff --git a/mm/slub.c b/mm/slub.c
> index 50b74324e550..43fdbff9d09b 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -977,6 +977,39 @@ static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
>  {
>  	return false;
>  }
> +
> +#endif
> +
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +static bool obj_exts_in_object(struct kmem_cache *s)
> +{
> +	return s->flags & SLAB_OBJ_EXT_IN_OBJ;

So this is a property of the cache.

> +}
> +
> +static unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
> +{
> +	unsigned int offset = get_info_end(s);
> +
> +	if (kmem_cache_debug_flags(s, SLAB_STORE_USER))
> +		offset += sizeof(struct track) * 2;
> +
> +	if (slub_debug_orig_size(s))
> +		offset += sizeof(unsigned long);
> +
> +	offset += kasan_metadata_size(s, false);
> +
> +	return offset;
> +}
> +#else
> +static inline bool obj_exts_in_object(struct kmem_cache *s)
> +{
> +	return false;
> +}
> +
> +static inline unsigned int obj_exts_offset_in_object(struct kmem_cache *s)
> +{
> +	return 0;
> +}
>  #endif
>  
>  #ifdef CONFIG_SLUB_DEBUG
> @@ -1277,6 +1310,9 @@ static void print_trailer(struct kmem_cache *s, struct slab *slab, u8 *p)
>  
>  	off += kasan_metadata_size(s, false);
>  
> +	if (obj_exts_in_object(s))
> +		off += sizeof(struct slabobj_ext);
> +
>  	if (off != size_from_object(s))
>  		/* Beginning of the filler is the free pointer */
>  		print_section(KERN_ERR, "Padding  ", p + off,
> @@ -1446,7 +1482,10 @@ check_bytes_and_report(struct kmem_cache *s, struct slab *slab,
>   * 	A. Free pointer (if we cannot overwrite object on free)
>   * 	B. Tracking data for SLAB_STORE_USER
>   *	C. Original request size for kmalloc object (SLAB_STORE_USER enabled)
> - *	D. Padding to reach required alignment boundary or at minimum
> + *	D. KASAN alloc metadata (KASAN enabled)
> + *	E. struct slabobj_ext to store accounting metadata
> + *	   (SLAB_OBJ_EXT_IN_OBJ enabled)
> + *	F. Padding to reach required alignment boundary or at minimum
>   * 		one word if debugging is on to be able to detect writes
>   * 		before the word boundary.
>   *
> @@ -1474,6 +1513,9 @@ static int check_pad_bytes(struct kmem_cache *s, struct slab *slab, u8 *p)
>  
>  	off += kasan_metadata_size(s, false);
>  
> +	if (obj_exts_in_object(s))
> +		off += sizeof(struct slabobj_ext);
> +
>  	if (size_from_object(s) == off)
>  		return 1;
>  
> @@ -2280,7 +2322,8 @@ static inline void free_slab_obj_exts(struct slab *slab)
>  		return;
>  	}
>  
> -	if (obj_exts_in_slab(slab->slab_cache, slab)) {
> +	if (obj_exts_in_slab(slab->slab_cache, slab) ||
> +			obj_exts_in_object(slab->slab_cache)) {

Here we check that property to determine if we can return early and not do
kfree().

>  		slab->obj_exts = 0;
>  		return;
>  	}
> @@ -2326,6 +2369,23 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
>  			obj_exts |= MEMCG_DATA_OBJEXTS;
>  		slab->obj_exts = obj_exts;
>  		slab_set_stride(slab, sizeof(struct slabobj_ext));
> +	} else if (obj_exts_in_object(s)) {
> +		unsigned int offset = obj_exts_offset_in_object(s);

But we reach this only when need_slab_obj_exts() is true above. So there
might be slabs from caches where obj_exts_in_object() is true, but still
have obj_exts allocated by kmalloc, and we leak them in
free_slab_obj_exts(). (and we perform some incorrect action wherever else
obj_exts_in_object() is checked) AFAIU?

So I think we need to check obj_exts_in_slab() (in the simplified way I
suggested for patch 7/8) first, and only look at obj_exts_in_object()
afterwards to distinguish the exact layout where needed? (i.e.
free_slab_obj_exts() is fine to just check obj_exts_in_slab()).

> +		obj_exts = (unsigned long)slab_address(slab);
> +		obj_exts += s->red_left_pad;
> +		obj_exts += offset;
> +
> +		get_slab_obj_exts(obj_exts);
> +		for_each_object(addr, s, slab_address(slab), slab->objects)
> +			memset(kasan_reset_tag(addr) + offset, 0,
> +			       sizeof(struct slabobj_ext));
> +		put_slab_obj_exts(obj_exts);
> +
> +		if (IS_ENABLED(CONFIG_MEMCG))
> +			obj_exts |= MEMCG_DATA_OBJEXTS;
> +		slab->obj_exts = obj_exts;
> +		slab_set_stride(slab, s->size);
>  	}
>  }
>  
> @@ -8023,6 +8083,7 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
>  {
>  	slab_flags_t flags = s->flags;
>  	unsigned int size = s->object_size;
> +	unsigned int aligned_size;
>  	unsigned int order;
>  
>  	/*
> @@ -8132,7 +8193,13 @@ static int calculate_sizes(struct kmem_cache_args *args, struct kmem_cache *s)
>  	 * offset 0. In order to align the objects we have to simply size
>  	 * each object to conform to the alignment.
>  	 */
> -	size = ALIGN(size, s->align);
> +	aligned_size = ALIGN(size, s->align);
> +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> +	if (aligned_size - size >= sizeof(struct slabobj_ext))
> +		s->flags |= SLAB_OBJ_EXT_IN_OBJ;
> +#endif
> +	size = aligned_size;
> +
>  	s->size = size;
>  	s->reciprocal_size = reciprocal_value(size);
>  	order = calculate_order(size);
Re: [PATCH V5 8/8] mm/slab: place slabobj_ext metadata in unused space within s->size
Posted by Harry Yoo 1 month ago
On Wed, Jan 07, 2026 at 06:33:52PM +0100, Vlastimil Babka wrote:
> On 1/5/26 09:02, Harry Yoo wrote:
> > diff --git a/mm/slub.c b/mm/slub.c
> > index 50b74324e550..43fdbff9d09b 100644
> > --- a/mm/slub.c
> > +++ b/mm/slub.c
> > @@ -977,6 +977,39 @@ static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
> >  {
> >  	return false;
> >  }
> > +
> > +#endif
> > +
> > +#if defined(CONFIG_SLAB_OBJ_EXT) && defined(CONFIG_64BIT)
> > +static bool obj_exts_in_object(struct kmem_cache *s)
> > +{
> > +	return s->flags & SLAB_OBJ_EXT_IN_OBJ;
> 
> So this is a property of the cache.

Right.

> > @@ -2280,7 +2322,8 @@ static inline void free_slab_obj_exts(struct slab *slab)
> >  		return;
> >  	}
> >  
> > -	if (obj_exts_in_slab(slab->slab_cache, slab)) {
> > +	if (obj_exts_in_slab(slab->slab_cache, slab) ||
> > +			obj_exts_in_object(slab->slab_cache)) {
> 
> Here we check that property to determine if we can return early and not do
> kfree().

Right.

> > @@ -2326,6 +2369,23 @@ static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
> >  			obj_exts |= MEMCG_DATA_OBJEXTS;
> >  		slab->obj_exts = obj_exts;
> >  		slab_set_stride(slab, sizeof(struct slabobj_ext));
> > +	} else if (obj_exts_in_object(s)) {
> > +		unsigned int offset = obj_exts_offset_in_object(s);
> 
> But we reach this only when need_slab_obj_exts() is true above. So there
> might be slabs from caches where obj_exts_in_object() is true, but still
> have obj_exts allocated by kmalloc, and we leak them in
> free_slab_obj_exts().

Oh god, right!

> (and we perform some incorrect action wherever else
> obj_exts_in_object() is checked) AFAIU?

Yes.

It must check if slabs actually have allocated obj_exts from wasted space...

> So I think we need to check obj_exts_in_slab() (in the simplified way I
> suggested for patch 7/8) first, and only look at obj_exts_in_object()
> afterwards to distinguish the exact layout where needed?
> (i.e. free_slab_obj_exts() is fine to just check obj_exts_in_slab()).

That'll work, will do.

-- 
Cheers,
Harry / Hyeonggon