[PATCH 05/11] mm/zsmalloc: Store obj_cgroup pointer in zspage

Joshua Hahn posted 11 patches 3 weeks, 5 days ago
[PATCH 05/11] mm/zsmalloc: Store obj_cgroup pointer in zspage
Posted by Joshua Hahn 3 weeks, 5 days ago
With each zspage now having an array of obj_cgroup pointers, plumb the
obj_cgroup pointer from the zswap / zram layer down to zsmalloc.

zram sees no functional change; its callers simply pass NULL. For the zswap
path, store the obj_cgroup pointer after compression when writing the object,
and erase the pointer when the object gets freed.

The lifetime and charging of the obj_cgroup is still handled in the
zswap layer.

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
 drivers/block/zram/zram_drv.c |  7 ++++---
 include/linux/zsmalloc.h      |  3 ++-
 mm/zsmalloc.c                 | 25 ++++++++++++++++++++++++-
 mm/zswap.c                    |  6 +++---
 4 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d1eae5c20df7..e68e408992e7 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -2232,7 +2232,7 @@ static int write_incompressible_page(struct zram *zram, struct page *page,
 	}
 
 	src = kmap_local_page(page);
-	zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE);
+	zs_obj_write(zram->mem_pool, handle, src, PAGE_SIZE, NULL);
 	kunmap_local(src);
 
 	slot_lock(zram, index);
@@ -2297,7 +2297,7 @@ static int zram_write_page(struct zram *zram, struct page *page, u32 index)
 		return -ENOMEM;
 	}
 
-	zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len);
+	zs_obj_write(zram->mem_pool, handle, zstrm->buffer, comp_len, NULL);
 	zcomp_stream_put(zstrm);
 
 	slot_lock(zram, index);
@@ -2521,7 +2521,8 @@ static int recompress_slot(struct zram *zram, u32 index, struct page *page,
 		return PTR_ERR((void *)handle_new);
 	}
 
-	zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer, comp_len_new);
+	zs_obj_write(zram->mem_pool, handle_new, zstrm->buffer,
+		     comp_len_new, NULL);
 	zcomp_stream_put(zstrm);
 
 	slot_free(zram, index);
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 24fb2e0fdf67..645957a156c4 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -23,6 +23,7 @@ struct zs_pool_stats {
 
 struct zs_pool;
 struct scatterlist;
+struct obj_cgroup;
 enum memcg_stat_item;
 
 struct zs_pool *zs_create_pool(const char *name, bool memcg_aware,
@@ -51,7 +52,7 @@ void zs_obj_read_sg_begin(struct zs_pool *pool, unsigned long handle,
 			  struct scatterlist *sg, size_t mem_len);
 void zs_obj_read_sg_end(struct zs_pool *pool, unsigned long handle);
 void zs_obj_write(struct zs_pool *pool, unsigned long handle,
-		  void *handle_mem, size_t mem_len);
+		  void *handle_mem, size_t mem_len, struct obj_cgroup *objcg);
 
 extern const struct movable_operations zsmalloc_mops;
 
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index dcf99516227c..d4735451c273 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1195,7 +1195,7 @@ void zs_obj_read_sg_end(struct zs_pool *pool, unsigned long handle)
 EXPORT_SYMBOL_GPL(zs_obj_read_sg_end);
 
 void zs_obj_write(struct zs_pool *pool, unsigned long handle,
-		  void *handle_mem, size_t mem_len)
+		  void *handle_mem, size_t mem_len, struct obj_cgroup *objcg)
 {
 	struct zspage *zspage;
 	struct zpdesc *zpdesc;
@@ -1216,6 +1216,11 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
 	class = zspage_class(pool, zspage);
 	off = offset_in_page(class->size * obj_idx);
 
+	if (objcg) {
+		WARN_ON_ONCE(!pool->memcg_aware);
+		zspage->objcgs[obj_idx] = objcg;
+	}
+
 	if (!ZsHugePage(zspage))
 		off += ZS_HANDLE_SIZE;
 
@@ -1388,6 +1393,9 @@ static void obj_free(int class_size, unsigned long obj)
 	f_offset = offset_in_page(class_size * f_objidx);
 	zspage = get_zspage(f_zpdesc);
 
+	if (zspage->pool->memcg_aware)
+		zspage->objcgs[f_objidx] = NULL;
+
 	vaddr = kmap_local_zpdesc(f_zpdesc);
 	link = (struct link_free *)(vaddr + f_offset);
 
@@ -1538,6 +1546,16 @@ static unsigned long find_alloced_obj(struct size_class *class,
 	return handle;
 }
 
+static void zs_migrate_objcg(struct zspage *s_zspage, struct zspage *d_zspage,
+			     unsigned long used_obj, unsigned long free_obj)
+{
+	unsigned int s_idx = used_obj & OBJ_INDEX_MASK;
+	unsigned int d_idx = free_obj & OBJ_INDEX_MASK;
+
+	d_zspage->objcgs[d_idx] = s_zspage->objcgs[s_idx];
+	s_zspage->objcgs[s_idx] = NULL;
+}
+
 static void migrate_zspage(struct zs_pool *pool, struct zspage *src_zspage,
 			   struct zspage *dst_zspage)
 {
@@ -1560,6 +1578,11 @@ static void migrate_zspage(struct zs_pool *pool, struct zspage *src_zspage,
 		used_obj = handle_to_obj(handle);
 		free_obj = obj_malloc(pool, dst_zspage, handle);
 		zs_obj_copy(class, free_obj, used_obj);
+
+		if (pool->memcg_aware)
+			zs_migrate_objcg(src_zspage, dst_zspage,
+					 used_obj, free_obj);
+
 		obj_idx++;
 		obj_free(class->size, used_obj);
 
diff --git a/mm/zswap.c b/mm/zswap.c
index ff9abaa8aa38..68b87c3cc326 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -852,7 +852,7 @@ static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
 }
 
 static bool zswap_compress(struct page *page, struct zswap_entry *entry,
-			   struct zswap_pool *pool)
+			   struct zswap_pool *pool, struct obj_cgroup *objcg)
 {
 	struct crypto_acomp_ctx *acomp_ctx;
 	struct scatterlist input, output;
@@ -912,7 +912,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
 		goto unlock;
 	}
 
-	zs_obj_write(pool->zs_pool, handle, dst, dlen);
+	zs_obj_write(pool->zs_pool, handle, dst, dlen, objcg);
 	entry->handle = handle;
 	entry->length = dlen;
 
@@ -1414,7 +1414,7 @@ static bool zswap_store_page(struct page *page,
 		return false;
 	}
 
-	if (!zswap_compress(page, entry, pool))
+	if (!zswap_compress(page, entry, pool, objcg))
 		goto compress_failed;
 
 	old = xa_store(swap_zswap_tree(page_swpentry),
-- 
2.52.0
Re: [PATCH 05/11] mm/zsmalloc: Store obj_cgroup pointer in zspage
Posted by Yosry Ahmed 3 weeks, 5 days ago
[..]
> @@ -1216,6 +1216,11 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
>         class = zspage_class(pool, zspage);
>         off = offset_in_page(class->size * obj_idx);
>
> +       if (objcg) {
> +               WARN_ON_ONCE(!pool->memcg_aware);
> +               zspage->objcgs[obj_idx] = objcg;
> +       }

If pool->memcg_aware is not set the warning will fire, but the
following line will write to uninitialized memory and probably crash.
We should avoid the write if the warning fires.

Maybe:

if (objcg && !WARN_ON_ONCE(!pool->memcg_aware))
       zspage->objcgs[obj_idx] = objcg;

Not pretty, but the same pattern is followed in many places in the kernel.

> +
>         if (!ZsHugePage(zspage))
>                 off += ZS_HANDLE_SIZE;
>
Re: [PATCH 05/11] mm/zsmalloc: Store obj_cgroup pointer in zspage
Posted by Joshua Hahn 3 weeks, 5 days ago
On Wed, 11 Mar 2026 13:17:26 -0700 Yosry Ahmed <yosry@kernel.org> wrote:

> [..]
> > @@ -1216,6 +1216,11 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
> >         class = zspage_class(pool, zspage);
> >         off = offset_in_page(class->size * obj_idx);
> >
> > +       if (objcg) {
> > +               WARN_ON_ONCE(!pool->memcg_aware);
> > +               zspage->objcgs[obj_idx] = objcg;
> > +       }

Hello Yosry,

I hope you are doing well. Thank you for reviewing this series! :-)

> If pool->memcg_aware is not set the warning will fire, but the
> following line will write to uninitialized memory and probably crash.
> We should avoid the write if the warning fires.
> 
> Maybe:
> 
> if (objcg && !WARN_ON_ONCE(!pool->memcg_aware))
>        zspage->objcgs[obj_idx] = objcg;

Ack. 

> Not pretty, but the same pattern is followed in many places in the kernel.
> 
> > +
> >         if (!ZsHugePage(zspage))
> >                 off += ZS_HANDLE_SIZE;
> >

Definitely better than writing garbage and crashing :-)
I'll make this change in the next version. I think I should also sprinkle
these WARN_ON_ONCEs in a few other places, and I'll be mindful of this
pattern for those cases too.

Thank you again Yosry, I hope you have a great day!
Joshua