Now that zswap_entries do not directly track obj_cgroups of the entries,
handle the lifetime management and charging of these entries into the
zsmalloc layer.
One functional change is that zswap entries are no longer accounted
by the size of the compressed object, but by the size of the size_class
slot they occupy.
This brings charging one step closer to an accurate representation of
the memory consumed in the zpdesc; even if a compressed object doesn't
consume the entirety of an obj slot, we should account the entirety of
the compressed object slot that the object makes unusable.
While at it, also remove an unnecessary newline in obj_free.
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
include/linux/memcontrol.h | 10 ------
mm/memcontrol.c | 54 ++-----------------------------
mm/zsmalloc.c | 65 ++++++++++++++++++++++++++++++++++++--
mm/zswap.c | 8 -----
4 files changed, 66 insertions(+), 71 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0652db4ff2d5..701d9ab6fef1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1851,22 +1851,12 @@ static inline bool memcg_is_dying(struct mem_cgroup *memcg)
#if defined(CONFIG_MEMCG) && defined(CONFIG_ZSWAP)
bool obj_cgroup_may_zswap(struct obj_cgroup *objcg);
-void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size);
-void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size);
bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg);
#else
static inline bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
{
return true;
}
-static inline void obj_cgroup_charge_zswap(struct obj_cgroup *objcg,
- size_t size)
-{
-}
-static inline void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg,
- size_t size)
-{
-}
static inline bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
{
/* if zswap is disabled, do not block pages going to the swapping device */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a52da3a5e4fd..68139be66a4f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -716,6 +716,7 @@ void mod_memcg_state(struct mem_cgroup *memcg, enum memcg_stat_item idx,
put_cpu();
}
+EXPORT_SYMBOL(mod_memcg_state);
#ifdef CONFIG_MEMCG_V1
/* idx can be of type enum memcg_stat_item or node_stat_item. */
@@ -3169,11 +3170,13 @@ int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
{
return obj_cgroup_charge_account(objcg, gfp, size, NULL, 0);
}
+EXPORT_SYMBOL(obj_cgroup_charge);
void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
{
refill_obj_stock(objcg, size, true, 0, NULL, 0);
}
+EXPORT_SYMBOL(obj_cgroup_uncharge);
static inline size_t obj_full_size(struct kmem_cache *s)
{
@@ -5488,57 +5491,6 @@ bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
return ret;
}
-/**
- * obj_cgroup_charge_zswap - charge compression backend memory
- * @objcg: the object cgroup
- * @size: size of compressed object
- *
- * This forces the charge after obj_cgroup_may_zswap() allowed
- * compression and storage in zswap for this cgroup to go ahead.
- */
-void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
-{
- struct mem_cgroup *memcg;
-
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
- return;
-
- VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
-
- /* PF_MEMALLOC context, charging must succeed */
- if (obj_cgroup_charge(objcg, GFP_KERNEL, size))
- VM_WARN_ON_ONCE(1);
-
- rcu_read_lock();
- memcg = obj_cgroup_memcg(objcg);
- mod_memcg_state(memcg, MEMCG_ZSWAP_B, size);
- mod_memcg_state(memcg, MEMCG_ZSWAPPED, 1);
- rcu_read_unlock();
-}
-
-/**
- * obj_cgroup_uncharge_zswap - uncharge compression backend memory
- * @objcg: the object cgroup
- * @size: size of compressed object
- *
- * Uncharges zswap memory on page in.
- */
-void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
-{
- struct mem_cgroup *memcg;
-
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
- return;
-
- obj_cgroup_uncharge(objcg, size);
-
- rcu_read_lock();
- memcg = obj_cgroup_memcg(objcg);
- mod_memcg_state(memcg, MEMCG_ZSWAP_B, -size);
- mod_memcg_state(memcg, MEMCG_ZSWAPPED, -1);
- rcu_read_unlock();
-}
-
bool mem_cgroup_zswap_writeback_enabled(struct mem_cgroup *memcg)
{
/* if zswap is disabled, do not block pages going to the swapping device */
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index a94ca8c26ad9..291194572a09 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1028,6 +1028,59 @@ static bool zspage_empty(struct zspage *zspage)
return get_zspage_inuse(zspage) == 0;
}
+#ifdef CONFIG_MEMCG
+static void zs_charge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
+ int size)
+{
+ struct mem_cgroup *memcg;
+
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return;
+
+ VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
+ WARN_ON_ONCE(!pool->memcg_aware);
+
+ /* PF_MEMALLOC context, charging must succeed */
+ if (obj_cgroup_charge(objcg, GFP_KERNEL, size))
+ VM_WARN_ON_ONCE(1);
+
+ rcu_read_lock();
+ memcg = obj_cgroup_memcg(objcg);
+ mod_memcg_state(memcg, pool->compressed_stat, size);
+ mod_memcg_state(memcg, pool->uncompressed_stat, 1);
+ rcu_read_unlock();
+}
+
+static void zs_uncharge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
+ int size)
+{
+ struct mem_cgroup *memcg;
+
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+ return;
+
+ WARN_ON_ONCE(!pool->memcg_aware);
+
+ obj_cgroup_uncharge(objcg, size);
+
+ rcu_read_lock();
+ memcg = obj_cgroup_memcg(objcg);
+ mod_memcg_state(memcg, pool->compressed_stat, -size);
+ mod_memcg_state(memcg, pool->uncompressed_stat, -1);
+ rcu_read_unlock();
+}
+#else
+static void zs_charge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
+ int size)
+{
+}
+
+static void zs_uncharge_objcg(struct zs_pool *pool, struct obj_cgroup *objcg,
+ int size)
+{
+}
+#endif
+
/**
* zs_lookup_class_index() - Returns index of the zsmalloc &size_class
* that hold objects of the provided size.
@@ -1244,6 +1297,8 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
if (objcg) {
WARN_ON_ONCE(!pool->memcg_aware);
zspage->objcgs[obj_idx] = objcg;
+ obj_cgroup_get(objcg);
+ zs_charge_objcg(pool, objcg, class->size);
}
if (!ZsHugePage(zspage))
@@ -1409,17 +1464,23 @@ static void obj_free(int class_size, unsigned long obj)
struct link_free *link;
struct zspage *zspage;
struct zpdesc *f_zpdesc;
+ struct zs_pool *pool;
unsigned long f_offset;
unsigned int f_objidx;
void *vaddr;
-
obj_to_location(obj, &f_zpdesc, &f_objidx);
f_offset = offset_in_page(class_size * f_objidx);
zspage = get_zspage(f_zpdesc);
+ pool = zspage->pool;
+
+ if (pool->memcg_aware && zspage->objcgs[f_objidx]) {
+ struct obj_cgroup *objcg = zspage->objcgs[f_objidx];
- if (zspage->pool->memcg_aware)
+ zs_uncharge_objcg(pool, objcg, class_size);
+ obj_cgroup_put(objcg);
zspage->objcgs[f_objidx] = NULL;
+ }
vaddr = kmap_local_zpdesc(f_zpdesc);
link = (struct link_free *)(vaddr + f_offset);
diff --git a/mm/zswap.c b/mm/zswap.c
index 436066965413..bca29a6e18f3 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -711,10 +711,6 @@ static void zswap_entry_free(struct zswap_entry *entry)
zswap_lru_del(&zswap_list_lru, entry, objcg);
zs_free(entry->pool->zs_pool, entry->handle);
zswap_pool_put(entry->pool);
- if (objcg) {
- obj_cgroup_uncharge_zswap(objcg, entry->length);
- obj_cgroup_put(objcg);
- }
if (entry->length == PAGE_SIZE)
atomic_long_dec(&zswap_stored_incompressible_pages);
zswap_entry_cache_free(entry);
@@ -1437,10 +1433,6 @@ static bool zswap_store_page(struct page *page,
* when the entry is removed from the tree.
*/
zswap_pool_get(pool);
- if (objcg) {
- obj_cgroup_get(objcg);
- obj_cgroup_charge_zswap(objcg, entry->length);
- }
atomic_long_inc(&zswap_stored_pages);
if (entry->length == PAGE_SIZE)
atomic_long_inc(&zswap_stored_incompressible_pages);
--
2.52.0
On Wed, Mar 11, 2026 at 12:51:44PM -0700, Joshua Hahn wrote:
> @@ -1244,6 +1297,8 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
> if (objcg) {
> WARN_ON_ONCE(!pool->memcg_aware);
> zspage->objcgs[obj_idx] = objcg;
> + obj_cgroup_get(objcg);
> + zs_charge_objcg(pool, objcg, class->size);
> }
>
> if (!ZsHugePage(zspage))
Note that the obj_cgroup_get() reference is for the pointer, not the
charge. I think it all comes out right in the end, but it's a bit
confusing to follow and verify through the series.
IOW, it's better to move that obj_cgroup_get() to when you add and store
zspage->objcgs[]. If zswap still has a reference at that point in the
series, then it's fine for there to be two separate obj_cgroup_get()
as well, with later patches deleting the zswap one when its
entry->objcg pointer disappears.
> @@ -711,10 +711,6 @@ static void zswap_entry_free(struct zswap_entry *entry)
> zswap_lru_del(&zswap_list_lru, entry, objcg);
> zs_free(entry->pool->zs_pool, entry->handle);
> zswap_pool_put(entry->pool);
> - if (objcg) {
> - obj_cgroup_uncharge_zswap(objcg, entry->length);
> - obj_cgroup_put(objcg);
> - }
> if (entry->length == PAGE_SIZE)
> atomic_long_dec(&zswap_stored_incompressible_pages);
> zswap_entry_cache_free(entry);
[ I can see that this was misleading. It was really getting a
reference for the entry->objcg = objcg a few lines down, hitching a
ride on that existing `if (objcg)`. ]
On Thu, 12 Mar 2026 17:42:01 -0400 Johannes Weiner <hannes@cmpxchg.org> wrote:
> On Wed, Mar 11, 2026 at 12:51:44PM -0700, Joshua Hahn wrote:
> > @@ -1244,6 +1297,8 @@ void zs_obj_write(struct zs_pool *pool, unsigned long handle,
> > if (objcg) {
> > WARN_ON_ONCE(!pool->memcg_aware);
> > zspage->objcgs[obj_idx] = objcg;
> > + obj_cgroup_get(objcg);
> > + zs_charge_objcg(pool, objcg, class->size);
> > }
> >
> > if (!ZsHugePage(zspage))
Hello Johannes, thank you for your review!
> Note that the obj_cgroup_get() reference is for the pointer, not the
> charge. I think it all comes out right in the end, but it's a bit
> confusing to follow and verify through the series.
Thank you for pointing that out. I'll try to make it more explicit via
the placement.
> IOW, it's better to move that obj_cgroup_get() to when you add and store
> zspage->objcgs[]. If zswap still has a reference at that point in the
> series, then it's fine for there to be two separate obj_cgroup_get()
> as well, with later patches deleting the zswap one when its
> entry->objcg pointer disappears.
Sounds good to me. Maybe for the code block above I could just move it one
line up so that it happens before the zspage->objcgs set, to
make it more obvious that it's associated with setting the objcg
pointer and not with the charge?
And for the freeing section, putting it after we set the pointer to
NULL could be more obvious?
> > @@ -711,10 +711,6 @@ static void zswap_entry_free(struct zswap_entry *entry)
> > zswap_lru_del(&zswap_list_lru, entry, objcg);
> > zs_free(entry->pool->zs_pool, entry->handle);
> > zswap_pool_put(entry->pool);
> > - if (objcg) {
> > - obj_cgroup_uncharge_zswap(objcg, entry->length);
> > - obj_cgroup_put(objcg);
> > - }
> > if (entry->length == PAGE_SIZE)
> > atomic_long_dec(&zswap_stored_incompressible_pages);
> > zswap_entry_cache_free(entry);
>
> [ I can see that this was misleading. It was really getting a
> reference for the entry->objcg = objcg a few lines down, hitching a
> ride on that existing `if (objcg)`. ]
Thank you for the clarification! I hope you have a great day :-)
Joshua
On Fri, Mar 13, 2026 at 08:34:33AM -0700, Joshua Hahn wrote:
> > IOW, it's better move that obj_cgroup_get() when you add and store
> > zspage->objcgs[]. If zswap stil has a reference at that point in the
> > series, then it's fine for there to be two separate obj_cgroup_get()
> > as well, with later patches deleting the zswap one when its
> > entry->objcg pointer disappears.
>
> Sounds good with me. Maybe for the code block above I just move it one
> line up so that it happens before the zspage->objcgs set and
> make it more obvious that it's associated with setting the objcg
> pointer and not with the charge?
>
> And for the freeing section, putting after we set the pointer to
> NULL could be more obvious?

That makes sense to me! Thanks
© 2016 - 2026 Red Hat, Inc.