In zsmalloc, there are two types of migrations: Migrations of single
compressed objects from one zspage to another, and substitutions of
zpdescs from zspages.
In both of these migrations, the memcg association of the compressed
objects does not change. However, the physical location of the compressed
objects may change, which alters their lruvec association.
In this patch, handle the single compressed object migration and
transfer lruvec and node statistics across the affected lruvecs / nodes.
Zsmalloc compressed objects, like slab objects, can span two pages.
When a spanning object is migrated, possibly to another zspage where
it spans two zpdescs, up to 4 nodes can be touched.
Instead of enumerating all possible combinations of node migrations,
simply uncharge entirely from the source (1 or 2 nodes) and charge
entirely to the destination (1 or 2 nodes).
s_off d_off
v v
----------+ +---- -----+ +---------
... ooo ooo xx| |x oo ... --> ... ooo x| |xx ooo oo ...
----------+ +---- -----+ +---------
pg1 pg2 pg3 pg4
s_zspage d_zspage
To do this, calculate how much of the compressed object lives on each
page and perform up to 4 uncharge-charges.
Note that these operations cannot call the existing
zs_{charge, uncharge}_objcg functions we introduced, since we are
holding the class spin lock and obj_cgroup_charge can sleep.
Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
mm/zsmalloc.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 70 insertions(+), 4 deletions(-)
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index ab085961b0e2..f3508ff8b3ab 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1684,15 +1684,81 @@ static unsigned long find_alloced_obj(struct size_class *class,
return handle;
}
+#ifdef CONFIG_MEMCG
 static void zs_migrate_objcg(struct zspage *s_zspage, struct zspage *d_zspage,
-			     unsigned long used_obj, unsigned long free_obj)
+			     unsigned long used_obj, unsigned long free_obj,
+			     struct zs_pool *pool, int size)
 {
-	unsigned int s_idx = used_obj & OBJ_INDEX_MASK;
-	unsigned int d_idx = free_obj & OBJ_INDEX_MASK;
+	struct zpdesc *s_zpdesc, *d_zpdesc;
+	struct obj_cgroup *objcg;
+	struct mem_cgroup *memcg;
+	struct lruvec *l;
+	unsigned int s_idx, d_idx;
+	unsigned int s_off, d_off;
+	int charges[4], nids[4], partial;
+	int s_bytes_in_page, d_bytes_in_page;
+	int i;
+
+	/*
+	 * Resolve the object locations before any early exit: the objcg
+	 * pointer swap after the "out:" label uses s_idx and d_idx, so they
+	 * must be initialized even on the cgroup-v1 and !objcg paths.
+	 * Doing this first fixes an uninitialized use reported by clang's
+	 * -Wsometimes-uninitialized and gcc's -Wuninitialized.
+	 */
+	obj_to_location(used_obj, &s_zpdesc, &s_idx);
+	obj_to_location(free_obj, &d_zpdesc, &d_idx);
+
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		goto out;
+
+	objcg = s_zspage->objcgs[s_idx];
+	if (!objcg)
+		goto out;
+
+	/*
+	 * The object migration here can touch up to 4 nodes.
+	 * Instead of breaking down all possible combinations of node changes,
+	 * just uncharge entirely from the source and charge entirely to the
+	 * destination, even if there are node overlaps between src and dst.
+	 */
+	s_off = (s_idx * size) % PAGE_SIZE;
+	d_off = (d_idx * size) % PAGE_SIZE;
+	s_bytes_in_page = min_t(int, size, PAGE_SIZE - s_off);
+	d_bytes_in_page = min_t(int, size, PAGE_SIZE - d_off);
+
+	charges[0] = -s_bytes_in_page;
+	nids[0] = page_to_nid(zpdesc_page(s_zpdesc));
+	charges[1] = -(size - s_bytes_in_page);	/* 0 if object doesn't span */
+	if (charges[1])
+		nids[1] = page_to_nid(zpdesc_page(get_next_zpdesc(s_zpdesc)));
+
+	charges[2] = d_bytes_in_page;
+	nids[2] = page_to_nid(zpdesc_page(d_zpdesc));
+	charges[3] = size - d_bytes_in_page;	/* 0 if object doesn't span */
+	if (charges[3])
+		nids[3] = page_to_nid(zpdesc_page(get_next_zpdesc(d_zpdesc)));
+
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(objcg);
+	for (i = 0; i < 4; i++) {
+		if (!charges[i])
+			continue;
+
+		l = mem_cgroup_lruvec(memcg, NODE_DATA(nids[i]));
+		partial = (PAGE_SIZE * charges[i]) / size;
+		mod_memcg_lruvec_state(l, pool->compressed_stat, charges[i]);
+		mod_memcg_lruvec_state(l, pool->uncompressed_stat, partial);
+	}
+	rcu_read_unlock();
+
+	dec_node_page_state(zpdesc_page(s_zpdesc), pool->uncompressed_stat);
+	inc_node_page_state(zpdesc_page(d_zpdesc), pool->uncompressed_stat);
+
+out:
 	d_zspage->objcgs[d_idx] = s_zspage->objcgs[s_idx];
 	s_zspage->objcgs[s_idx] = NULL;
 }
+#else
+static void zs_migrate_objcg(struct zspage *s_zspage, struct zspage *d_zspage,
+			     unsigned long used_obj, unsigned long free_obj,
+			     struct zs_pool *pool, int size)
+{
+}
+#endif
static void migrate_zspage(struct zs_pool *pool, struct zspage *src_zspage,
struct zspage *dst_zspage)
@@ -1719,7 +1785,7 @@ static void migrate_zspage(struct zs_pool *pool, struct zspage *src_zspage,
if (pool->memcg_aware)
zs_migrate_objcg(src_zspage, dst_zspage,
- used_obj, free_obj);
+ used_obj, free_obj, pool, class->size);
obj_idx++;
obj_free(class->size, used_obj);
--
2.52.0
Hi Joshua,
kernel test robot noticed the following build warnings:
[auto build test WARNING on axboe/for-next]
[also build test WARNING on linus/master v7.0-rc3]
[cannot apply to akpm-mm/mm-everything next-20260311]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Joshua-Hahn/mm-zsmalloc-Rename-zs_object_copy-to-zs_obj_copy/20260312-035531
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git for-next
patch link: https://lore.kernel.org/r/20260311195153.4013476-11-joshua.hahnjy%40gmail.com
patch subject: [PATCH 10/11] mm/zsmalloc: Handle single object charge migration in migrate_zspage
config: x86_64-randconfig-001-20260312 (https://download.01.org/0day-ci/archive/20260312/202603121158.g93vlc2U-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260312/202603121158.g93vlc2U-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603121158.g93vlc2U-lkp@intel.com/
All warnings (new ones prefixed by >>):
>> mm/zsmalloc.c:1702:6: warning: variable 's_idx' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
1702 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mm/zsmalloc.c:1752:45: note: uninitialized use occurs here
1752 | d_zspage->objcgs[d_idx] = s_zspage->objcgs[s_idx];
| ^~~~~
mm/zsmalloc.c:1702:2: note: remove the 'if' if its condition is always false
1702 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1703 | goto out;
| ~~~~~~~~
mm/zsmalloc.c:1696:20: note: initialize the variable 's_idx' to silence this warning
1696 | unsigned int s_idx, d_idx;
| ^
| = 0
>> mm/zsmalloc.c:1702:6: warning: variable 'd_idx' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
1702 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
mm/zsmalloc.c:1752:19: note: uninitialized use occurs here
1752 | d_zspage->objcgs[d_idx] = s_zspage->objcgs[s_idx];
| ^~~~~
mm/zsmalloc.c:1702:2: note: remove the 'if' if its condition is always false
1702 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1703 | goto out;
| ~~~~~~~~
mm/zsmalloc.c:1696:27: note: initialize the variable 'd_idx' to silence this warning
1696 | unsigned int s_idx, d_idx;
| ^
| = 0
2 warnings generated.
vim +1702 mm/zsmalloc.c
1686
1687 #ifdef CONFIG_MEMCG
1688 static void zs_migrate_objcg(struct zspage *s_zspage, struct zspage *d_zspage,
1689 unsigned long used_obj, unsigned long free_obj,
1690 struct zs_pool *pool, int size)
1691 {
1692 struct zpdesc *s_zpdesc, *d_zpdesc;
1693 struct obj_cgroup *objcg;
1694 struct mem_cgroup *memcg;
1695 struct lruvec *l;
1696 unsigned int s_idx, d_idx;
1697 unsigned int s_off, d_off;
1698 int charges[4], nids[4], partial;
1699 int s_bytes_in_page, d_bytes_in_page;
1700 int i;
1701
> 1702 if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
1703 goto out;
1704
1705 obj_to_location(used_obj, &s_zpdesc, &s_idx);
1706 obj_to_location(free_obj, &d_zpdesc, &d_idx);
1707
1708 objcg = s_zspage->objcgs[s_idx];
1709 if (!objcg)
1710 goto out;
1711
1712 /*
1713 * The object migration here can touch up to 4 nodes.
1714 * Instead of breaking down all possible combinations of node changes,
1715 * just uncharge entirely from the source and charge entirely to the
1716 * destination, even if there is are node overlaps between src and dst.
1717 */
1718 s_off = (s_idx * size) % PAGE_SIZE;
1719 d_off = (d_idx * size) % PAGE_SIZE;
1720 s_bytes_in_page = min_t(int, size, PAGE_SIZE - s_off);
1721 d_bytes_in_page = min_t(int, size, PAGE_SIZE - d_off);
1722
1723 charges[0] = -s_bytes_in_page;
1724 nids[0] = page_to_nid(zpdesc_page(s_zpdesc));
1725 charges[1] = -(size - s_bytes_in_page); /* 0 if object doesn't span */
1726 if (charges[1])
1727 nids[1] = page_to_nid(zpdesc_page(get_next_zpdesc(s_zpdesc)));
1728
1729 charges[2] = d_bytes_in_page;
1730 nids[2] = page_to_nid(zpdesc_page(d_zpdesc));
1731 charges[3] = size - d_bytes_in_page; /* 0 if object doesn't span */
1732 if (charges[3])
1733 nids[3] = page_to_nid(zpdesc_page(get_next_zpdesc(d_zpdesc)));
1734
1735 rcu_read_lock();
1736 memcg = obj_cgroup_memcg(objcg);
1737 for (i = 0; i < 4; i++) {
1738 if (!charges[i])
1739 continue;
1740
1741 l = mem_cgroup_lruvec(memcg, NODE_DATA(nids[i]));
1742 partial = (PAGE_SIZE * charges[i]) / size;
1743 mod_memcg_lruvec_state(l, pool->compressed_stat, charges[i]);
1744 mod_memcg_lruvec_state(l, pool->uncompressed_stat, partial);
1745 }
1746 rcu_read_unlock();
1747
1748 dec_node_page_state(zpdesc_page(s_zpdesc), pool->uncompressed_stat);
1749 inc_node_page_state(zpdesc_page(d_zpdesc), pool->uncompressed_stat);
1750
1751 out:
1752 d_zspage->objcgs[d_idx] = s_zspage->objcgs[s_idx];
1753 s_zspage->objcgs[s_idx] = NULL;
1754 }
1755 #else
1756 static void zs_migrate_objcg(struct zspage *s_zspage, struct zspage *d_zspage,
1757 unsigned long used_obj, unsigned long free_obj,
1758 struct zs_pool *pool, int size)
1759 {
1760 }
1761 #endif
1762
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Thu, 12 Mar 2026 11:51:59 +0800 kernel test robot <lkp@intel.com> wrote: > Hi Joshua, > > kernel test robot noticed the following build warnings: > > [auto build test WARNING on axboe/for-next] > [also build test WARNING on linus/master v7.0-rc3] > [cannot apply to akpm-mm/mm-everything next-20260311] > [If your patch is applied to the wrong git tree, kindly drop us a note. > And when submitting patch, we suggest to use '--base' as documented in > https://git-scm.com/docs/git-format-patch#_base_tree_information] > > url: https://github.com/intel-lab-lkp/linux/commits/Joshua-Hahn/mm-zsmalloc-Rename-zs_object_copy-to-zs_obj_copy/20260312-035531 > base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git for-next > patch link: https://lore.kernel.org/r/20260311195153.4013476-11-joshua.hahnjy%40gmail.com > patch subject: [PATCH 10/11] mm/zsmalloc: Handle single object charge migration in migrate_zspage > config: x86_64-randconfig-001-20260312 (https://download.01.org/0day-ci/archive/20260312/202603121158.g93vlc2U-lkp@intel.com/config) > compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261) > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260312/202603121158.g93vlc2U-lkp@intel.com/reproduce) > > If you fix the issue in a separate patch/commit (i.e. 
not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp@intel.com> > | Closes: https://lore.kernel.org/oe-kbuild-all/202603121158.g93vlc2U-lkp@intel.com/ > > All warnings (new ones prefixed by >>): > > >> mm/zsmalloc.c:1702:6: warning: variable 's_idx' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] > 1702 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > mm/zsmalloc.c:1752:45: note: uninitialized use occurs here > 1752 | d_zspage->objcgs[d_idx] = s_zspage->objcgs[s_idx]; > | ^~~~~ > mm/zsmalloc.c:1702:2: note: remove the 'if' if its condition is always false > 1702 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > 1703 | goto out; > | ~~~~~~~~ > mm/zsmalloc.c:1696:20: note: initialize the variable 's_idx' to silence this warning > 1696 | unsigned int s_idx, d_idx; > | ^ > | = 0 > >> mm/zsmalloc.c:1702:6: warning: variable 'd_idx' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized] > 1702 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > mm/zsmalloc.c:1752:19: note: uninitialized use occurs here > 1752 | d_zspage->objcgs[d_idx] = s_zspage->objcgs[s_idx]; > | ^~~~~ > mm/zsmalloc.c:1702:2: note: remove the 'if' if its condition is always false > 1702 | if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) > | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > 1703 | goto out; > | ~~~~~~~~ > mm/zsmalloc.c:1696:27: note: initialize the variable 'd_idx' to silence this warning > 1696 | unsigned int s_idx, d_idx; > | ^ > | = 0 > 2 warnings generated. Hello kernel test robot, Thank you for catching this issue! Yes, the MEMCG v1 check should be done after I use obj_to_location to initialize the indices, so that the objcg pointer swap works at the end. Will make the change in the next version! Joshua
Hi Joshua,
kernel test robot noticed the following build warnings:
[auto build test WARNING on axboe/for-next]
[also build test WARNING on linus/master v7.0-rc3]
[cannot apply to akpm-mm/mm-everything next-20260311]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Joshua-Hahn/mm-zsmalloc-Rename-zs_object_copy-to-zs_obj_copy/20260312-035531
base: https://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git for-next
patch link: https://lore.kernel.org/r/20260311195153.4013476-11-joshua.hahnjy%40gmail.com
patch subject: [PATCH 10/11] mm/zsmalloc: Handle single object charge migration in migrate_zspage
config: arc-randconfig-001-20260312 (https://download.01.org/0day-ci/archive/20260312/202603121115.dm3Z6KvA-lkp@intel.com/config)
compiler: arc-linux-gcc (GCC) 8.5.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260312/202603121115.dm3Z6KvA-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603121115.dm3Z6KvA-lkp@intel.com/
All warnings (new ones prefixed by >>):
mm/zsmalloc.c: In function 'zs_compact.part.28':
>> mm/zsmalloc.c:1696:15: warning: 's_idx' is used uninitialized in this function [-Wuninitialized]
unsigned int s_idx, d_idx;
^~~~~
vim +/s_idx +1696 mm/zsmalloc.c
1686
1687 #ifdef CONFIG_MEMCG
1688 static void zs_migrate_objcg(struct zspage *s_zspage, struct zspage *d_zspage,
1689 unsigned long used_obj, unsigned long free_obj,
1690 struct zs_pool *pool, int size)
1691 {
1692 struct zpdesc *s_zpdesc, *d_zpdesc;
1693 struct obj_cgroup *objcg;
1694 struct mem_cgroup *memcg;
1695 struct lruvec *l;
> 1696 unsigned int s_idx, d_idx;
1697 unsigned int s_off, d_off;
1698 int charges[4], nids[4], partial;
1699 int s_bytes_in_page, d_bytes_in_page;
1700 int i;
1701
1702 if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
1703 goto out;
1704
1705 obj_to_location(used_obj, &s_zpdesc, &s_idx);
1706 obj_to_location(free_obj, &d_zpdesc, &d_idx);
1707
1708 objcg = s_zspage->objcgs[s_idx];
1709 if (!objcg)
1710 goto out;
1711
1712 /*
1713 * The object migration here can touch up to 4 nodes.
1714 * Instead of breaking down all possible combinations of node changes,
1715 * just uncharge entirely from the source and charge entirely to the
1716 * destination, even if there is are node overlaps between src and dst.
1717 */
1718 s_off = (s_idx * size) % PAGE_SIZE;
1719 d_off = (d_idx * size) % PAGE_SIZE;
1720 s_bytes_in_page = min_t(int, size, PAGE_SIZE - s_off);
1721 d_bytes_in_page = min_t(int, size, PAGE_SIZE - d_off);
1722
1723 charges[0] = -s_bytes_in_page;
1724 nids[0] = page_to_nid(zpdesc_page(s_zpdesc));
1725 charges[1] = -(size - s_bytes_in_page); /* 0 if object doesn't span */
1726 if (charges[1])
1727 nids[1] = page_to_nid(zpdesc_page(get_next_zpdesc(s_zpdesc)));
1728
1729 charges[2] = d_bytes_in_page;
1730 nids[2] = page_to_nid(zpdesc_page(d_zpdesc));
1731 charges[3] = size - d_bytes_in_page; /* 0 if object doesn't span */
1732 if (charges[3])
1733 nids[3] = page_to_nid(zpdesc_page(get_next_zpdesc(d_zpdesc)));
1734
1735 rcu_read_lock();
1736 memcg = obj_cgroup_memcg(objcg);
1737 for (i = 0; i < 4; i++) {
1738 if (!charges[i])
1739 continue;
1740
1741 l = mem_cgroup_lruvec(memcg, NODE_DATA(nids[i]));
1742 partial = (PAGE_SIZE * charges[i]) / size;
1743 mod_memcg_lruvec_state(l, pool->compressed_stat, charges[i]);
1744 mod_memcg_lruvec_state(l, pool->uncompressed_stat, partial);
1745 }
1746 rcu_read_unlock();
1747
1748 dec_node_page_state(zpdesc_page(s_zpdesc), pool->uncompressed_stat);
1749 inc_node_page_state(zpdesc_page(d_zpdesc), pool->uncompressed_stat);
1750
1751 out:
1752 d_zspage->objcgs[d_idx] = s_zspage->objcgs[s_idx];
1753 s_zspage->objcgs[s_idx] = NULL;
1754 }
1755 #else
1756 static void zs_migrate_objcg(struct zspage *s_zspage, struct zspage *d_zspage,
1757 unsigned long used_obj, unsigned long free_obj,
1758 struct zs_pool *pool, int size)
1759 {
1760 }
1761 #endif
1762
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2026 Red Hat, Inc.