[PATCH 2/3] mm: shrinker: optimize the allocation of shrinker_info when setting cgroup_memory_nokmem

Haifeng Xu posted 3 patches 1 month ago
There is a newer version of this series
[PATCH 2/3] mm: shrinker: optimize the allocation of shrinker_info when setting cgroup_memory_nokmem
Posted by Haifeng Xu 1 month ago
When kmem is disabled, memcg slab shrink only calls non-slab shrinkers,
so just allocate shrinker info for non-slab shrinkers in non-root memcgs.

Therefore, if memcg_kmem_online is true, everything stays the same as before.
Otherwise, the root memcg allocates ids from shrinker_idr to identify each
shrinker and non-root memcgs use nonslab_id to identify non-slab shrinkers.
The size of shrinker_info in non-root memcgs can be very small because the
number of shrinkers marked as SHRINKER_NONSLAB | SHRINKER_MEMCG_AWARE is
small. Also, the time spent in expand_shrinker_info() can be reduced a lot.

When setting shrinker bit or updating nr_deferred, use nonslab_id for
non-root memcgs if the shrinker is marked as SHRINKER_NONSLAB.

Signed-off-by: Haifeng Xu <haifeng.xu@shopee.com>
---
 include/linux/shrinker.h |   3 +
 mm/huge_memory.c         |  21 ++++--
 mm/shrinker.c            | 135 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 138 insertions(+), 21 deletions(-)

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 1a00be90d93a..df53008ed8b5 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -107,6 +107,9 @@ struct shrinker {
 #ifdef CONFIG_MEMCG
 	/* ID in shrinker_idr */
 	int id;
+
+	/* ID in shrinker_nonslab_idr */
+	int nonslab_id;
 #endif
 #ifdef CONFIG_SHRINKER_DEBUG
 	int debugfs_id;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8e2746ea74ad..319349b5da5d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4351,9 +4351,14 @@ void deferred_split_folio(struct folio *folio, bool partially_mapped)
 		memcg = folio_split_queue_memcg(folio, ds_queue);
 		list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
 		ds_queue->split_queue_len++;
-		if (memcg)
-			set_shrinker_bit(memcg, folio_nid(folio),
-					 shrinker_id(deferred_split_shrinker));
+		if (memcg) {
+			int id = deferred_split_shrinker->id;
+
+			if (!memcg_kmem_online() && memcg != root_mem_cgroup)
+				id = deferred_split_shrinker->nonslab_id;
+
+			set_shrinker_bit(memcg, folio_nid(folio), id);
+		}
 	}
 	split_queue_unlock_irqrestore(ds_queue, flags);
 }
@@ -4508,8 +4513,14 @@ void reparent_deferred_split_queue(struct mem_cgroup *memcg)
 	parent_ds_queue->split_queue_len += ds_queue->split_queue_len;
 	ds_queue->split_queue_len = 0;
 
-	for_each_node(nid)
-		set_shrinker_bit(parent, nid, shrinker_id(deferred_split_shrinker));
+	for_each_node(nid) {
+		int id = deferred_split_shrinker->id;
+
+		if (!memcg_kmem_online() && parent != root_mem_cgroup)
+			id = deferred_split_shrinker->nonslab_id;
+
+		set_shrinker_bit(parent, nid, id);
+	}
 
 unlock:
 	spin_unlock(&parent_ds_queue->split_queue_lock);
diff --git a/mm/shrinker.c b/mm/shrinker.c
index f0c6dfa026b0..52ea0e6391af 100644
--- a/mm/shrinker.c
+++ b/mm/shrinker.c
@@ -12,6 +12,7 @@ DEFINE_MUTEX(shrinker_mutex);
 
 #ifdef CONFIG_MEMCG
 static int shrinker_nr_max;
+static int shrinker_nonslab_nr_max;
 
 static inline int shrinker_unit_size(int nr_items)
 {
@@ -78,15 +79,25 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
 {
 	int nid, ret = 0;
 	int array_size = 0;
+	int alloc_nr_max;
+
+	if (memcg_kmem_online()) {
+		alloc_nr_max = shrinker_nr_max;
+	} else {
+		if (memcg == root_mem_cgroup)
+			alloc_nr_max = shrinker_nr_max;
+		else
+			alloc_nr_max = shrinker_nonslab_nr_max;
+	}
 
 	mutex_lock(&shrinker_mutex);
-	array_size = shrinker_unit_size(shrinker_nr_max);
+	array_size = shrinker_unit_size(alloc_nr_max);
 	for_each_node(nid) {
 		struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size,
 							   GFP_KERNEL, nid);
 		if (!info)
 			goto err;
-		info->map_nr_max = shrinker_nr_max;
+		info->map_nr_max = alloc_nr_max;
 		if (shrinker_unit_alloc(info, NULL, nid)) {
 			kvfree(info);
 			goto err;
@@ -147,33 +158,47 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
 	return 0;
 }
 
-static int expand_shrinker_info(int new_id)
+static int expand_shrinker_info(int new_id, bool full, bool root)
 {
 	int ret = 0;
 	int new_nr_max = round_up(new_id + 1, SHRINKER_UNIT_BITS);
 	int new_size, old_size = 0;
 	struct mem_cgroup *memcg;
+	struct mem_cgroup *start = NULL;
+	int old_nr_max = shrinker_nr_max;
 
 	if (!root_mem_cgroup)
 		goto out;
 
 	lockdep_assert_held(&shrinker_mutex);
 
+	if (!full && !root) {
+		start = root_mem_cgroup;
+		old_nr_max = shrinker_nonslab_nr_max;
+	}
+
 	new_size = shrinker_unit_size(new_nr_max);
-	old_size = shrinker_unit_size(shrinker_nr_max);
+	old_size = shrinker_unit_size(old_nr_max);
+
+	memcg = mem_cgroup_iter(NULL, start, NULL);
+	if (!memcg)
+		goto out;
 
-	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
 		ret = expand_one_shrinker_info(memcg, new_size, old_size,
 					       new_nr_max);
-		if (ret) {
+		if (ret || (root && memcg == root_mem_cgroup)) {
 			mem_cgroup_iter_break(NULL, memcg);
 			goto out;
 		}
 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
 out:
-	if (!ret)
-		shrinker_nr_max = new_nr_max;
+	if (!ret) {
+		if (!full && !root)
+			shrinker_nonslab_nr_max = new_nr_max;
+		else
+			shrinker_nr_max = new_nr_max;
+	}
 
 	return ret;
 }
@@ -195,7 +220,13 @@ static inline int calc_shrinker_id(int index, int offset)
 
 static inline int get_shrinker_id(struct mem_cgroup *memcg, struct shrinker *shrinker)
 {
-	return shrinker->id;
+	int id = shrinker->id;
+
+	if (!memcg_kmem_online() && (shrinker->flags & SHRINKER_NONSLAB) &&
+	    memcg != root_mem_cgroup)
+		id = shrinker->nonslab_id;
+
+	return id;
 }
 
 void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
@@ -217,6 +248,8 @@ void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
 }
 
 static DEFINE_IDR(shrinker_idr);
+static DEFINE_IDR(shrinker_nonslab_idr);
+
 
 static int shrinker_memcg_alloc(struct shrinker *shrinker)
 {
@@ -230,10 +263,46 @@ static int shrinker_memcg_alloc(struct shrinker *shrinker)
 	if (id < 0)
 		goto unlock;
 
-	if (id >= shrinker_nr_max) {
-		if (expand_shrinker_info(id)) {
-			idr_remove(&shrinker_idr, id);
-			goto unlock;
+	shrinker->nonslab_id = -1;
+
+	if (!mem_cgroup_kmem_disabled()) {
+		if (id >= shrinker_nr_max) {
+			/* expand shrinker info for all memory cgroups */
+			if (expand_shrinker_info(id, true, false)) {
+				idr_remove(&shrinker_idr, id);
+				goto unlock;
+			}
+		}
+	} else {
+		/*
+		 * If cgroup_memory_nokmem is set, every shrinker needs to be recorded in
+		 * root memory cgroup because global slab shrink traverses all shrinkers. For
+		 * non-root memcgs, record shrinkers with SHRINKER_NONSLAB because memcg
+		 * slab shrink only calls non-slab shrinkers.
+		 */
+		if (id >= shrinker_nr_max) {
+			/* expand shrinker info for root memory cgroup only */
+			if (expand_shrinker_info(id, false, true)) {
+				idr_remove(&shrinker_idr, id);
+				goto unlock;
+			}
+		}
+
+		if (shrinker->flags & SHRINKER_NONSLAB) {
+			int nonslab_id;
+
+			nonslab_id = idr_alloc(&shrinker_nonslab_idr, shrinker, 0, 0, GFP_KERNEL);
+			if (nonslab_id < 0)
+				goto unlock;
+
+			if (nonslab_id >= shrinker_nonslab_nr_max) {
+				/* expand shrinker info for non-root memory cgroups */
+				if (expand_shrinker_info(nonslab_id, false, false)) {
+					idr_remove(&shrinker_nonslab_idr, nonslab_id);
+					goto unlock;
+				}
+			}
+			shrinker->nonslab_id = nonslab_id;
 		}
 	}
 	shrinker->id = id;
@@ -252,6 +321,12 @@ static void shrinker_memcg_remove(struct shrinker *shrinker)
 	lockdep_assert_held(&shrinker_mutex);
 
 	idr_remove(&shrinker_idr, id);
+
+	if (shrinker->flags & SHRINKER_NONSLAB) {
+		id = shrinker->nonslab_id;
+		if (id >= 0)
+			idr_remove(&shrinker_nonslab_idr, id);
+	}
 }
 
 static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
@@ -310,10 +385,33 @@ void reparent_shrinker_deferred(struct mem_cgroup *memcg)
 		parent_info = shrinker_info_protected(parent, nid);
 		for (index = 0; index < shrinker_id_to_index(child_info->map_nr_max); index++) {
 			child_unit = child_info->unit[index];
-			parent_unit = parent_info->unit[index];
 			for (offset = 0; offset < SHRINKER_UNIT_BITS; offset++) {
 				nr = atomic_long_read(&child_unit->nr_deferred[offset]);
-				atomic_long_add(nr, &parent_unit->nr_deferred[offset]);
+
+				/*
+				 * If memcg_kmem_online() is false, the non-root memcgs use
+				 * nonslab_id but root memory cgroup use id. When reparenting
+				 * shrinker info to it, must convert the nonslab_id to id.
+				 */
+				if (!memcg_kmem_online() && parent == root_mem_cgroup) {
+					int id, p_index, p_off;
+					struct shrinker *shrinker;
+
+					id = calc_shrinker_id(index, offset);
+					shrinker = idr_find(&shrinker_nonslab_idr, id);
+					if (shrinker) {
+						id = shrinker->id;
+						p_index = shrinker_id_to_index(id);
+						p_off = shrinker_id_to_offset(id);
+
+						parent_unit = parent_info->unit[p_index];
+						atomic_long_add(nr,
+								&parent_unit->nr_deferred[p_off]);
+					}
+				} else {
+					parent_unit = parent_info->unit[index];
+					atomic_long_add(nr, &parent_unit->nr_deferred[offset]);
+				}
 			}
 		}
 	}
@@ -543,7 +641,12 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
 			int shrinker_id = calc_shrinker_id(index, offset);
 
 			rcu_read_lock();
-			shrinker = idr_find(&shrinker_idr, shrinker_id);
+
+			if (memcg_kmem_online())
+				shrinker = idr_find(&shrinker_idr, shrinker_id);
+			else
+				shrinker = idr_find(&shrinker_nonslab_idr, shrinker_id);
+
 			if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
 				clear_bit(offset, unit->map);
 				rcu_read_unlock();
-- 
2.43.0
Re: [PATCH 2/3] mm: shrinker: optimize the allocation of shrinker_info when setting cgroup_memory_nokmem
Posted by kernel test robot 1 month ago
Hi Haifeng,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v7.0-rc2 next-20260306]
[cannot apply to akpm-mm/mm-everything]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Haifeng-Xu/mm-shrinker-introduce-new-function-get_shrinker_id/20260306-160247
base:   linus/master
patch link:    https://lore.kernel.org/r/20260306075757.198887-3-haifeng.xu%40shopee.com
patch subject: [PATCH 2/3] mm: shrinker: optimize the allocation of shrinker_info when setting cgroup_memory_nokmem
config: i386-randconfig-002-20260307 (https://download.01.org/0day-ci/archive/20260308/202603080606.UQEPDmnB-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260308/202603080606.UQEPDmnB-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603080606.UQEPDmnB-lkp@intel.com/

All errors (new ones prefixed by >>):

>> mm/huge_memory.c:4355:38: error: no member named 'id' in 'struct shrinker'
    4355 |                         int id = deferred_split_shrinker->id;
         |                                  ~~~~~~~~~~~~~~~~~~~~~~~  ^
>> mm/huge_memory.c:4358:35: error: no member named 'nonslab_id' in 'struct shrinker'
    4358 |                                 id = deferred_split_shrinker->nonslab_id;
         |                                      ~~~~~~~~~~~~~~~~~~~~~~~  ^
   2 errors generated.


vim +4355 mm/huge_memory.c

  4307	
  4308	/* partially_mapped=false won't clear PG_partially_mapped folio flag */
  4309	void deferred_split_folio(struct folio *folio, bool partially_mapped)
  4310	{
  4311		struct deferred_split *ds_queue;
  4312		unsigned long flags;
  4313	
  4314		/*
  4315		 * Order 1 folios have no space for a deferred list, but we also
  4316		 * won't waste much memory by not adding them to the deferred list.
  4317		 */
  4318		if (folio_order(folio) <= 1)
  4319			return;
  4320	
  4321		if (!partially_mapped && !split_underused_thp)
  4322			return;
  4323	
  4324		/*
  4325		 * Exclude swapcache: originally to avoid a corrupt deferred split
  4326		 * queue. Nowadays that is fully prevented by memcg1_swapout();
  4327		 * but if page reclaim is already handling the same folio, it is
  4328		 * unnecessary to handle it again in the shrinker, so excluding
  4329		 * swapcache here may still be a useful optimization.
  4330		 */
  4331		if (folio_test_swapcache(folio))
  4332			return;
  4333	
  4334		ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
  4335		if (partially_mapped) {
  4336			if (!folio_test_partially_mapped(folio)) {
  4337				folio_set_partially_mapped(folio);
  4338				if (folio_test_pmd_mappable(folio))
  4339					count_vm_event(THP_DEFERRED_SPLIT_PAGE);
  4340				count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
  4341				mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, 1);
  4342	
  4343			}
  4344		} else {
  4345			/* partially mapped folios cannot become non-partially mapped */
  4346			VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio);
  4347		}
  4348		if (list_empty(&folio->_deferred_list)) {
  4349			struct mem_cgroup *memcg;
  4350	
  4351			memcg = folio_split_queue_memcg(folio, ds_queue);
  4352			list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
  4353			ds_queue->split_queue_len++;
  4354			if (memcg) {
> 4355				int id = deferred_split_shrinker->id;
  4356	
  4357				if (!memcg_kmem_online() &&  memcg != root_mem_cgroup)
> 4358					id = deferred_split_shrinker->nonslab_id;
  4359	
  4360				set_shrinker_bit(memcg, folio_nid(folio), id);
  4361			}
  4362		}
  4363		split_queue_unlock_irqrestore(ds_queue, flags);
  4364	}
  4365	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH 2/3] mm: shrinker: optimize the allocation of shrinker_info when setting cgroup_memory_nokmem
Posted by kernel test robot 1 month ago
Hi Haifeng,

kernel test robot noticed the following build errors:

[auto build test ERROR on linus/master]
[also build test ERROR on v7.0-rc2 next-20260306]
[cannot apply to akpm-mm/mm-everything]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Haifeng-Xu/mm-shrinker-introduce-new-function-get_shrinker_id/20260306-160247
base:   linus/master
patch link:    https://lore.kernel.org/r/20260306075757.198887-3-haifeng.xu%40shopee.com
patch subject: [PATCH 2/3] mm: shrinker: optimize the allocation of shrinker_info when setting cgroup_memory_nokmem
config: i386-randconfig-004-20260307 (https://download.01.org/0day-ci/archive/20260308/202603080143.ehWbe58J-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260308/202603080143.ehWbe58J-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603080143.ehWbe58J-lkp@intel.com/

All errors (new ones prefixed by >>):

   mm/huge_memory.c: In function 'deferred_split_folio':
>> mm/huge_memory.c:4355:57: error: 'struct shrinker' has no member named 'id'
    4355 |                         int id = deferred_split_shrinker->id;
         |                                                         ^~
>> mm/huge_memory.c:4358:61: error: 'struct shrinker' has no member named 'nonslab_id'
    4358 |                                 id = deferred_split_shrinker->nonslab_id;
         |                                                             ^~


vim +4355 mm/huge_memory.c

  4307	
  4308	/* partially_mapped=false won't clear PG_partially_mapped folio flag */
  4309	void deferred_split_folio(struct folio *folio, bool partially_mapped)
  4310	{
  4311		struct deferred_split *ds_queue;
  4312		unsigned long flags;
  4313	
  4314		/*
  4315		 * Order 1 folios have no space for a deferred list, but we also
  4316		 * won't waste much memory by not adding them to the deferred list.
  4317		 */
  4318		if (folio_order(folio) <= 1)
  4319			return;
  4320	
  4321		if (!partially_mapped && !split_underused_thp)
  4322			return;
  4323	
  4324		/*
  4325		 * Exclude swapcache: originally to avoid a corrupt deferred split
  4326		 * queue. Nowadays that is fully prevented by memcg1_swapout();
  4327		 * but if page reclaim is already handling the same folio, it is
  4328		 * unnecessary to handle it again in the shrinker, so excluding
  4329		 * swapcache here may still be a useful optimization.
  4330		 */
  4331		if (folio_test_swapcache(folio))
  4332			return;
  4333	
  4334		ds_queue = folio_split_queue_lock_irqsave(folio, &flags);
  4335		if (partially_mapped) {
  4336			if (!folio_test_partially_mapped(folio)) {
  4337				folio_set_partially_mapped(folio);
  4338				if (folio_test_pmd_mappable(folio))
  4339					count_vm_event(THP_DEFERRED_SPLIT_PAGE);
  4340				count_mthp_stat(folio_order(folio), MTHP_STAT_SPLIT_DEFERRED);
  4341				mod_mthp_stat(folio_order(folio), MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, 1);
  4342	
  4343			}
  4344		} else {
  4345			/* partially mapped folios cannot become non-partially mapped */
  4346			VM_WARN_ON_FOLIO(folio_test_partially_mapped(folio), folio);
  4347		}
  4348		if (list_empty(&folio->_deferred_list)) {
  4349			struct mem_cgroup *memcg;
  4350	
  4351			memcg = folio_split_queue_memcg(folio, ds_queue);
  4352			list_add_tail(&folio->_deferred_list, &ds_queue->split_queue);
  4353			ds_queue->split_queue_len++;
  4354			if (memcg) {
> 4355				int id = deferred_split_shrinker->id;
  4356	
  4357				if (!memcg_kmem_online() &&  memcg != root_mem_cgroup)
> 4358					id = deferred_split_shrinker->nonslab_id;
  4359	
  4360				set_shrinker_bit(memcg, folio_nid(folio), id);
  4361			}
  4362		}
  4363		split_queue_unlock_irqrestore(ds_queue, flags);
  4364	}
  4365	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki