[PATCH v1 05/26] mm: memcontrol: allocate object cgroup for non-kmem case

Qi Zheng posted 26 patches 3 months, 1 week ago
There is a newer version of this series
[PATCH v1 05/26] mm: memcontrol: allocate object cgroup for non-kmem case
Posted by Qi Zheng 3 months, 1 week ago
From: Muchun Song <songmuchun@bytedance.com>

Pagecache pages are charged at allocation time and hold a reference
to the original memory cgroup until reclaimed. Depending on memory
pressure, page sharing patterns between different cgroups and cgroup
creation/destruction rates, many dying memory cgroups can be pinned
by pagecache pages, reducing page reclaim efficiency and wasting
memory. Converting LRU folios and most other raw memory cgroup pins
to object cgroup references can fix this problem of long-lived dying
memory cgroups.

As a result, the objcg infrastructure is no longer solely applicable
to the kmem case. In this patch, we extend the scope of the objcg
infrastructure beyond the kmem case, enabling LRU folios to reuse
it for folio charging purposes.

Note that LRU folios are not accounted at the root level, yet their
folio->memcg_data still points to root_mem_cgroup, so the
folio->memcg_data of an LRU folio always holds a valid pointer.
However, root_mem_cgroup does not have an object cgroup. Therefore,
we also allocate an object cgroup for root_mem_cgroup.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 mm/memcontrol.c | 51 +++++++++++++++++++++++--------------------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d5257465c9d75..2afd7f99ca101 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -204,10 +204,10 @@ static struct obj_cgroup *obj_cgroup_alloc(void)
 	return objcg;
 }
 
-static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
-				  struct mem_cgroup *parent)
+static void memcg_reparent_objcgs(struct mem_cgroup *memcg)
 {
 	struct obj_cgroup *objcg, *iter;
+	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
 
 	objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
 
@@ -3302,30 +3302,17 @@ unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
 	return val;
 }
 
-static int memcg_online_kmem(struct mem_cgroup *memcg)
+static void memcg_online_kmem(struct mem_cgroup *memcg)
 {
-	struct obj_cgroup *objcg;
-
 	if (mem_cgroup_kmem_disabled())
-		return 0;
+		return;
 
 	if (unlikely(mem_cgroup_is_root(memcg)))
-		return 0;
-
-	objcg = obj_cgroup_alloc();
-	if (!objcg)
-		return -ENOMEM;
-
-	objcg->memcg = memcg;
-	rcu_assign_pointer(memcg->objcg, objcg);
-	obj_cgroup_get(objcg);
-	memcg->orig_objcg = objcg;
+		return;
 
 	static_branch_enable(&memcg_kmem_online_key);
 
 	memcg->kmemcg_id = memcg->id.id;
-
-	return 0;
 }
 
 static void memcg_offline_kmem(struct mem_cgroup *memcg)
@@ -3340,12 +3327,6 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 
 	parent = parent_mem_cgroup(memcg);
 	memcg_reparent_list_lrus(memcg, parent);
-
-	/*
-	 * Objcg's reparenting must be after list_lru's, make sure list_lru
-	 * helpers won't use parent's list_lru until child is drained.
-	 */
-	memcg_reparent_objcgs(memcg, parent);
 }
 
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -3862,9 +3843,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+	struct obj_cgroup *objcg;
 
-	if (memcg_online_kmem(memcg))
-		goto remove_id;
+	memcg_online_kmem(memcg);
 
 	/*
 	 * A memcg must be visible for expand_shrinker_info()
@@ -3874,6 +3855,15 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (alloc_shrinker_info(memcg))
 		goto offline_kmem;
 
+	objcg = obj_cgroup_alloc();
+	if (!objcg)
+		goto free_shrinker;
+
+	objcg->memcg = memcg;
+	rcu_assign_pointer(memcg->objcg, objcg);
+	obj_cgroup_get(objcg);
+	memcg->orig_objcg = objcg;
+
 	if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
 		queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
 				   FLUSH_TIME);
@@ -3896,9 +3886,10 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	xa_store(&mem_cgroup_ids, memcg->id.id, memcg, GFP_KERNEL);
 
 	return 0;
+free_shrinker:
+	free_shrinker_info(memcg);
 offline_kmem:
 	memcg_offline_kmem(memcg);
-remove_id:
 	mem_cgroup_id_remove(memcg);
 	return -ENOMEM;
 }
@@ -3916,6 +3907,12 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 
 	memcg_offline_kmem(memcg);
 	reparent_deferred_split_queue(memcg);
+	/*
+	 * The reparenting of objcg must be after the reparenting of the
+	 * list_lru and deferred_split_queue above, which ensures that they will
+	 * not mistakenly get the parent list_lru and deferred_split_queue.
+	 */
+	memcg_reparent_objcgs(memcg);
 	reparent_shrinker_deferred(memcg);
 	wb_memcg_offline(memcg);
 	lru_gen_offline_memcg(memcg);
-- 
2.20.1
Re: [PATCH v1 05/26] mm: memcontrol: allocate object cgroup for non-kmem case
Posted by Chen Ridong 2 months, 2 weeks ago

On 2025/10/28 21:58, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> Pagecache pages are charged at allocation time and hold a reference
> to the original memory cgroup until reclaimed. Depending on memory
> pressure, page sharing patterns between different cgroups and cgroup
> creation/destruction rates, many dying memory cgroups can be pinned
> by pagecache pages, reducing page reclaim efficiency and wasting
> memory. Converting LRU folios and most other raw memory cgroup pins
> to the object cgroup direction can fix this long-living problem.
> 
> As a result, the objcg infrastructure is no longer solely applicable
> to the kmem case. In this patch, we extend the scope of the objcg
> infrastructure beyond the kmem case, enabling LRU folios to reuse
> it for folio charging purposes.
> 
> It should be noted that LRU folios are not accounted for at the root
> level, yet the folio->memcg_data points to the root_mem_cgroup. Hence,
> the folio->memcg_data of LRU folios always points to a valid pointer.
> However, the root_mem_cgroup does not possess an object cgroup.
> Therefore, we also allocate an object cgroup for the root_mem_cgroup.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>  mm/memcontrol.c | 51 +++++++++++++++++++++++--------------------------
>  1 file changed, 24 insertions(+), 27 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index d5257465c9d75..2afd7f99ca101 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -204,10 +204,10 @@ static struct obj_cgroup *obj_cgroup_alloc(void)
>  	return objcg;
>  }
>  
> -static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
> -				  struct mem_cgroup *parent)
> +static void memcg_reparent_objcgs(struct mem_cgroup *memcg)
>  {
>  	struct obj_cgroup *objcg, *iter;
> +	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
>  
>  	objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
>  
> @@ -3302,30 +3302,17 @@ unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
>  	return val;
>  }
>  
> -static int memcg_online_kmem(struct mem_cgroup *memcg)
> +static void memcg_online_kmem(struct mem_cgroup *memcg)
>  {
> -	struct obj_cgroup *objcg;
> -
>  	if (mem_cgroup_kmem_disabled())
> -		return 0;
> +		return;
>  
>  	if (unlikely(mem_cgroup_is_root(memcg)))
> -		return 0;
> -
> -	objcg = obj_cgroup_alloc();
> -	if (!objcg)
> -		return -ENOMEM;
> -
> -	objcg->memcg = memcg;
> -	rcu_assign_pointer(memcg->objcg, objcg);
> -	obj_cgroup_get(objcg);
> -	memcg->orig_objcg = objcg;
> +		return;
>  
>  	static_branch_enable(&memcg_kmem_online_key);
>  
>  	memcg->kmemcg_id = memcg->id.id;
> -
> -	return 0;
>  }
>  
>  static void memcg_offline_kmem(struct mem_cgroup *memcg)
> @@ -3340,12 +3327,6 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
>  
>  	parent = parent_mem_cgroup(memcg);
>  	memcg_reparent_list_lrus(memcg, parent);
> -
> -	/*
> -	 * Objcg's reparenting must be after list_lru's, make sure list_lru
> -	 * helpers won't use parent's list_lru until child is drained.
> -	 */
> -	memcg_reparent_objcgs(memcg, parent);
>  }
>  
>  #ifdef CONFIG_CGROUP_WRITEBACK
> @@ -3862,9 +3843,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>  static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  {
>  	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
> +	struct obj_cgroup *objcg;
>  
> -	if (memcg_online_kmem(memcg))
> -		goto remove_id;
> +	memcg_online_kmem(memcg);
>  
>  	/*
>  	 * A memcg must be visible for expand_shrinker_info()
> @@ -3874,6 +3855,15 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  	if (alloc_shrinker_info(memcg))
>  		goto offline_kmem;
>  
> +	objcg = obj_cgroup_alloc();
> +	if (!objcg)
> +		goto free_shrinker;
> +
> +	objcg->memcg = memcg;
> +	rcu_assign_pointer(memcg->objcg, objcg);
> +	obj_cgroup_get(objcg);
> +	memcg->orig_objcg = objcg;
> +

Would it be better to add a helper function like obj_cgroup_init()?

>  	if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
>  		queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
>  				   FLUSH_TIME);
> @@ -3896,9 +3886,10 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  	xa_store(&mem_cgroup_ids, memcg->id.id, memcg, GFP_KERNEL);
>  
>  	return 0;
> +free_shrinker:
> +	free_shrinker_info(memcg);
>  offline_kmem:
>  	memcg_offline_kmem(memcg);
> -remove_id:
>  	mem_cgroup_id_remove(memcg);
>  	return -ENOMEM;
>  }
> @@ -3916,6 +3907,12 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
>  
>  	memcg_offline_kmem(memcg);
>  	reparent_deferred_split_queue(memcg);
> +	/*
> +	 * The reparenting of objcg must be after the reparenting of the
> +	 * list_lru and deferred_split_queue above, which ensures that they will
> +	 * not mistakenly get the parent list_lru and deferred_split_queue.
> +	 */
> +	memcg_reparent_objcgs(memcg);
>  	reparent_shrinker_deferred(memcg);
>  	wb_memcg_offline(memcg);
>  	lru_gen_offline_memcg(memcg);

-- 
Best regards,
Ridong
Re: [PATCH v1 05/26] mm: memcontrol: allocate object cgroup for non-kmem case
Posted by Qi Zheng 2 months, 2 weeks ago

On 11/21/25 11:58 AM, Chen Ridong wrote:
> 
> 
> On 2025/10/28 21:58, Qi Zheng wrote:
>> From: Muchun Song <songmuchun@bytedance.com>
>>
>> Pagecache pages are charged at allocation time and hold a reference
>> to the original memory cgroup until reclaimed. Depending on memory
>> pressure, page sharing patterns between different cgroups and cgroup
>> creation/destruction rates, many dying memory cgroups can be pinned
>> by pagecache pages, reducing page reclaim efficiency and wasting
>> memory. Converting LRU folios and most other raw memory cgroup pins
>> to the object cgroup direction can fix this long-living problem.
>>
>> As a result, the objcg infrastructure is no longer solely applicable
>> to the kmem case. In this patch, we extend the scope of the objcg
>> infrastructure beyond the kmem case, enabling LRU folios to reuse
>> it for folio charging purposes.
>>
>> It should be noted that LRU folios are not accounted for at the root
>> level, yet the folio->memcg_data points to the root_mem_cgroup. Hence,
>> the folio->memcg_data of LRU folios always points to a valid pointer.
>> However, the root_mem_cgroup does not possess an object cgroup.
>> Therefore, we also allocate an object cgroup for the root_mem_cgroup.
>>
>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>>   mm/memcontrol.c | 51 +++++++++++++++++++++++--------------------------
>>   1 file changed, 24 insertions(+), 27 deletions(-)
>>
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index d5257465c9d75..2afd7f99ca101 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -204,10 +204,10 @@ static struct obj_cgroup *obj_cgroup_alloc(void)
>>   	return objcg;
>>   }
>>   
>> -static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
>> -				  struct mem_cgroup *parent)
>> +static void memcg_reparent_objcgs(struct mem_cgroup *memcg)
>>   {
>>   	struct obj_cgroup *objcg, *iter;
>> +	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
>>   
>>   	objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
>>   
>> @@ -3302,30 +3302,17 @@ unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
>>   	return val;
>>   }
>>   
>> -static int memcg_online_kmem(struct mem_cgroup *memcg)
>> +static void memcg_online_kmem(struct mem_cgroup *memcg)
>>   {
>> -	struct obj_cgroup *objcg;
>> -
>>   	if (mem_cgroup_kmem_disabled())
>> -		return 0;
>> +		return;
>>   
>>   	if (unlikely(mem_cgroup_is_root(memcg)))
>> -		return 0;
>> -
>> -	objcg = obj_cgroup_alloc();
>> -	if (!objcg)
>> -		return -ENOMEM;
>> -
>> -	objcg->memcg = memcg;
>> -	rcu_assign_pointer(memcg->objcg, objcg);
>> -	obj_cgroup_get(objcg);
>> -	memcg->orig_objcg = objcg;
>> +		return;
>>   
>>   	static_branch_enable(&memcg_kmem_online_key);
>>   
>>   	memcg->kmemcg_id = memcg->id.id;
>> -
>> -	return 0;
>>   }
>>   
>>   static void memcg_offline_kmem(struct mem_cgroup *memcg)
>> @@ -3340,12 +3327,6 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
>>   
>>   	parent = parent_mem_cgroup(memcg);
>>   	memcg_reparent_list_lrus(memcg, parent);
>> -
>> -	/*
>> -	 * Objcg's reparenting must be after list_lru's, make sure list_lru
>> -	 * helpers won't use parent's list_lru until child is drained.
>> -	 */
>> -	memcg_reparent_objcgs(memcg, parent);
>>   }
>>   
>>   #ifdef CONFIG_CGROUP_WRITEBACK
>> @@ -3862,9 +3843,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>>   static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>>   {
>>   	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
>> +	struct obj_cgroup *objcg;
>>   
>> -	if (memcg_online_kmem(memcg))
>> -		goto remove_id;
>> +	memcg_online_kmem(memcg);
>>   
>>   	/*
>>   	 * A memcg must be visible for expand_shrinker_info()
>> @@ -3874,6 +3855,15 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>>   	if (alloc_shrinker_info(memcg))
>>   		goto offline_kmem;
>>   
>> +	objcg = obj_cgroup_alloc();
>> +	if (!objcg)
>> +		goto free_shrinker;
>> +
>> +	objcg->memcg = memcg;
>> +	rcu_assign_pointer(memcg->objcg, objcg);
>> +	obj_cgroup_get(objcg);
>> +	memcg->orig_objcg = objcg;
>> +
> 
> Will it be better to add a helper function like obj_cgroup_init()?

This part is not complicated, and it is only called in this one place,
so perhaps there is no need to add a helper function?

Of course, it doesn't matter much; I'm fine with either approach.

> 
>>   	if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
>>   		queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
>>   				   FLUSH_TIME);
>> @@ -3896,9 +3886,10 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>>   	xa_store(&mem_cgroup_ids, memcg->id.id, memcg, GFP_KERNEL);
>>   
>>   	return 0;
>> +free_shrinker:
>> +	free_shrinker_info(memcg);
>>   offline_kmem:
>>   	memcg_offline_kmem(memcg);
>> -remove_id:
>>   	mem_cgroup_id_remove(memcg);
>>   	return -ENOMEM;
>>   }
>> @@ -3916,6 +3907,12 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
>>   
>>   	memcg_offline_kmem(memcg);
>>   	reparent_deferred_split_queue(memcg);
>> +	/*
>> +	 * The reparenting of objcg must be after the reparenting of the
>> +	 * list_lru and deferred_split_queue above, which ensures that they will
>> +	 * not mistakenly get the parent list_lru and deferred_split_queue.
>> +	 */
>> +	memcg_reparent_objcgs(memcg);
>>   	reparent_shrinker_deferred(memcg);
>>   	wb_memcg_offline(memcg);
>>   	lru_gen_offline_memcg(memcg);
>
Re: [PATCH v1 05/26] mm: memcontrol: allocate object cgroup for non-kmem case
Posted by Harry Yoo 2 months, 3 weeks ago
On Tue, Oct 28, 2025 at 09:58:18PM +0800, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> Pagecache pages are charged at allocation time and hold a reference
> to the original memory cgroup until reclaimed. Depending on memory
> pressure, page sharing patterns between different cgroups and cgroup
> creation/destruction rates, many dying memory cgroups can be pinned
> by pagecache pages, reducing page reclaim efficiency and wasting
> memory. Converting LRU folios and most other raw memory cgroup pins
> to the object cgroup direction can fix this long-living problem.
> 
> As a result, the objcg infrastructure is no longer solely applicable
> to the kmem case. In this patch, we extend the scope of the objcg
> infrastructure beyond the kmem case, enabling LRU folios to reuse
> it for folio charging purposes.
> 
> It should be noted that LRU folios are not accounted for at the root
> level, yet the folio->memcg_data points to the root_mem_cgroup. Hence,
> the folio->memcg_data of LRU folios always points to a valid pointer.
> However, the root_mem_cgroup does not possess an object cgroup.
> Therefore, we also allocate an object cgroup for the root_mem_cgroup.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---

Looks good to me,
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>

-- 
Cheers,
Harry / Hyeonggon