[PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup

Qi Zheng posted 28 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup
Posted by Qi Zheng 1 month, 3 weeks ago
From: Muchun Song <songmuchun@bytedance.com>

Memory cgroup functions such as get_mem_cgroup_from_folio() and
get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
even for the root memory cgroup. In contrast, the situation for
object cgroups has been different.

Previously, the root object cgroup couldn't be returned because
it didn't exist. Now that a valid root object cgroup exists, for
the sake of consistency, it's necessary to align the behavior of
object-cgroup-related operations with that of memory cgroup APIs.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 include/linux/memcontrol.h | 26 +++++++++++++++++-----
 mm/memcontrol.c            | 45 ++++++++++++++++++++------------------
 mm/percpu.c                |  2 +-
 3 files changed, 45 insertions(+), 28 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 288dd6337f80f..776d9be1f446a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -332,6 +332,7 @@ struct mem_cgroup {
 #define MEMCG_CHARGE_BATCH 64U
 
 extern struct mem_cgroup *root_mem_cgroup;
+extern struct obj_cgroup *root_obj_cgroup;
 
 enum page_memcg_data_flags {
 	/* page->memcg_data is a pointer to an slabobj_ext vector */
@@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 	return (memcg == root_mem_cgroup);
 }
 
+static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
+{
+	return objcg == root_obj_cgroup;
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
@@ -773,23 +779,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
 
 static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
 {
+	if (obj_cgroup_is_root(objcg))
+		return true;
 	return percpu_ref_tryget(&objcg->refcnt);
 }
 
-static inline void obj_cgroup_get(struct obj_cgroup *objcg)
+static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
+				       unsigned long nr)
 {
-	percpu_ref_get(&objcg->refcnt);
+	if (!obj_cgroup_is_root(objcg))
+		percpu_ref_get_many(&objcg->refcnt, nr);
 }
 
-static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
-				       unsigned long nr)
+static inline void obj_cgroup_get(struct obj_cgroup *objcg)
 {
-	percpu_ref_get_many(&objcg->refcnt, nr);
+	obj_cgroup_get_many(objcg, 1);
 }
 
 static inline void obj_cgroup_put(struct obj_cgroup *objcg)
 {
-	if (objcg)
+	if (objcg && !obj_cgroup_is_root(objcg))
 		percpu_ref_put(&objcg->refcnt);
 }
 
@@ -1084,6 +1093,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 	return true;
 }
 
+static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
+{
+	return true;
+}
+
 static inline bool mem_cgroup_disabled(void)
 {
 	return true;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 544b3200db12d..21b5aad34cae7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
 struct mem_cgroup *root_mem_cgroup __read_mostly;
 EXPORT_SYMBOL(root_mem_cgroup);
 
+struct obj_cgroup *root_obj_cgroup __read_mostly;
+
 /* Active memory cgroup to use from an interrupt context */
 DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
@@ -2634,15 +2636,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
 
 static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
 {
-	struct obj_cgroup *objcg = NULL;
+	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
 
-	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
-		objcg = rcu_dereference(memcg->objcg);
 		if (likely(objcg && obj_cgroup_tryget(objcg)))
-			break;
-		objcg = NULL;
+			return objcg;
 	}
-	return objcg;
+
+	return NULL;
 }
 
 static struct obj_cgroup *current_objcg_update(void)
@@ -2716,18 +2717,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 		 * Objcg reference is kept by the task, so it's safe
 		 * to use the objcg by the current task.
 		 */
-		return objcg;
+		return objcg ? : root_obj_cgroup;
 	}
 
 	memcg = this_cpu_read(int_active_memcg);
 	if (unlikely(memcg))
 		goto from_memcg;
 
-	return NULL;
+	return root_obj_cgroup;
 
 from_memcg:
-	objcg = NULL;
-	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
+	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
 		/*
 		 * Memcg pointer is protected by scope (see set_active_memcg())
 		 * and is pinning the corresponding objcg, so objcg can't go
@@ -2736,10 +2736,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 		 */
 		objcg = rcu_dereference_check(memcg->objcg, 1);
 		if (likely(objcg))
-			break;
+			return objcg;
 	}
 
-	return objcg;
+	return root_obj_cgroup;
 }
 
 struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
@@ -2753,14 +2753,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
 		objcg = __folio_objcg(folio);
 		obj_cgroup_get(objcg);
 	} else {
-		struct mem_cgroup *memcg;
-
 		rcu_read_lock();
-		memcg = __folio_memcg(folio);
-		if (memcg)
-			objcg = __get_obj_cgroup_from_memcg(memcg);
-		else
-			objcg = NULL;
+		objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
 		rcu_read_unlock();
 	}
 	return objcg;
@@ -2863,7 +2857,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
 	int ret = 0;
 
 	objcg = current_obj_cgroup();
-	if (objcg) {
+	if (objcg && !obj_cgroup_is_root(objcg)) {
 		ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
 		if (!ret) {
 			obj_cgroup_get(objcg);
@@ -3164,7 +3158,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 	 * obj_cgroup_get() is used to get a permanent reference.
 	 */
 	objcg = current_obj_cgroup();
-	if (!objcg)
+	if (!objcg || obj_cgroup_is_root(objcg))
 		return true;
 
 	/*
@@ -3851,6 +3845,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (!objcg)
 		goto free_shrinker;
 
+	if (unlikely(mem_cgroup_is_root(memcg)))
+		root_obj_cgroup = objcg;
+
 	objcg->memcg = memcg;
 	rcu_assign_pointer(memcg->objcg, objcg);
 	obj_cgroup_get(objcg);
@@ -5471,6 +5468,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return;
 
+	if (obj_cgroup_is_root(objcg))
+		return;
+
 	VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
 
 	/* PF_MEMALLOC context, charging must succeed */
@@ -5498,6 +5498,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
 		return;
 
+	if (obj_cgroup_is_root(objcg))
+		return;
+
 	obj_cgroup_uncharge(objcg, size);
 
 	rcu_read_lock();
diff --git a/mm/percpu.c b/mm/percpu.c
index 81462ce5866e1..5c1a9b77d6b93 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
 		return true;
 
 	objcg = current_obj_cgroup();
-	if (!objcg)
+	if (!objcg || obj_cgroup_is_root(objcg))
 		return true;
 
 	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))
-- 
2.20.1
Re: [PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup
Posted by Chen Ridong 1 month, 2 weeks ago

On 2025/12/17 15:27, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> Memory cgroup functions such as get_mem_cgroup_from_folio() and
> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
> even for the root memory cgroup. In contrast, the situation for
> object cgroups has been different.
> 
> Previously, the root object cgroup couldn't be returned because
> it didn't exist. Now that a valid root object cgroup exists, for
> the sake of consistency, it's necessary to align the behavior of
> object-cgroup-related operations with that of memory cgroup APIs.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>  include/linux/memcontrol.h | 26 +++++++++++++++++-----
>  mm/memcontrol.c            | 45 ++++++++++++++++++++------------------
>  mm/percpu.c                |  2 +-
>  3 files changed, 45 insertions(+), 28 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 288dd6337f80f..776d9be1f446a 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -332,6 +332,7 @@ struct mem_cgroup {
>  #define MEMCG_CHARGE_BATCH 64U
>  
>  extern struct mem_cgroup *root_mem_cgroup;
> +extern struct obj_cgroup *root_obj_cgroup;
>  
>  enum page_memcg_data_flags {
>  	/* page->memcg_data is a pointer to an slabobj_ext vector */
> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>  	return (memcg == root_mem_cgroup);
>  }
>  
> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> +{
> +	return objcg == root_obj_cgroup;
> +}
> +
>  static inline bool mem_cgroup_disabled(void)
>  {
>  	return !cgroup_subsys_enabled(memory_cgrp_subsys);
> @@ -773,23 +779,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
>  
>  static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
>  {
> +	if (obj_cgroup_is_root(objcg))
> +		return true;
>  	return percpu_ref_tryget(&objcg->refcnt);
>  }
>  
> -static inline void obj_cgroup_get(struct obj_cgroup *objcg)
> +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
> +				       unsigned long nr)
>  {
> -	percpu_ref_get(&objcg->refcnt);
> +	if (!obj_cgroup_is_root(objcg))
> +		percpu_ref_get_many(&objcg->refcnt, nr);
>  }
>  
> -static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
> -				       unsigned long nr)
> +static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>  {
> -	percpu_ref_get_many(&objcg->refcnt, nr);
> +	obj_cgroup_get_many(objcg, 1);
>  }
>  
>  static inline void obj_cgroup_put(struct obj_cgroup *objcg)
>  {
> -	if (objcg)
> +	if (objcg && !obj_cgroup_is_root(objcg))
>  		percpu_ref_put(&objcg->refcnt);
>  }
>  
> @@ -1084,6 +1093,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>  	return true;
>  }
>  
> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> +{
> +	return true;
> +}
> +
>  static inline bool mem_cgroup_disabled(void)
>  {
>  	return true;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 544b3200db12d..21b5aad34cae7 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -83,6 +83,8 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
>  struct mem_cgroup *root_mem_cgroup __read_mostly;
>  EXPORT_SYMBOL(root_mem_cgroup);
>  
> +struct obj_cgroup *root_obj_cgroup __read_mostly;
> +
>  /* Active memory cgroup to use from an interrupt context */
>  DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
>  EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
> @@ -2634,15 +2636,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
>  
>  static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
>  {
> -	struct obj_cgroup *objcg = NULL;
> +	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
> +		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
>  
> -	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
> -		objcg = rcu_dereference(memcg->objcg);
>  		if (likely(objcg && obj_cgroup_tryget(objcg)))
> -			break;
> -		objcg = NULL;
> +			return objcg;
>  	}
> -	return objcg;
> +
> +	return NULL;
>  }
>  
>  static struct obj_cgroup *current_objcg_update(void)
> @@ -2716,18 +2717,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>  		 * Objcg reference is kept by the task, so it's safe
>  		 * to use the objcg by the current task.
>  		 */
> -		return objcg;
> +		return objcg ? : root_obj_cgroup;
>  	}
>  
>  	memcg = this_cpu_read(int_active_memcg);
>  	if (unlikely(memcg))
>  		goto from_memcg;
>  
> -	return NULL;
> +	return root_obj_cgroup;
>  
>  from_memcg:
> -	objcg = NULL;
> -	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
> +	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>  		/*
>  		 * Memcg pointer is protected by scope (see set_active_memcg())
>  		 * and is pinning the corresponding objcg, so objcg can't go
> @@ -2736,10 +2736,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>  		 */
>  		objcg = rcu_dereference_check(memcg->objcg, 1);
>  		if (likely(objcg))
> -			break;
> +			return objcg;
>  	}
>  
> -	return objcg;
> +	return root_obj_cgroup;
>  }
>  
>  struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
> @@ -2753,14 +2753,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>  		objcg = __folio_objcg(folio);
>  		obj_cgroup_get(objcg);
>  	} else {
> -		struct mem_cgroup *memcg;
> -
>  		rcu_read_lock();
> -		memcg = __folio_memcg(folio);
> -		if (memcg)
> -			objcg = __get_obj_cgroup_from_memcg(memcg);
> -		else
> -			objcg = NULL;
> +		objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
>  		rcu_read_unlock();
>  	}
>  	return objcg;
> @@ -2863,7 +2857,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>  	int ret = 0;
>  
>  	objcg = current_obj_cgroup();
> -	if (objcg) {
> +	if (objcg && !obj_cgroup_is_root(objcg)) {
>  		ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
>  		if (!ret) {
>  			obj_cgroup_get(objcg);
> @@ -3164,7 +3158,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>  	 * obj_cgroup_get() is used to get a permanent reference.
>  	 */
>  	objcg = current_obj_cgroup();
> -	if (!objcg)
> +	if (!objcg || obj_cgroup_is_root(objcg))
>  		return true;
>  
>  	/*
> @@ -3851,6 +3845,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>  	if (!objcg)
>  		goto free_shrinker;
>  
> +	if (unlikely(mem_cgroup_is_root(memcg)))
> +		root_obj_cgroup = objcg;
> +
>  	objcg->memcg = memcg;
>  	rcu_assign_pointer(memcg->objcg, objcg);
>  	obj_cgroup_get(objcg);
> @@ -5471,6 +5468,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
>  	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>  		return;
>  
> +	if (obj_cgroup_is_root(objcg))
> +		return;
> +
>  	VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
>  
>  	/* PF_MEMALLOC context, charging must succeed */
> @@ -5498,6 +5498,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>  	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>  		return;
>  
> +	if (obj_cgroup_is_root(objcg))
> +		return;
> +
>  	obj_cgroup_uncharge(objcg, size);
>  

If we modify zswap by adding MEMCG_ZSWAP_B and MEMCG_ZSWAPPED with obj_cgroup_charge_zswap , then
remove a control group (via rmdir) and reparent its objects to the root cgroup, then for the root
cgroup, obj_cgroup_uncharge_zswap will return directly due to the obj_cgroup_is_root check. Would
this cause us to miss decrementing MEMCG_ZSWAP_B and MEMCG_ZSWAPPED?

>  	rcu_read_lock();
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 81462ce5866e1..5c1a9b77d6b93 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
>  		return true;
>  
>  	objcg = current_obj_cgroup();
> -	if (!objcg)
> +	if (!objcg || obj_cgroup_is_root(objcg))
>  		return true;
>  
>  	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))

-- 
Best regards,
Ridong
Re: [PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup
Posted by Muchun Song 1 month, 2 weeks ago

On 2025/12/26 09:03, Chen Ridong wrote:
>
> On 2025/12/17 15:27, Qi Zheng wrote:
>> From: Muchun Song <songmuchun@bytedance.com>
>>
>> Memory cgroup functions such as get_mem_cgroup_from_folio() and
>> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
>> even for the root memory cgroup. In contrast, the situation for
>> object cgroups has been different.
>>
>> Previously, the root object cgroup couldn't be returned because
>> it didn't exist. Now that a valid root object cgroup exists, for
>> the sake of consistency, it's necessary to align the behavior of
>> object-cgroup-related operations with that of memory cgroup APIs.
>>
>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>>   include/linux/memcontrol.h | 26 +++++++++++++++++-----
>>   mm/memcontrol.c            | 45 ++++++++++++++++++++------------------
>>   mm/percpu.c                |  2 +-
>>   3 files changed, 45 insertions(+), 28 deletions(-)
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index 288dd6337f80f..776d9be1f446a 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -332,6 +332,7 @@ struct mem_cgroup {
>>   #define MEMCG_CHARGE_BATCH 64U
>>   
>>   extern struct mem_cgroup *root_mem_cgroup;
>> +extern struct obj_cgroup *root_obj_cgroup;
>>   
>>   enum page_memcg_data_flags {
>>   	/* page->memcg_data is a pointer to an slabobj_ext vector */
>> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>>   	return (memcg == root_mem_cgroup);
>>   }
>>   
>> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>> +{
>> +	return objcg == root_obj_cgroup;
>> +}
>> +
>>   static inline bool mem_cgroup_disabled(void)
>>   {
>>   	return !cgroup_subsys_enabled(memory_cgrp_subsys);
>> @@ -773,23 +779,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
>>   
>>   static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
>>   {
>> +	if (obj_cgroup_is_root(objcg))
>> +		return true;
>>   	return percpu_ref_tryget(&objcg->refcnt);
>>   }
>>   
>> -static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>> +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
>> +				       unsigned long nr)
>>   {
>> -	percpu_ref_get(&objcg->refcnt);
>> +	if (!obj_cgroup_is_root(objcg))
>> +		percpu_ref_get_many(&objcg->refcnt, nr);
>>   }
>>   
>> -static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
>> -				       unsigned long nr)
>> +static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>>   {
>> -	percpu_ref_get_many(&objcg->refcnt, nr);
>> +	obj_cgroup_get_many(objcg, 1);
>>   }
>>   
>>   static inline void obj_cgroup_put(struct obj_cgroup *objcg)
>>   {
>> -	if (objcg)
>> +	if (objcg && !obj_cgroup_is_root(objcg))
>>   		percpu_ref_put(&objcg->refcnt);
>>   }
>>   
>> @@ -1084,6 +1093,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>>   	return true;
>>   }
>>   
>> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>> +{
>> +	return true;
>> +}
>> +
>>   static inline bool mem_cgroup_disabled(void)
>>   {
>>   	return true;
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 544b3200db12d..21b5aad34cae7 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -83,6 +83,8 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
>>   struct mem_cgroup *root_mem_cgroup __read_mostly;
>>   EXPORT_SYMBOL(root_mem_cgroup);
>>   
>> +struct obj_cgroup *root_obj_cgroup __read_mostly;
>> +
>>   /* Active memory cgroup to use from an interrupt context */
>>   DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
>>   EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
>> @@ -2634,15 +2636,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
>>   
>>   static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
>>   {
>> -	struct obj_cgroup *objcg = NULL;
>> +	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>> +		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
>>   
>> -	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
>> -		objcg = rcu_dereference(memcg->objcg);
>>   		if (likely(objcg && obj_cgroup_tryget(objcg)))
>> -			break;
>> -		objcg = NULL;
>> +			return objcg;
>>   	}
>> -	return objcg;
>> +
>> +	return NULL;
>>   }
>>   
>>   static struct obj_cgroup *current_objcg_update(void)
>> @@ -2716,18 +2717,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>>   		 * Objcg reference is kept by the task, so it's safe
>>   		 * to use the objcg by the current task.
>>   		 */
>> -		return objcg;
>> +		return objcg ? : root_obj_cgroup;
>>   	}
>>   
>>   	memcg = this_cpu_read(int_active_memcg);
>>   	if (unlikely(memcg))
>>   		goto from_memcg;
>>   
>> -	return NULL;
>> +	return root_obj_cgroup;
>>   
>>   from_memcg:
>> -	objcg = NULL;
>> -	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
>> +	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>>   		/*
>>   		 * Memcg pointer is protected by scope (see set_active_memcg())
>>   		 * and is pinning the corresponding objcg, so objcg can't go
>> @@ -2736,10 +2736,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>>   		 */
>>   		objcg = rcu_dereference_check(memcg->objcg, 1);
>>   		if (likely(objcg))
>> -			break;
>> +			return objcg;
>>   	}
>>   
>> -	return objcg;
>> +	return root_obj_cgroup;
>>   }
>>   
>>   struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>> @@ -2753,14 +2753,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>>   		objcg = __folio_objcg(folio);
>>   		obj_cgroup_get(objcg);
>>   	} else {
>> -		struct mem_cgroup *memcg;
>> -
>>   		rcu_read_lock();
>> -		memcg = __folio_memcg(folio);
>> -		if (memcg)
>> -			objcg = __get_obj_cgroup_from_memcg(memcg);
>> -		else
>> -			objcg = NULL;
>> +		objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
>>   		rcu_read_unlock();
>>   	}
>>   	return objcg;
>> @@ -2863,7 +2857,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>>   	int ret = 0;
>>   
>>   	objcg = current_obj_cgroup();
>> -	if (objcg) {
>> +	if (objcg && !obj_cgroup_is_root(objcg)) {
>>   		ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
>>   		if (!ret) {
>>   			obj_cgroup_get(objcg);
>> @@ -3164,7 +3158,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>>   	 * obj_cgroup_get() is used to get a permanent reference.
>>   	 */
>>   	objcg = current_obj_cgroup();
>> -	if (!objcg)
>> +	if (!objcg || obj_cgroup_is_root(objcg))
>>   		return true;
>>   
>>   	/*
>> @@ -3851,6 +3845,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>>   	if (!objcg)
>>   		goto free_shrinker;
>>   
>> +	if (unlikely(mem_cgroup_is_root(memcg)))
>> +		root_obj_cgroup = objcg;
>> +
>>   	objcg->memcg = memcg;
>>   	rcu_assign_pointer(memcg->objcg, objcg);
>>   	obj_cgroup_get(objcg);
>> @@ -5471,6 +5468,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
>>   	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>>   		return;
>>   
>> +	if (obj_cgroup_is_root(objcg))
>> +		return;
>> +
>>   	VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
>>   
>>   	/* PF_MEMALLOC context, charging must succeed */
>> @@ -5498,6 +5498,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>>   	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>>   		return;
>>   
>> +	if (obj_cgroup_is_root(objcg))
>> +		return;
>> +
>>   	obj_cgroup_uncharge(objcg, size);
>>   
> If we modify zswap by adding MEMCG_ZSWAP_B and MEMCG_ZSWAPPED with obj_cgroup_charge_zswap , then
> remove a control group (via rmdir) and reparent its objects to the root cgroup, then for the root
> cgroup, obj_cgroup_uncharge_zswap will return directly due to the obj_cgroup_is_root check. Would
> this cause us to miss decrementing MEMCG_ZSWAP_B and MEMCG_ZSWAPPED?

I'm not sure I fully understand the problem—how could this happen, given 
that
obj_cgroup_charge_zswap also checks for the root objcg?

Muchun,
Thanks.
>
>>   	rcu_read_lock();
>> diff --git a/mm/percpu.c b/mm/percpu.c
>> index 81462ce5866e1..5c1a9b77d6b93 100644
>> --- a/mm/percpu.c
>> +++ b/mm/percpu.c
>> @@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
>>   		return true;
>>   
>>   	objcg = current_obj_cgroup();
>> -	if (!objcg)
>> +	if (!objcg || obj_cgroup_is_root(objcg))
>>   		return true;
>>   
>>   	if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))

Re: [PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup
Posted by Chen Ridong 1 month, 2 weeks ago

On 2025/12/26 11:10, Muchun Song wrote:
> 
> 
> On 2025/12/26 09:03, Chen Ridong wrote:
>>
>> On 2025/12/17 15:27, Qi Zheng wrote:
>>> From: Muchun Song <songmuchun@bytedance.com>
>>>
>>> Memory cgroup functions such as get_mem_cgroup_from_folio() and
>>> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
>>> even for the root memory cgroup. In contrast, the situation for
>>> object cgroups has been different.
>>>
>>> Previously, the root object cgroup couldn't be returned because
>>> it didn't exist. Now that a valid root object cgroup exists, for
>>> the sake of consistency, it's necessary to align the behavior of
>>> object-cgroup-related operations with that of memory cgroup APIs.
>>>
>>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>> ---
>>>   include/linux/memcontrol.h | 26 +++++++++++++++++-----
>>>   mm/memcontrol.c            | 45 ++++++++++++++++++++------------------
>>>   mm/percpu.c                |  2 +-
>>>   3 files changed, 45 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>>> index 288dd6337f80f..776d9be1f446a 100644
>>> --- a/include/linux/memcontrol.h
>>> +++ b/include/linux/memcontrol.h
>>> @@ -332,6 +332,7 @@ struct mem_cgroup {
>>>   #define MEMCG_CHARGE_BATCH 64U
>>>     extern struct mem_cgroup *root_mem_cgroup;
>>> +extern struct obj_cgroup *root_obj_cgroup;
>>>     enum page_memcg_data_flags {
>>>       /* page->memcg_data is a pointer to an slabobj_ext vector */
>>> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>>>       return (memcg == root_mem_cgroup);
>>>   }
>>>   +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>>> +{
>>> +    return objcg == root_obj_cgroup;
>>> +}
>>> +
>>>   static inline bool mem_cgroup_disabled(void)
>>>   {
>>>       return !cgroup_subsys_enabled(memory_cgrp_subsys);
>>> @@ -773,23 +779,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
>>>     static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
>>>   {
>>> +    if (obj_cgroup_is_root(objcg))
>>> +        return true;
>>>       return percpu_ref_tryget(&objcg->refcnt);
>>>   }
>>>   -static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>>> +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
>>> +                       unsigned long nr)
>>>   {
>>> -    percpu_ref_get(&objcg->refcnt);
>>> +    if (!obj_cgroup_is_root(objcg))
>>> +        percpu_ref_get_many(&objcg->refcnt, nr);
>>>   }
>>>   -static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
>>> -                       unsigned long nr)
>>> +static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>>>   {
>>> -    percpu_ref_get_many(&objcg->refcnt, nr);
>>> +    obj_cgroup_get_many(objcg, 1);
>>>   }
>>>     static inline void obj_cgroup_put(struct obj_cgroup *objcg)
>>>   {
>>> -    if (objcg)
>>> +    if (objcg && !obj_cgroup_is_root(objcg))
>>>           percpu_ref_put(&objcg->refcnt);
>>>   }
>>>   @@ -1084,6 +1093,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>>>       return true;
>>>   }
>>>   +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>>> +{
>>> +    return true;
>>> +}
>>> +
>>>   static inline bool mem_cgroup_disabled(void)
>>>   {
>>>       return true;
>>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>> index 544b3200db12d..21b5aad34cae7 100644
>>> --- a/mm/memcontrol.c
>>> +++ b/mm/memcontrol.c
>>> @@ -83,6 +83,8 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
>>>   struct mem_cgroup *root_mem_cgroup __read_mostly;
>>>   EXPORT_SYMBOL(root_mem_cgroup);
>>>   +struct obj_cgroup *root_obj_cgroup __read_mostly;
>>> +
>>>   /* Active memory cgroup to use from an interrupt context */
>>>   DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
>>>   EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
>>> @@ -2634,15 +2636,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
>>>     static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
>>>   {
>>> -    struct obj_cgroup *objcg = NULL;
>>> +    for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>>> +        struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
>>>   -    for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
>>> -        objcg = rcu_dereference(memcg->objcg);
>>>           if (likely(objcg && obj_cgroup_tryget(objcg)))
>>> -            break;
>>> -        objcg = NULL;
>>> +            return objcg;
>>>       }
>>> -    return objcg;
>>> +
>>> +    return NULL;
>>>   }
>>>     static struct obj_cgroup *current_objcg_update(void)
>>> @@ -2716,18 +2717,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>>>            * Objcg reference is kept by the task, so it's safe
>>>            * to use the objcg by the current task.
>>>            */
>>> -        return objcg;
>>> +        return objcg ? : root_obj_cgroup;
>>>       }
>>>         memcg = this_cpu_read(int_active_memcg);
>>>       if (unlikely(memcg))
>>>           goto from_memcg;
>>>   -    return NULL;
>>> +    return root_obj_cgroup;
>>>     from_memcg:
>>> -    objcg = NULL;
>>> -    for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
>>> +    for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>>>           /*
>>>            * Memcg pointer is protected by scope (see set_active_memcg())
>>>            * and is pinning the corresponding objcg, so objcg can't go
>>> @@ -2736,10 +2736,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>>>            */
>>>           objcg = rcu_dereference_check(memcg->objcg, 1);
>>>           if (likely(objcg))
>>> -            break;
>>> +            return objcg;
>>>       }
>>>   -    return objcg;
>>> +    return root_obj_cgroup;
>>>   }
>>>     struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>>> @@ -2753,14 +2753,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>>>           objcg = __folio_objcg(folio);
>>>           obj_cgroup_get(objcg);
>>>       } else {
>>> -        struct mem_cgroup *memcg;
>>> -
>>>           rcu_read_lock();
>>> -        memcg = __folio_memcg(folio);
>>> -        if (memcg)
>>> -            objcg = __get_obj_cgroup_from_memcg(memcg);
>>> -        else
>>> -            objcg = NULL;
>>> +        objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
>>>           rcu_read_unlock();
>>>       }
>>>       return objcg;
>>> @@ -2863,7 +2857,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>>>       int ret = 0;
>>>         objcg = current_obj_cgroup();
>>> -    if (objcg) {
>>> +    if (objcg && !obj_cgroup_is_root(objcg)) {
>>>           ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
>>>           if (!ret) {
>>>               obj_cgroup_get(objcg);
>>> @@ -3164,7 +3158,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>>>        * obj_cgroup_get() is used to get a permanent reference.
>>>        */
>>>       objcg = current_obj_cgroup();
>>> -    if (!objcg)
>>> +    if (!objcg || obj_cgroup_is_root(objcg))
>>>           return true;
>>>         /*
>>> @@ -3851,6 +3845,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>>>       if (!objcg)
>>>           goto free_shrinker;
>>>   +    if (unlikely(mem_cgroup_is_root(memcg)))
>>> +        root_obj_cgroup = objcg;
>>> +
>>>       objcg->memcg = memcg;
>>>       rcu_assign_pointer(memcg->objcg, objcg);
>>>       obj_cgroup_get(objcg);
>>> @@ -5471,6 +5468,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
>>>       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>>>           return;
>>>   +    if (obj_cgroup_is_root(objcg))
>>> +        return;
>>> +
>>>       VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
>>>         /* PF_MEMALLOC context, charging must succeed */
>>> @@ -5498,6 +5498,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>>>       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>>>           return;
>>>   +    if (obj_cgroup_is_root(objcg))
>>> +        return;
>>> +
>>>       obj_cgroup_uncharge(objcg, size);
>>>   
>> If we modify zswap by adding MEMCG_ZSWAP_B and MEMCG_ZSWAPPED with obj_cgroup_charge_zswap , then
>> remove a control group (via rmdir) and reparent its objects to the root cgroup, then for the root
>> cgroup, obj_cgroup_uncharge_zswap will return directly due to the obj_cgroup_is_root check. Would
>> this cause us to miss decrementing MEMCG_ZSWAP_B and MEMCG_ZSWAPPED?
> 
> I'm not sure I fully understand the problem—how could this happen, given that
> obj_cgroup_charge_zswap also checks for the root objcg?
> 
> Muchun,
> Thanks.

That is:

1. memcg A is under the root memcg.
2. obj_cgroup_charge_zswap charges memcg A.
3. After rmdir A, the obj of A is reparented to the root memcg.
4. obj_cgroup_uncharge_zswap does nothing and returns, since the object is now associated with the root.

Thus, the root memcg will miss decrementing MEMCG_ZSWAP_B and MEMCG_ZSWAPPED, correct? Or am I
missing something?

>>
>>>       rcu_read_lock();
>>> diff --git a/mm/percpu.c b/mm/percpu.c
>>> index 81462ce5866e1..5c1a9b77d6b93 100644
>>> --- a/mm/percpu.c
>>> +++ b/mm/percpu.c
>>> @@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
>>>           return true;
>>>         objcg = current_obj_cgroup();
>>> -    if (!objcg)
>>> +    if (!objcg || obj_cgroup_is_root(objcg))
>>>           return true;
>>>         if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))
> 

-- 
Best regards,
Ridong

Re: [PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup
Posted by Chen Ridong 1 month, 2 weeks ago

On 2025/12/26 11:50, Chen Ridong wrote:
> 
> 
> On 2025/12/26 11:10, Muchun Song wrote:
>>
>>
>> On 2025/12/26 09:03, Chen Ridong wrote:
>>>
>>> On 2025/12/17 15:27, Qi Zheng wrote:
>>>> From: Muchun Song <songmuchun@bytedance.com>
>>>>
>>>> Memory cgroup functions such as get_mem_cgroup_from_folio() and
>>>> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
>>>> even for the root memory cgroup. In contrast, the situation for
>>>> object cgroups has been different.
>>>>
>>>> Previously, the root object cgroup couldn't be returned because
>>>> it didn't exist. Now that a valid root object cgroup exists, for
>>>> the sake of consistency, it's necessary to align the behavior of
>>>> object-cgroup-related operations with that of memory cgroup APIs.
>>>>
>>>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>>> ---
>>>>   include/linux/memcontrol.h | 26 +++++++++++++++++-----
>>>>   mm/memcontrol.c            | 45 ++++++++++++++++++++------------------
>>>>   mm/percpu.c                |  2 +-
>>>>   3 files changed, 45 insertions(+), 28 deletions(-)
>>>>
>>>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>>>> index 288dd6337f80f..776d9be1f446a 100644
>>>> --- a/include/linux/memcontrol.h
>>>> +++ b/include/linux/memcontrol.h
>>>> @@ -332,6 +332,7 @@ struct mem_cgroup {
>>>>   #define MEMCG_CHARGE_BATCH 64U
>>>>     extern struct mem_cgroup *root_mem_cgroup;
>>>> +extern struct obj_cgroup *root_obj_cgroup;
>>>>     enum page_memcg_data_flags {
>>>>       /* page->memcg_data is a pointer to an slabobj_ext vector */
>>>> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>>>>       return (memcg == root_mem_cgroup);
>>>>   }
>>>>   +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>>>> +{
>>>> +    return objcg == root_obj_cgroup;
>>>> +}
>>>> +
>>>>   static inline bool mem_cgroup_disabled(void)
>>>>   {
>>>>       return !cgroup_subsys_enabled(memory_cgrp_subsys);
>>>> @@ -773,23 +779,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
>>>>     static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
>>>>   {
>>>> +    if (obj_cgroup_is_root(objcg))
>>>> +        return true;
>>>>       return percpu_ref_tryget(&objcg->refcnt);
>>>>   }
>>>>   -static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>>>> +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
>>>> +                       unsigned long nr)
>>>>   {
>>>> -    percpu_ref_get(&objcg->refcnt);
>>>> +    if (!obj_cgroup_is_root(objcg))
>>>> +        percpu_ref_get_many(&objcg->refcnt, nr);
>>>>   }
>>>>   -static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
>>>> -                       unsigned long nr)
>>>> +static inline void obj_cgroup_get(struct obj_cgroup *objcg)
>>>>   {
>>>> -    percpu_ref_get_many(&objcg->refcnt, nr);
>>>> +    obj_cgroup_get_many(objcg, 1);
>>>>   }
>>>>     static inline void obj_cgroup_put(struct obj_cgroup *objcg)
>>>>   {
>>>> -    if (objcg)
>>>> +    if (objcg && !obj_cgroup_is_root(objcg))
>>>>           percpu_ref_put(&objcg->refcnt);
>>>>   }
>>>>   @@ -1084,6 +1093,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>>>>       return true;
>>>>   }
>>>>   +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>>>> +{
>>>> +    return true;
>>>> +}
>>>> +
>>>>   static inline bool mem_cgroup_disabled(void)
>>>>   {
>>>>       return true;
>>>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>>> index 544b3200db12d..21b5aad34cae7 100644
>>>> --- a/mm/memcontrol.c
>>>> +++ b/mm/memcontrol.c
>>>> @@ -83,6 +83,8 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
>>>>   struct mem_cgroup *root_mem_cgroup __read_mostly;
>>>>   EXPORT_SYMBOL(root_mem_cgroup);
>>>>   +struct obj_cgroup *root_obj_cgroup __read_mostly;
>>>> +
>>>>   /* Active memory cgroup to use from an interrupt context */
>>>>   DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
>>>>   EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
>>>> @@ -2634,15 +2636,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
>>>>     static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
>>>>   {
>>>> -    struct obj_cgroup *objcg = NULL;
>>>> +    for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>>>> +        struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
>>>>   -    for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
>>>> -        objcg = rcu_dereference(memcg->objcg);
>>>>           if (likely(objcg && obj_cgroup_tryget(objcg)))
>>>> -            break;
>>>> -        objcg = NULL;
>>>> +            return objcg;
>>>>       }
>>>> -    return objcg;
>>>> +
>>>> +    return NULL;
>>>>   }
>>>>     static struct obj_cgroup *current_objcg_update(void)
>>>> @@ -2716,18 +2717,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>>>>            * Objcg reference is kept by the task, so it's safe
>>>>            * to use the objcg by the current task.
>>>>            */
>>>> -        return objcg;
>>>> +        return objcg ? : root_obj_cgroup;
>>>>       }
>>>>         memcg = this_cpu_read(int_active_memcg);
>>>>       if (unlikely(memcg))
>>>>           goto from_memcg;
>>>>   -    return NULL;
>>>> +    return root_obj_cgroup;
>>>>     from_memcg:
>>>> -    objcg = NULL;
>>>> -    for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
>>>> +    for (; memcg; memcg = parent_mem_cgroup(memcg)) {
>>>>           /*
>>>>            * Memcg pointer is protected by scope (see set_active_memcg())
>>>>            * and is pinning the corresponding objcg, so objcg can't go
>>>> @@ -2736,10 +2736,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
>>>>            */
>>>>           objcg = rcu_dereference_check(memcg->objcg, 1);
>>>>           if (likely(objcg))
>>>> -            break;
>>>> +            return objcg;
>>>>       }
>>>>   -    return objcg;
>>>> +    return root_obj_cgroup;
>>>>   }
>>>>     struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>>>> @@ -2753,14 +2753,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
>>>>           objcg = __folio_objcg(folio);
>>>>           obj_cgroup_get(objcg);
>>>>       } else {
>>>> -        struct mem_cgroup *memcg;
>>>> -
>>>>           rcu_read_lock();
>>>> -        memcg = __folio_memcg(folio);
>>>> -        if (memcg)
>>>> -            objcg = __get_obj_cgroup_from_memcg(memcg);
>>>> -        else
>>>> -            objcg = NULL;
>>>> +        objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
>>>>           rcu_read_unlock();
>>>>       }
>>>>       return objcg;
>>>> @@ -2863,7 +2857,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>>>>       int ret = 0;
>>>>         objcg = current_obj_cgroup();
>>>> -    if (objcg) {
>>>> +    if (objcg && !obj_cgroup_is_root(objcg)) {
>>>>           ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
>>>>           if (!ret) {
>>>>               obj_cgroup_get(objcg);
>>>> @@ -3164,7 +3158,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
>>>>        * obj_cgroup_get() is used to get a permanent reference.
>>>>        */
>>>>       objcg = current_obj_cgroup();
>>>> -    if (!objcg)
>>>> +    if (!objcg || obj_cgroup_is_root(objcg))
>>>>           return true;
>>>>         /*
>>>> @@ -3851,6 +3845,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
>>>>       if (!objcg)
>>>>           goto free_shrinker;
>>>>   +    if (unlikely(mem_cgroup_is_root(memcg)))
>>>> +        root_obj_cgroup = objcg;
>>>> +
>>>>       objcg->memcg = memcg;
>>>>       rcu_assign_pointer(memcg->objcg, objcg);
>>>>       obj_cgroup_get(objcg);
>>>> @@ -5471,6 +5468,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
>>>>       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>>>>           return;
>>>>   +    if (obj_cgroup_is_root(objcg))
>>>> +        return;
>>>> +
>>>>       VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
>>>>         /* PF_MEMALLOC context, charging must succeed */
>>>> @@ -5498,6 +5498,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>>>>       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
>>>>           return;
>>>>   +    if (obj_cgroup_is_root(objcg))
>>>> +        return;
>>>> +
>>>>       obj_cgroup_uncharge(objcg, size);
>>>>   
>>> If we modify zswap by adding MEMCG_ZSWAP_B and MEMCG_ZSWAPPED with obj_cgroup_charge_zswap , then
>>> remove a control group (via rmdir) and reparent its objects to the root cgroup, then for the root
>>> cgroup, obj_cgroup_uncharge_zswap will return directly due to the obj_cgroup_is_root check. Would
>>> this cause us to miss decrementing MEMCG_ZSWAP_B and MEMCG_ZSWAPPED?
>>
>> I'm not sure I fully understand the problem—how could this happen, given that
>> obj_cgroup_charge_zswap also checks for the root objcg?
>>
>> Muchun,
>> Thanks.
> 
> That is:
> 
> 1. memcg A is under the root memcg.
> 2. obj_cgroup_charge_zswap charges memcg A.
> 3. After rmdir A, the obj of A is reparented to the root memcg.
> 4. obj_cgroup_uncharge_zswap does nothing and returns, since the object is now associated with the root.
> 
> Thus, the root memcg will miss decrementing MEMCG_ZSWAP_B and MEMCG_ZSWAPPED, correct? Or am I
> missing something?
> 

Oh, I understand now — please ignore my previous question. Apologies for the noise.

Even after reparenting, the object from A should not be treated as a root object.

>>>
>>>>       rcu_read_lock();
>>>> diff --git a/mm/percpu.c b/mm/percpu.c
>>>> index 81462ce5866e1..5c1a9b77d6b93 100644
>>>> --- a/mm/percpu.c
>>>> +++ b/mm/percpu.c
>>>> @@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
>>>>           return true;
>>>>         objcg = current_obj_cgroup();
>>>> -    if (!objcg)
>>>> +    if (!objcg || obj_cgroup_is_root(objcg))
>>>>           return true;
>>>>         if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))
>>
> 

-- 
Best regards,
Ridong

Re: [PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup
Posted by Shakeel Butt 1 month, 3 weeks ago
On Wed, Dec 17, 2025 at 03:27:31PM +0800, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> Memory cgroup functions such as get_mem_cgroup_from_folio() and
> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
> even for the root memory cgroup. In contrast, the situation for
> object cgroups has been different.
> 
> Previously, the root object cgroup couldn't be returned because
> it didn't exist. Now that a valid root object cgroup exists, for
> the sake of consistency, it's necessary to align the behavior of
> object-cgroup-related operations with that of memory cgroup APIs.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Re: [PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup
Posted by Johannes Weiner 1 month, 3 weeks ago
On Wed, Dec 17, 2025 at 03:27:31PM +0800, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> Memory cgroup functions such as get_mem_cgroup_from_folio() and
> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
> even for the root memory cgroup. In contrast, the situation for
> object cgroups has been different.
> 
> Previously, the root object cgroup couldn't be returned because
> it didn't exist. Now that a valid root object cgroup exists, for
> the sake of consistency, it's necessary to align the behavior of
> object-cgroup-related operations with that of memory cgroup APIs.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>