From: Muchun Song <songmuchun@bytedance.com>
Memory cgroup functions such as get_mem_cgroup_from_folio() and
get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
even for the root memory cgroup. In contrast, the situation for
object cgroups has been different.
Previously, the root object cgroup couldn't be returned because
it didn't exist. Now that a valid root object cgroup exists, for
the sake of consistency, it's necessary to align the behavior of
object-cgroup-related operations with that of memory cgroup APIs.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
include/linux/memcontrol.h | 29 +++++++++++++++++-------
mm/memcontrol.c | 45 ++++++++++++++++++++------------------
mm/percpu.c | 2 +-
3 files changed, 46 insertions(+), 30 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6185d8399a54e..9fdbd4970021d 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -332,6 +332,7 @@ struct mem_cgroup {
#define MEMCG_CHARGE_BATCH 64U
extern struct mem_cgroup *root_mem_cgroup;
+extern struct obj_cgroup *root_obj_cgroup;
enum page_memcg_data_flags {
/* page->memcg_data is a pointer to an slabobj_ext vector */
@@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
return (memcg == root_mem_cgroup);
}
+static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
+{
+ return objcg == root_obj_cgroup;
+}
+
static inline bool mem_cgroup_disabled(void)
{
return !cgroup_subsys_enabled(memory_cgrp_subsys);
@@ -773,23 +779,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
{
+ if (obj_cgroup_is_root(objcg))
+ return true;
return percpu_ref_tryget(&objcg->refcnt);
}
-static inline void obj_cgroup_get(struct obj_cgroup *objcg)
+static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
+ unsigned long nr)
{
- percpu_ref_get(&objcg->refcnt);
+ if (!obj_cgroup_is_root(objcg))
+ percpu_ref_get_many(&objcg->refcnt, nr);
}
-static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
- unsigned long nr)
+static inline void obj_cgroup_get(struct obj_cgroup *objcg)
{
- percpu_ref_get_many(&objcg->refcnt, nr);
+ obj_cgroup_get_many(objcg, 1);
}
static inline void obj_cgroup_put(struct obj_cgroup *objcg)
{
- if (objcg)
+ if (objcg && !obj_cgroup_is_root(objcg))
percpu_ref_put(&objcg->refcnt);
}
@@ -1094,6 +1103,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
return true;
}
+static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
+{
+ return true;
+}
+
static inline bool mem_cgroup_disabled(void)
{
return true;
@@ -1710,8 +1724,7 @@ static inline struct obj_cgroup *get_obj_cgroup_from_current(void)
{
struct obj_cgroup *objcg = current_obj_cgroup();
- if (objcg)
- obj_cgroup_get(objcg);
+ obj_cgroup_get(objcg);
return objcg;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2afd7f99ca101..d484b632c790f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
struct mem_cgroup *root_mem_cgroup __read_mostly;
EXPORT_SYMBOL(root_mem_cgroup);
+struct obj_cgroup *root_obj_cgroup __read_mostly;
+
/* Active memory cgroup to use from an interrupt context */
DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
@@ -2642,15 +2644,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
{
- struct obj_cgroup *objcg = NULL;
+ for (; memcg; memcg = parent_mem_cgroup(memcg)) {
+ struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
- for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
- objcg = rcu_dereference(memcg->objcg);
if (likely(objcg && obj_cgroup_tryget(objcg)))
- break;
- objcg = NULL;
+ return objcg;
}
- return objcg;
+
+ return NULL;
}
static struct obj_cgroup *current_objcg_update(void)
@@ -2724,18 +2725,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
* Objcg reference is kept by the task, so it's safe
* to use the objcg by the current task.
*/
- return objcg;
+ return objcg ? : root_obj_cgroup;
}
memcg = this_cpu_read(int_active_memcg);
if (unlikely(memcg))
goto from_memcg;
- return NULL;
+ return root_obj_cgroup;
from_memcg:
- objcg = NULL;
- for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
+ for (; memcg; memcg = parent_mem_cgroup(memcg)) {
/*
* Memcg pointer is protected by scope (see set_active_memcg())
* and is pinning the corresponding objcg, so objcg can't go
@@ -2744,10 +2744,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
*/
objcg = rcu_dereference_check(memcg->objcg, 1);
if (likely(objcg))
- break;
+ return objcg;
}
- return objcg;
+ return root_obj_cgroup;
}
struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
@@ -2761,14 +2761,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
objcg = __folio_objcg(folio);
obj_cgroup_get(objcg);
} else {
- struct mem_cgroup *memcg;
-
rcu_read_lock();
- memcg = __folio_memcg(folio);
- if (memcg)
- objcg = __get_obj_cgroup_from_memcg(memcg);
- else
- objcg = NULL;
+ objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
rcu_read_unlock();
}
return objcg;
@@ -2871,7 +2865,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
int ret = 0;
objcg = current_obj_cgroup();
- if (objcg) {
+ if (!obj_cgroup_is_root(objcg)) {
ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
if (!ret) {
obj_cgroup_get(objcg);
@@ -3172,7 +3166,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
* obj_cgroup_get() is used to get a permanent reference.
*/
objcg = current_obj_cgroup();
- if (!objcg)
+ if (obj_cgroup_is_root(objcg))
return true;
/*
@@ -3859,6 +3853,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
if (!objcg)
goto free_shrinker;
+ if (unlikely(mem_cgroup_is_root(memcg)))
+ root_obj_cgroup = objcg;
+
objcg->memcg = memcg;
rcu_assign_pointer(memcg->objcg, objcg);
obj_cgroup_get(objcg);
@@ -5479,6 +5476,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return;
+ if (obj_cgroup_is_root(objcg))
+ return;
+
VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
/* PF_MEMALLOC context, charging must succeed */
@@ -5506,6 +5506,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return;
+ if (obj_cgroup_is_root(objcg))
+ return;
+
obj_cgroup_uncharge(objcg, size);
rcu_read_lock();
diff --git a/mm/percpu.c b/mm/percpu.c
index 81462ce5866e1..78bdffe1fcb57 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
return true;
objcg = current_obj_cgroup();
- if (!objcg)
+ if (obj_cgroup_is_root(objcg))
return true;
if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))
--
2.20.1
On Tue, Oct 28, 2025 at 09:58:19PM +0800, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
>
> Memory cgroup functions such as get_mem_cgroup_from_folio() and
> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
> even for the root memory cgroup. In contrast, the situation for
> object cgroups has been different.
>
> Previously, the root object cgroup couldn't be returned because
> it didn't exist. Now that a valid root object cgroup exists, for
> the sake of consistency, it's necessary to align the behavior of
> object-cgroup-related operations with that of memory cgroup APIs.
>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
> include/linux/memcontrol.h | 29 +++++++++++++++++-------
> mm/memcontrol.c | 45 ++++++++++++++++++++------------------
> mm/percpu.c | 2 +-
> 3 files changed, 46 insertions(+), 30 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 6185d8399a54e..9fdbd4970021d 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -332,6 +332,7 @@ struct mem_cgroup {
> #define MEMCG_CHARGE_BATCH 64U
>
> extern struct mem_cgroup *root_mem_cgroup;
> +extern struct obj_cgroup *root_obj_cgroup;
>
> enum page_memcg_data_flags {
> /* page->memcg_data is a pointer to an slabobj_ext vector */
> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
> return (memcg == root_mem_cgroup);
> }
>
> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> +{
> + return objcg == root_obj_cgroup;
> +}
After reparenting, an objcg may satisfy objcg->memcg == root_mem_cgroup
while objcg != root_obj_cgroup. Should they be considered as
root objcgs?
> static inline bool mem_cgroup_disabled(void)
> {
> return !cgroup_subsys_enabled(memory_cgrp_subsys);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 2afd7f99ca101..d484b632c790f 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2871,7 +2865,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
> int ret = 0;
>
> objcg = current_obj_cgroup();
> - if (objcg) {
> + if (!obj_cgroup_is_root(objcg)) {
Now that we support the page and slab allocators support allocating memory
in NMI contexts (on some archs), current_obj_cgroup() can return NULL
if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi()) returns true
(then it leads to a NULL-pointer-deref bug).
But IIUC this is applied to kmem charging only (as they use this_cpu ops
for stats update), and we don't have to apply the same restriction to
charging LRU pages with objcg.
Maybe Shakeel has more insight on this.
Link: https://lore.kernel.org/all/20250519063142.111219-1-shakeel.butt@linux.dev
--
Cheers,
Harry / Hyeonggon
Hi Harry,
On 11/17/25 5:17 PM, Harry Yoo wrote:
> On Tue, Oct 28, 2025 at 09:58:19PM +0800, Qi Zheng wrote:
>> From: Muchun Song <songmuchun@bytedance.com>
>>
>> Memory cgroup functions such as get_mem_cgroup_from_folio() and
>> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
>> even for the root memory cgroup. In contrast, the situation for
>> object cgroups has been different.
>>
>> Previously, the root object cgroup couldn't be returned because
>> it didn't exist. Now that a valid root object cgroup exists, for
>> the sake of consistency, it's necessary to align the behavior of
>> object-cgroup-related operations with that of memory cgroup APIs.
>>
>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>> include/linux/memcontrol.h | 29 +++++++++++++++++-------
>> mm/memcontrol.c | 45 ++++++++++++++++++++------------------
>> mm/percpu.c | 2 +-
>> 3 files changed, 46 insertions(+), 30 deletions(-)
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index 6185d8399a54e..9fdbd4970021d 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -332,6 +332,7 @@ struct mem_cgroup {
>> #define MEMCG_CHARGE_BATCH 64U
>>
>> extern struct mem_cgroup *root_mem_cgroup;
>> +extern struct obj_cgroup *root_obj_cgroup;
>>
>> enum page_memcg_data_flags {
>> /* page->memcg_data is a pointer to an slabobj_ext vector */
>> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>> return (memcg == root_mem_cgroup);
>> }
>>
>> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>> +{
>> + return objcg == root_obj_cgroup;
>> +}
>
> After reparenting, an objcg may satisfy objcg->memcg == root_mem_cgroup
> while objcg != root_obj_cgroup. Should they be considered as
> root objcgs?
Indeed, it's pointless to charge to root_mem_cgroup (objcg->memcg).
So it should be:
static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
{
return (objcg == root_obj_cgroup) || (objcg->memcg == root_mem_cgroup);
}
>
>> static inline bool mem_cgroup_disabled(void)
>> {
>> return !cgroup_subsys_enabled(memory_cgrp_subsys);
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 2afd7f99ca101..d484b632c790f 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -2871,7 +2865,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>> int ret = 0;
>>
>> objcg = current_obj_cgroup();
>> - if (objcg) {
>> + if (!obj_cgroup_is_root(objcg)) {
>
> Now that we support the page and slab allocators support allocating memory
> in NMI contexts (on some archs), current_obj_cgroup() can return NULL
> if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi()) returns true
> (then it leads to a NULL-pointer-deref bug).
>
> But IIUC this is applied to kmem charging only (as they use this_cpu ops
> for stats update), and we don't have to apply the same restriction to
> charging LRU pages with objcg.
>
> Maybe Shakeel has more insight on this.
>
> Link: https://lore.kernel.org/all/20250519063142.111219-1-shakeel.butt@linux.dev
Thanks for this information, and it seems there's nothing wrong here.
Thanks,
Qi
>
On Tue, Nov 18, 2025 at 07:28:41PM +0800, Qi Zheng wrote:
> Hi Harry,
>
> On 11/17/25 5:17 PM, Harry Yoo wrote:
> > On Tue, Oct 28, 2025 at 09:58:19PM +0800, Qi Zheng wrote:
> > > From: Muchun Song <songmuchun@bytedance.com>
> > >
> > > Memory cgroup functions such as get_mem_cgroup_from_folio() and
> > > get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
> > > even for the root memory cgroup. In contrast, the situation for
> > > object cgroups has been different.
> > >
> > > Previously, the root object cgroup couldn't be returned because
> > > it didn't exist. Now that a valid root object cgroup exists, for
> > > the sake of consistency, it's necessary to align the behavior of
> > > object-cgroup-related operations with that of memory cgroup APIs.
> > >
> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> > > ---
> > > include/linux/memcontrol.h | 29 +++++++++++++++++-------
> > > mm/memcontrol.c | 45 ++++++++++++++++++++------------------
> > > mm/percpu.c | 2 +-
> > > 3 files changed, 46 insertions(+), 30 deletions(-)
> > >
> > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > > index 6185d8399a54e..9fdbd4970021d 100644
> > > --- a/include/linux/memcontrol.h
> > > +++ b/include/linux/memcontrol.h
> > > @@ -332,6 +332,7 @@ struct mem_cgroup {
> > > #define MEMCG_CHARGE_BATCH 64U
> > > extern struct mem_cgroup *root_mem_cgroup;
> > > +extern struct obj_cgroup *root_obj_cgroup;
> > > enum page_memcg_data_flags {
> > > /* page->memcg_data is a pointer to an slabobj_ext vector */
> > > @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
> > > return (memcg == root_mem_cgroup);
> > > }
> > > +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> > > +{
> > > + return objcg == root_obj_cgroup;
> > > +}
> >
> > After reparenting, an objcg may satisfy objcg->memcg == root_mem_cgroup
> > while objcg != root_obj_cgroup. Should they be considered as
> > root objcgs?
>
> Indeed, it's pointless to charge to root_mem_cgroup (objcg->memcg).
>
> So it should be:
>
> static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> {
> return (objcg == root_obj_cgroup) || (objcg->memcg == root_mem_cgroup);
> }
>
Thanks and tomorrow I'll try to review if will be correct ;)
> > > static inline bool mem_cgroup_disabled(void)
> > > {
> > > return !cgroup_subsys_enabled(memory_cgrp_subsys);
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 2afd7f99ca101..d484b632c790f 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -2871,7 +2865,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
> > > int ret = 0;
> > > objcg = current_obj_cgroup();
> > > - if (objcg) {
> > > + if (!obj_cgroup_is_root(objcg)) {
> >
> > Now that we support the page and slab allocators support allocating memory
> > in NMI contexts (on some archs), current_obj_cgroup() can return NULL
> > if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi()) returns true
> > (then it leads to a NULL-pointer-deref bug).
> >
> > But IIUC this is applied to kmem charging only (as they use this_cpu ops
> > for stats update), and we don't have to apply the same restriction to
> > charging LRU pages with objcg.
> >
> > Maybe Shakeel has more insight on this.
> >
> > Link: https://lore.kernel.org/all/20250519063142.111219-1-shakeel.butt@linux.dev
>
> Thanks for this information, and it seems there's nothing wrong here.
I mean at least we should not introduce a NULL-pointer-deref bug in
__memcg_kmem_charge_page(), by assuming objcg returned by
current_obj_cgroup() is non-NULL?
1. Someone allocates non-slab kmem in an NMI context (in_nmi() == true),
calling __memcg_kmem_charge_page().
2. current_obj_cgruop() returns NULL because the architectures
has CONFIG_MEMCG_NMI_UNSAFE and it's in an NMI context.
3. obj_cgroup_is_root() returns false since
objcg (NULL) != root_obj_cgroup
4. we pass NULL to obj_cgroup_charge_pages().
5. obj_cgroup_charge_pages() calls get_mem_cgroup_from_objcg(),
dereference objcg->memcg (! a NULL-pointer-deref).
> Thanks,
> Qi
>
> >
--
Cheers,
Harry / Hyeonggon
On 11/18/25 8:12 PM, Harry Yoo wrote:
> On Tue, Nov 18, 2025 at 07:28:41PM +0800, Qi Zheng wrote:
>> Hi Harry,
>>
>> On 11/17/25 5:17 PM, Harry Yoo wrote:
>>> On Tue, Oct 28, 2025 at 09:58:19PM +0800, Qi Zheng wrote:
>>>> From: Muchun Song <songmuchun@bytedance.com>
>>>>
>>>> Memory cgroup functions such as get_mem_cgroup_from_folio() and
>>>> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
>>>> even for the root memory cgroup. In contrast, the situation for
>>>> object cgroups has been different.
>>>>
>>>> Previously, the root object cgroup couldn't be returned because
>>>> it didn't exist. Now that a valid root object cgroup exists, for
>>>> the sake of consistency, it's necessary to align the behavior of
>>>> object-cgroup-related operations with that of memory cgroup APIs.
>>>>
>>>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>>> ---
>>>> include/linux/memcontrol.h | 29 +++++++++++++++++-------
>>>> mm/memcontrol.c | 45 ++++++++++++++++++++------------------
>>>> mm/percpu.c | 2 +-
>>>> 3 files changed, 46 insertions(+), 30 deletions(-)
>>>>
>>>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>>>> index 6185d8399a54e..9fdbd4970021d 100644
>>>> --- a/include/linux/memcontrol.h
>>>> +++ b/include/linux/memcontrol.h
>>>> @@ -332,6 +332,7 @@ struct mem_cgroup {
>>>> #define MEMCG_CHARGE_BATCH 64U
>>>> extern struct mem_cgroup *root_mem_cgroup;
>>>> +extern struct obj_cgroup *root_obj_cgroup;
>>>> enum page_memcg_data_flags {
>>>> /* page->memcg_data is a pointer to an slabobj_ext vector */
>>>> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
>>>> return (memcg == root_mem_cgroup);
>>>> }
>>>> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>>>> +{
>>>> + return objcg == root_obj_cgroup;
>>>> +}
>>>
>>> After reparenting, an objcg may satisfy objcg->memcg == root_mem_cgroup
>>> while objcg != root_obj_cgroup. Should they be considered as
>>> root objcgs?
>>
>> Indeed, it's pointless to charge to root_mem_cgroup (objcg->memcg).
>>
>> So it should be:
>>
>> static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>> {
>> return (objcg == root_obj_cgroup) || (objcg->memcg == root_mem_cgroup);
>> }
>>
>
> Thanks and tomorrow I'll try to review if will be correct ;)
>
>>>> static inline bool mem_cgroup_disabled(void)
>>>> {
>>>> return !cgroup_subsys_enabled(memory_cgrp_subsys);
>>>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>>> index 2afd7f99ca101..d484b632c790f 100644
>>>> --- a/mm/memcontrol.c
>>>> +++ b/mm/memcontrol.c
>>>> @@ -2871,7 +2865,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>>>> int ret = 0;
>>>> objcg = current_obj_cgroup();
>>>> - if (objcg) {
>>>> + if (!obj_cgroup_is_root(objcg)) {
>>>
>>> Now that we support the page and slab allocators support allocating memory
>>> in NMI contexts (on some archs), current_obj_cgroup() can return NULL
>>> if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi()) returns true
>>> (then it leads to a NULL-pointer-deref bug).
>>>
>>> But IIUC this is applied to kmem charging only (as they use this_cpu ops
>>> for stats update), and we don't have to apply the same restriction to
>>> charging LRU pages with objcg.
>>>
>>> Maybe Shakeel has more insight on this.
>>>
>>> Link: https://lore.kernel.org/all/20250519063142.111219-1-shakeel.butt@linux.dev
>>
>> Thanks for this information, and it seems there's nothing wrong here.
>
> I mean at least we should not introduce a NULL-pointer-deref bug in
> __memcg_kmem_charge_page(), by assuming objcg returned by
> current_obj_cgroup() is non-NULL?
>
> 1. Someone allocates non-slab kmem in an NMI context (in_nmi() == true),
> calling __memcg_kmem_charge_page().
> 2. current_obj_cgruop() returns NULL because the architectures
> has CONFIG_MEMCG_NMI_UNSAFE and it's in an NMI context.
> 3. obj_cgroup_is_root() returns false since
> objcg (NULL) != root_obj_cgroup
> 4. we pass NULL to obj_cgroup_charge_pages().
> 5. obj_cgroup_charge_pages() calls get_mem_cgroup_from_objcg(),
> dereference objcg->memcg (! a NULL-pointer-deref).
Oh, indeed. After adding MEMCG_NMI_UNSAFE, we should first check
if objcg is NULL.
Thanks!
>
>> Thanks,
>> Qi
>>
>>>
>
On 11/18/25 7:28 PM, Qi Zheng wrote:
> Hi Harry,
>
> On 11/17/25 5:17 PM, Harry Yoo wrote:
>> On Tue, Oct 28, 2025 at 09:58:19PM +0800, Qi Zheng wrote:
>>> From: Muchun Song <songmuchun@bytedance.com>
>>>
>>> Memory cgroup functions such as get_mem_cgroup_from_folio() and
>>> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
>>> even for the root memory cgroup. In contrast, the situation for
>>> object cgroups has been different.
>>>
>>> Previously, the root object cgroup couldn't be returned because
>>> it didn't exist. Now that a valid root object cgroup exists, for
>>> the sake of consistency, it's necessary to align the behavior of
>>> object-cgroup-related operations with that of memory cgroup APIs.
>>>
>>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>> ---
>>> include/linux/memcontrol.h | 29 +++++++++++++++++-------
>>> mm/memcontrol.c | 45 ++++++++++++++++++++------------------
>>> mm/percpu.c | 2 +-
>>> 3 files changed, 46 insertions(+), 30 deletions(-)
>>>
>>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>>> index 6185d8399a54e..9fdbd4970021d 100644
>>> --- a/include/linux/memcontrol.h
>>> +++ b/include/linux/memcontrol.h
>>> @@ -332,6 +332,7 @@ struct mem_cgroup {
>>> #define MEMCG_CHARGE_BATCH 64U
>>> extern struct mem_cgroup *root_mem_cgroup;
>>> +extern struct obj_cgroup *root_obj_cgroup;
>>> enum page_memcg_data_flags {
>>> /* page->memcg_data is a pointer to an slabobj_ext vector */
>>> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct
>>> mem_cgroup *memcg)
>>> return (memcg == root_mem_cgroup);
>>> }
>>> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>>> +{
>>> + return objcg == root_obj_cgroup;
>>> +}
>>
>> After reparenting, an objcg may satisfy objcg->memcg == root_mem_cgroup
>> while objcg != root_obj_cgroup. Should they be considered as
>> root objcgs?
>
> Indeed, it's pointless to charge to root_mem_cgroup (objcg->memcg).
>
> So it should be:
>
> static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> {
> return (objcg == root_obj_cgroup) || (objcg->memcg ==
> root_mem_cgroup);
> }
Oh, we can't do that because we still need to consider this objcg when
uncharging. Some pages may be charged before reparenting.
>
>>
>>> static inline bool mem_cgroup_disabled(void)
>>> {
>>> return !cgroup_subsys_enabled(memory_cgrp_subsys);
>>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>> index 2afd7f99ca101..d484b632c790f 100644
>>> --- a/mm/memcontrol.c
>>> +++ b/mm/memcontrol.c
>>> @@ -2871,7 +2865,7 @@ int __memcg_kmem_charge_page(struct page *page,
>>> gfp_t gfp, int order)
>>> int ret = 0;
>>> objcg = current_obj_cgroup();
>>> - if (objcg) {
>>> + if (!obj_cgroup_is_root(objcg)) {
>>
>> Now that we support the page and slab allocators support allocating
>> memory
>> in NMI contexts (on some archs), current_obj_cgroup() can return NULL
>> if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi()) returns true
>> (then it leads to a NULL-pointer-deref bug).
>>
>> But IIUC this is applied to kmem charging only (as they use this_cpu ops
>> for stats update), and we don't have to apply the same restriction to
>> charging LRU pages with objcg.
>>
>> Maybe Shakeel has more insight on this.
>>
>> Link: https://lore.kernel.org/all/20250519063142.111219-1-
>> shakeel.butt@linux.dev
>
> Thanks for this information, and it seems there's nothing wrong here.
>
> Thanks,
> Qi
>
>>
>
On Tue, Nov 18, 2025 at 08:11:04PM +0800, Qi Zheng wrote:
>
>
> On 11/18/25 7:28 PM, Qi Zheng wrote:
> > Hi Harry,
> >
> > On 11/17/25 5:17 PM, Harry Yoo wrote:
> > > On Tue, Oct 28, 2025 at 09:58:19PM +0800, Qi Zheng wrote:
> > > > From: Muchun Song <songmuchun@bytedance.com>
> > > >
> > > > Memory cgroup functions such as get_mem_cgroup_from_folio() and
> > > > get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
> > > > even for the root memory cgroup. In contrast, the situation for
> > > > object cgroups has been different.
> > > >
> > > > Previously, the root object cgroup couldn't be returned because
> > > > it didn't exist. Now that a valid root object cgroup exists, for
> > > > the sake of consistency, it's necessary to align the behavior of
> > > > object-cgroup-related operations with that of memory cgroup APIs.
> > > >
> > > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > > Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> > > > ---
> > > > include/linux/memcontrol.h | 29 +++++++++++++++++-------
> > > > mm/memcontrol.c | 45 ++++++++++++++++++++------------------
> > > > mm/percpu.c | 2 +-
> > > > 3 files changed, 46 insertions(+), 30 deletions(-)
> > > >
> > > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > > > index 6185d8399a54e..9fdbd4970021d 100644
> > > > --- a/include/linux/memcontrol.h
> > > > +++ b/include/linux/memcontrol.h
> > > > @@ -332,6 +332,7 @@ struct mem_cgroup {
> > > > #define MEMCG_CHARGE_BATCH 64U
> > > > extern struct mem_cgroup *root_mem_cgroup;
> > > > +extern struct obj_cgroup *root_obj_cgroup;
> > > > enum page_memcg_data_flags {
> > > > /* page->memcg_data is a pointer to an slabobj_ext vector */
> > > > @@ -549,6 +550,11 @@ static inline bool
> > > > mem_cgroup_is_root(struct mem_cgroup *memcg)
> > > > return (memcg == root_mem_cgroup);
> > > > }
> > > > +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> > > > +{
> > > > + return objcg == root_obj_cgroup;
> > > > +}
> > >
> > > After reparenting, an objcg may satisfy objcg->memcg == root_mem_cgroup
> > > while objcg != root_obj_cgroup. Should they be considered as
> > > root objcgs?
> >
> > Indeed, it's pointless to charge to root_mem_cgroup (objcg->memcg).
> >
> > So it should be:
> >
> > static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> > {
> > return (objcg == root_obj_cgroup) || (objcg->memcg ==
> > root_mem_cgroup);
> > }
>
> Oh, we can't do that because we still need to consider this objcg when
> uncharging. Some pages may be charged before reparenting.
Ouch, right. We don't know if it's charged before reparenting and so
it can break statistics in a few places if we skip uncharging it after
repareting.
And I think we don't charge new pages to objcgs that satisfy
(objcg->memcg == root_mem_cgroup) && (objcg != root_obj_cgroup)
after they're reparented anyway...
--
Cheers,
Harry / Hyeonggon
On 11/19/25 3:24 PM, Harry Yoo wrote:
> On Tue, Nov 18, 2025 at 08:11:04PM +0800, Qi Zheng wrote:
>>
>>
>> On 11/18/25 7:28 PM, Qi Zheng wrote:
>>> Hi Harry,
>>>
>>> On 11/17/25 5:17 PM, Harry Yoo wrote:
>>>> On Tue, Oct 28, 2025 at 09:58:19PM +0800, Qi Zheng wrote:
>>>>> From: Muchun Song <songmuchun@bytedance.com>
>>>>>
>>>>> Memory cgroup functions such as get_mem_cgroup_from_folio() and
>>>>> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
>>>>> even for the root memory cgroup. In contrast, the situation for
>>>>> object cgroups has been different.
>>>>>
>>>>> Previously, the root object cgroup couldn't be returned because
>>>>> it didn't exist. Now that a valid root object cgroup exists, for
>>>>> the sake of consistency, it's necessary to align the behavior of
>>>>> object-cgroup-related operations with that of memory cgroup APIs.
>>>>>
>>>>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>>>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>>>> ---
>>>>> include/linux/memcontrol.h | 29 +++++++++++++++++-------
>>>>> mm/memcontrol.c | 45 ++++++++++++++++++++------------------
>>>>> mm/percpu.c | 2 +-
>>>>> 3 files changed, 46 insertions(+), 30 deletions(-)
>>>>>
>>>>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>>>>> index 6185d8399a54e..9fdbd4970021d 100644
>>>>> --- a/include/linux/memcontrol.h
>>>>> +++ b/include/linux/memcontrol.h
>>>>> @@ -332,6 +332,7 @@ struct mem_cgroup {
>>>>> #define MEMCG_CHARGE_BATCH 64U
>>>>> extern struct mem_cgroup *root_mem_cgroup;
>>>>> +extern struct obj_cgroup *root_obj_cgroup;
>>>>> enum page_memcg_data_flags {
>>>>> /* page->memcg_data is a pointer to an slabobj_ext vector */
>>>>> @@ -549,6 +550,11 @@ static inline bool
>>>>> mem_cgroup_is_root(struct mem_cgroup *memcg)
>>>>> return (memcg == root_mem_cgroup);
>>>>> }
>>>>> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>>>>> +{
>>>>> + return objcg == root_obj_cgroup;
>>>>> +}
>>>>
>>>> After reparenting, an objcg may satisfy objcg->memcg == root_mem_cgroup
>>>> while objcg != root_obj_cgroup. Should they be considered as
>>>> root objcgs?
>>>
>>> Indeed, it's pointless to charge to root_mem_cgroup (objcg->memcg).
>>>
>>> So it should be:
>>>
>>> static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
>>> {
>>> return (objcg == root_obj_cgroup) || (objcg->memcg ==
>>> root_mem_cgroup);
>>> }
>>
>> Oh, we can't do that because we still need to consider this objcg when
>> uncharging. Some pages may be charged before reparenting.
>
> Ouch, right. We don't know if it's charged before reparenting and so
> it can break statistics in a few places if we skip uncharging it after
> repareting.
Right.
>
> And I think we don't charge new pages to objcgs that satisfy
> (objcg->memcg == root_mem_cgroup) && (objcg != root_obj_cgroup)
> after they're reparented anyway...
The charge and uncharge operations must be symmetrical, so we cannot
control the charge operation independently.
Otherwise:
charge
======
if ((objcg->memcg == root_mem_cgroup))
skip charge this page
uncharge
========
we can't decide whether to skip this page.
Thanks,
Qi
>
On Mon, Nov 17, 2025 at 06:17:47PM +0900, Harry Yoo wrote:
> On Tue, Oct 28, 2025 at 09:58:19PM +0800, Qi Zheng wrote:
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 2afd7f99ca101..d484b632c790f 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -2871,7 +2865,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
> > int ret = 0;
> >
> > objcg = current_obj_cgroup();
> > - if (objcg) {
> > + if (!obj_cgroup_is_root(objcg)) {
>
> Now that we support the page and slab allocators support allocating memory
> in NMI contexts (on some archs), current_obj_cgroup() can return NULL
> if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi()) returns true
> (then it leads to a NULL-pointer-deref bug).
This is a real issue, but
> But IIUC this is applied to kmem charging only (as they use this_cpu ops
> for stats update), and we don't have to apply the same restriction to
> charging LRU pages with objcg.
actually this should be fine for now since we use get_mem_cgroup_from_mm()
and obj_cgroup_from_memcg() instead of current_obj_cgroup() when charging
LRU pages.
But it is not immediately obvious that there are multiple ways to get
an objcg, each with different restrictions depending on what you are
going to charge :/
> Maybe Shakeel has more insight on this.
>
> Link: https://lore.kernel.org/all/20250519063142.111219-1-shakeel.butt@linux.dev
--
Cheers,
Harry / Hyeonggon
On 11/17/25 5:41 PM, Harry Yoo wrote:
> On Mon, Nov 17, 2025 at 06:17:47PM +0900, Harry Yoo wrote:
>> On Tue, Oct 28, 2025 at 09:58:19PM +0800, Qi Zheng wrote:
>>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>>> index 2afd7f99ca101..d484b632c790f 100644
>>> --- a/mm/memcontrol.c
>>> +++ b/mm/memcontrol.c
>>> @@ -2871,7 +2865,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
>>> int ret = 0;
>>>
>>> objcg = current_obj_cgroup();
>>> - if (objcg) {
>>> + if (!obj_cgroup_is_root(objcg)) {
>>
>> Now that we support the page and slab allocators support allocating memory
>> in NMI contexts (on some archs), current_obj_cgroup() can return NULL
>> if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi()) returns true
>> (then it leads to a NULL-pointer-deref bug).
>
> This is a real issue, but
>
>> But IIUC this is applied to kmem charging only (as they use this_cpu ops
>> for stats update), and we don't have to apply the same restriction to
>> charging LRU pages with objcg.
>
> actually this should be fine for now since we use get_mem_cgroup_from_mm()
> and obj_cgroup_from_memcg() instead of current_obj_cgroup() when charging
> LRU pages.
>
> But it is not immediately obvious that there are multiple ways to get
> an objcg, each with different restrictions depending on what you are
> going to charge :/
Perhaps some comments need to be added. :)
>
>> Maybe Shakeel has more insight on this.
>>
>> Link: https://lore.kernel.org/all/20250519063142.111219-1-shakeel.butt@linux.dev
>
© 2016 - 2026 Red Hat, Inc.