Reading from the memory.stat file can be expensive because of the string
encoding/decoding and text filtering involved. Introduce three kfuncs for
fetching each type of memcg stat from a bpf program. This allows data to be
transferred directly to userspace, eliminating the need for string
encoding/decoding. It also removes the need for text filtering since it
allows for fetching specific stats.
The patch also includes a kfunc for flushing stats in order to read the
latest values. Note that this is not required for fetching stats, since the
kernel periodically flushes memcg stats. It is left up to the programmer
whether they want more recent stats or not.
Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
---
mm/memcontrol.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4deda33625f4..6547c27d4430 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -871,6 +871,73 @@ unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
}
#endif
+/*
+ * memcg_from_cgroup - resolve the mem_cgroup behind a cgroup
+ * @cgrp: cgroup to look at, may be NULL
+ *
+ * Returns NULL when @cgrp is NULL, otherwise whatever
+ * mem_cgroup_from_css() yields for the cgroup's memory css slot.
+ *
+ * cgrp->subsys[] is annotated __rcu, so the slot is loaded through
+ * rcu_dereference_raw() instead of being handed to mem_cgroup_from_css()
+ * directly; passing the annotated pointer as-is triggers a sparse
+ * "different address spaces" warning.
+ *
+ * NOTE(review): this helper does not enter an RCU read-side section on
+ * its own; confirm that every caller keeps the css alive for as long as
+ * the returned pointer is used.
+ */
+static inline struct mem_cgroup *memcg_from_cgroup(struct cgroup *cgrp)
+{
+	struct cgroup_subsys_state *css;
+
+	if (!cgrp)
+		return NULL;
+
+	css = rcu_dereference_raw(cgrp->subsys[memory_cgrp_id]);
+	return mem_cgroup_from_css(css);
+}
+
+__bpf_kfunc_start_defs();
+
+/**
+ * memcg_flush_stats - flush the memcg stats of a cgroup
+ * @cgrp: cgroup whose memcg stats should be flushed
+ *
+ * Calls mem_cgroup_flush_stats() on the memcg resolved from @cgrp.
+ *
+ * Return: 0 once the flush has been issued, -EINVAL when no memcg could
+ * be resolved for @cgrp.
+ */
+__bpf_kfunc int memcg_flush_stats(struct cgroup *cgrp)
+{
+	struct mem_cgroup *memcg = memcg_from_cgroup(cgrp);
+
+	if (!memcg)
+		return -EINVAL;
+
+	mem_cgroup_flush_stats(memcg);
+	return 0;
+}
+
+/**
+ * memcg_stat_fetch - read one memcg_stat_item counter of a cgroup
+ * @cgrp: cgroup to read from
+ * @item: which memcg stat to read
+ *
+ * Return: the value reported by memcg_page_state_output(), or 0 when no
+ * memcg could be resolved for @cgrp or @item is out of range.
+ */
+__bpf_kfunc unsigned long memcg_stat_fetch(struct cgroup *cgrp,
+					   enum memcg_stat_item item)
+{
+	struct mem_cgroup *memcg = memcg_from_cgroup(cgrp);
+	int idx = item;
+
+	if (!memcg)
+		return 0;
+
+	/*
+	 * A BPF caller can hand in any integer for @item.
+	 * NOTE(review): the stat helpers presumably use it as a table index,
+	 * so values outside the enum are rejected here - confirm the bounds.
+	 */
+	if (idx < NR_VM_NODE_STAT_ITEMS || idx >= MEMCG_NR_STAT)
+		return 0;
+
+	return memcg_page_state_output(memcg, item);
+}
+
+/**
+ * memcg_node_stat_fetch - read one node_stat_item counter of a cgroup
+ * @cgrp: cgroup to read from
+ * @item: which node stat to read
+ *
+ * Return: the value reported by memcg_page_state_output(), or 0 when no
+ * memcg could be resolved for @cgrp or @item is out of range.
+ */
+__bpf_kfunc unsigned long memcg_node_stat_fetch(struct cgroup *cgrp,
+						enum node_stat_item item)
+{
+	struct mem_cgroup *memcg = memcg_from_cgroup(cgrp);
+	int idx = item;
+
+	if (!memcg)
+		return 0;
+
+	/* Same range guard as above, for the node stat enum. */
+	if (idx < 0 || idx >= NR_VM_NODE_STAT_ITEMS)
+		return 0;
+
+	return memcg_page_state_output(memcg, item);
+}
+
+/**
+ * memcg_vm_event_fetch - read one vm event counter of a cgroup
+ * @cgrp: cgroup to read from
+ * @item: which vm event to read
+ *
+ * Return: the value reported by memcg_events(), or 0 when no memcg could
+ * be resolved for @cgrp or @item is out of range.
+ */
+__bpf_kfunc unsigned long memcg_vm_event_fetch(struct cgroup *cgrp,
+					       enum vm_event_item item)
+{
+	struct mem_cgroup *memcg = memcg_from_cgroup(cgrp);
+	int idx = item;
+
+	if (!memcg)
+		return 0;
+
+	/* Same range guard as above, for the vm event enum. */
+	if (idx < 0 || idx >= NR_VM_EVENT_ITEMS)
+		return 0;
+
+	return memcg_events(memcg, item);
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_KFUNCS_START(bpf_memcontrol_kfunc_ids) /* id set listing the memcg stat kfuncs */
+BTF_ID_FLAGS(func, memcg_flush_stats, KF_TRUSTED_ARGS | KF_SLEEPABLE) /* only entry also flagged KF_SLEEPABLE */
+BTF_ID_FLAGS(func, memcg_stat_fetch, KF_TRUSTED_ARGS) /* takes enum memcg_stat_item */
+BTF_ID_FLAGS(func, memcg_node_stat_fetch, KF_TRUSTED_ARGS) /* takes enum node_stat_item */
+BTF_ID_FLAGS(func, memcg_vm_event_fetch, KF_TRUSTED_ARGS) /* takes enum vm_event_item */
+BTF_KFUNCS_END(bpf_memcontrol_kfunc_ids) /* closes the set opened above */
+
+/* Descriptor pairing the memcg kfunc id set with its owner. */
+static const struct btf_kfunc_id_set bpf_memcontrol_kfunc_set = {
+	.set	= &bpf_memcontrol_kfunc_ids,
+	.owner	= THIS_MODULE,
+};
+
+/*
+ * Late initcall that registers bpf_memcontrol_kfunc_set under
+ * BPF_PROG_TYPE_UNSPEC and passes the registration result back.
+ */
+static int __init bpf_memcontrol_kfunc_init(void)
+{
+	const struct btf_kfunc_id_set *kset = &bpf_memcontrol_kfunc_set;
+
+	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, kset);
+}
+late_initcall(bpf_memcontrol_kfunc_init);
+
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
{
/*
--
2.47.3
Hi JP,
kernel test robot noticed the following build warnings:
[auto build test WARNING on bpf-next/net]
[also build test WARNING on bpf-next/master bpf/master akpm-mm/mm-everything linus/master v6.18-rc1 next-20251016]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/JP-Kobryn/memcg-introduce-kfuncs-for-fetching-memcg-stats/20251016-030920
base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git net
patch link: https://lore.kernel.org/r/20251015190813.80163-2-inwardvessel%40gmail.com
patch subject: [PATCH v2 1/2] memcg: introduce kfuncs for fetching memcg stats
config: x86_64-randconfig-121-20251016 (https://download.01.org/0day-ci/archive/20251017/202510170654.s2j4GuCs-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251017/202510170654.s2j4GuCs-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202510170654.s2j4GuCs-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
mm/memcontrol.c:4236:52: sparse: sparse: incompatible types in comparison expression (different address spaces):
mm/memcontrol.c:4236:52: sparse: struct task_struct [noderef] __rcu *
mm/memcontrol.c:4236:52: sparse: struct task_struct *
>> mm/memcontrol.c:876:55: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct cgroup_subsys_state *css @@ got struct cgroup_subsys_state [noderef] __rcu * @@
mm/memcontrol.c:876:55: sparse: expected struct cgroup_subsys_state *css
mm/memcontrol.c:876:55: sparse: got struct cgroup_subsys_state [noderef] __rcu *
>> mm/memcontrol.c:876:55: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct cgroup_subsys_state *css @@ got struct cgroup_subsys_state [noderef] __rcu * @@
mm/memcontrol.c:876:55: sparse: expected struct cgroup_subsys_state *css
mm/memcontrol.c:876:55: sparse: got struct cgroup_subsys_state [noderef] __rcu *
>> mm/memcontrol.c:876:55: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct cgroup_subsys_state *css @@ got struct cgroup_subsys_state [noderef] __rcu * @@
mm/memcontrol.c:876:55: sparse: expected struct cgroup_subsys_state *css
mm/memcontrol.c:876:55: sparse: got struct cgroup_subsys_state [noderef] __rcu *
>> mm/memcontrol.c:876:55: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct cgroup_subsys_state *css @@ got struct cgroup_subsys_state [noderef] __rcu * @@
mm/memcontrol.c:876:55: sparse: expected struct cgroup_subsys_state *css
mm/memcontrol.c:876:55: sparse: got struct cgroup_subsys_state [noderef] __rcu *
mm/memcontrol.c: note: in included file:
include/linux/memcontrol.h:729:9: sparse: sparse: context imbalance in 'folio_lruvec_lock' - wrong count at exit
include/linux/memcontrol.h:729:9: sparse: sparse: context imbalance in 'folio_lruvec_lock_irq' - wrong count at exit
include/linux/memcontrol.h:729:9: sparse: sparse: context imbalance in 'folio_lruvec_lock_irqsave' - wrong count at exit
vim +876 mm/memcontrol.c
873
874 static inline struct mem_cgroup *memcg_from_cgroup(struct cgroup *cgrp)
875 {
> 876 return cgrp ? mem_cgroup_from_css(cgrp->subsys[memory_cgrp_id]) : NULL;
877 }
878
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On Wed, Oct 15, 2025 at 12:08 PM JP Kobryn <inwardvessel@gmail.com> wrote:
>
[...]
> ---
> mm/memcontrol.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 67 insertions(+)
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 4deda33625f4..6547c27d4430 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -871,6 +871,73 @@ unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
> }
> #endif
>
> +static inline struct mem_cgroup *memcg_from_cgroup(struct cgroup *cgrp)
> +{
> + return cgrp ? mem_cgroup_from_css(cgrp->subsys[memory_cgrp_id]) : NULL;
> +}
> +
We should add __bpf_kfunc_start_defs() here, and __bpf_kfunc_end_defs()
after all the kfuncs.
> +__bpf_kfunc static void memcg_flush_stats(struct cgroup *cgrp)
We mostly do not make kfunc static, but it seems to also work.
> +{
> + struct mem_cgroup *memcg = memcg_from_cgroup(cgrp);
> +
> + if (!memcg)
> + return;
Maybe we can let memcg_flush_stats return int, and return -EINVAL
on memcg == NULL cases?
> +
> + mem_cgroup_flush_stats(memcg);
> +}
> +
[...]
On 10/15/25 4:12 PM, Song Liu wrote:
> On Wed, Oct 15, 2025 at 12:08 PM JP Kobryn <inwardvessel@gmail.com> wrote:
>>
> [...]
>> ---
>> mm/memcontrol.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 67 insertions(+)
>>
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 4deda33625f4..6547c27d4430 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -871,6 +871,73 @@ unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
>> }
>> #endif
>>
>> +static inline struct mem_cgroup *memcg_from_cgroup(struct cgroup *cgrp)
>> +{
>> + return cgrp ? mem_cgroup_from_css(cgrp->subsys[memory_cgrp_id]) : NULL;
>> +}
>> +
>
> We should add __bpf_kfunc_start_defs() here, and __bpf_kfunc_end_defs()
> after all the kfuncs.
Good call.
>
>> +__bpf_kfunc static void memcg_flush_stats(struct cgroup *cgrp)
>
> We mostly do not make kfunc static, but it seems to also work.
>
>> +{
>> + struct mem_cgroup *memcg = memcg_from_cgroup(cgrp);
>> +
>> + if (!memcg)
>> + return;
>
> Maybe we can let memcg_flush_stats return int, and return -EINVAL
> on memcg == NULL cases?
Sure, I'll do that in v3.
>
>> +
>> + mem_cgroup_flush_stats(memcg);
>> +}
>> +
> [...]
On 10/15/25 4:12 PM, Song Liu wrote:
> On Wed, Oct 15, 2025 at 12:08 PM JP Kobryn <inwardvessel@gmail.com> wrote:
> [...]
>> ---
>> mm/memcontrol.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 67 insertions(+)
>>
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 4deda33625f4..6547c27d4430 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -871,6 +871,73 @@ unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
>> }
>> #endif
>>
>> +static inline struct mem_cgroup *memcg_from_cgroup(struct cgroup *cgrp)
>> +{
>> + return cgrp ? mem_cgroup_from_css(cgrp->subsys[memory_cgrp_id]) : NULL;
>> +}
>> +
> We should add __bpf_kfunc_start_defs() here, and __bpf_kfunc_end_defs()
> after all the kfuncs.
>
>> +__bpf_kfunc static void memcg_flush_stats(struct cgroup *cgrp)
> We mostly do not make kfunc static, but it seems to also work.
Let us remove 'static' in __bpf_kfunc functions in order to be consistent
with other existing kfuncs.
The __bpf_kfunc macro is
linux/btf.h:#define __bpf_kfunc __used __retain __noclone noinline
__used and __retain attributes ensure the function won't be removed
by compiler/linker.
>
>> +{
>> + struct mem_cgroup *memcg = memcg_from_cgroup(cgrp);
>> +
>> + if (!memcg)
>> + return;
> Maybe we can let memcg_flush_stats return int, and return -EINVAL
> on memcg == NULL cases?
>
>> +
>> + mem_cgroup_flush_stats(memcg);
>> +}
>> +
> [...]
>
On Wed, Oct 15, 2025 at 12:08:12PM -0700, JP Kobryn wrote:
> Reading from the memory.stat file can be expensive because of the string
> encoding/decoding and text filtering involved. Introduce three kfuncs for
> fetching each type of memcg stat from a bpf program. This allows data to be
> transferred directly to userspace, eliminating the need for string
> encoding/decoding. It also removes the need for text filtering since it
> allows for fetching specific stats.
>
> The patch also includes a kfunc for flushing stats in order to read the
> latest values. Note that this is not required for fetching stats, since the
> kernel periodically flushes memcg stats. It is left up to the programmer
> whether they want more recent stats or not.
>
> Signed-off-by: JP Kobryn <inwardvessel@gmail.com>

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
© 2016 - 2025 Red Hat, Inc.