mm/memcontrol-v1.c | 97 ++++++++++++++++++++++++++-------------------- mm/memcontrol-v1.h | 34 ++++++++++++++++ mm/memcontrol.c | 87 ++++++++++++++++++++++------------------- 3 files changed, 137 insertions(+), 81 deletions(-)
From: Jianyue Wu <wujianyue000@gmail.com>
Replace seq_printf/seq_buf_printf with lightweight helpers to avoid
printf parsing in memcg stats output.
Key changes:
- Add memcg_seq_put_name_val() for seq_file "name value\n" formatting
- Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting
- Update __memory_events_show(), swap_events_show(),
memory_stat_format(), memory_numa_stat_show(), and related helpers
Performance:
- 1M reads of memory.stat+memory.numa_stat
- Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s
- After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop)
Tests:
- Script:
for ((i=1; i<=1000000; i++)); do
: > /dev/null < /sys/fs/cgroup/memory.stat
: > /dev/null < /sys/fs/cgroup/memory.numa_stat
done
Signed-off-by: Jianyue Wu <wujianyue000@gmail.com>
---
mm/memcontrol-v1.c | 97 ++++++++++++++++++++++++++--------------------
mm/memcontrol-v1.h | 34 ++++++++++++++++
mm/memcontrol.c | 87 ++++++++++++++++++++++-------------------
3 files changed, 137 insertions(+), 81 deletions(-)
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 6eed14bff742..400feb798aec 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -10,7 +10,7 @@
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/file.h>
-#include <linux/seq_buf.h>
+#include <linux/string.h>
#include "internal.h"
#include "swap.h"
@@ -1795,25 +1795,33 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
mem_cgroup_flush_stats(memcg);
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
- seq_printf(m, "%s=%lu", stat->name,
- mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
- false));
- for_each_node_state(nid, N_MEMORY)
- seq_printf(m, " N%d=%lu", nid,
- mem_cgroup_node_nr_lru_pages(memcg, nid,
- stat->lru_mask, false));
+ seq_puts(m, stat->name);
+ seq_put_decimal_ull(m, "=",
+ (u64)mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
+ false));
+ for_each_node_state(nid, N_MEMORY) {
+ seq_put_decimal_ull(m, " N", nid);
+ seq_put_decimal_ull(m, "=",
+ (u64)mem_cgroup_node_nr_lru_pages(memcg, nid,
+ stat->lru_mask, false));
+ }
seq_putc(m, '\n');
}
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
- seq_printf(m, "hierarchical_%s=%lu", stat->name,
- mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
- true));
- for_each_node_state(nid, N_MEMORY)
- seq_printf(m, " N%d=%lu", nid,
- mem_cgroup_node_nr_lru_pages(memcg, nid,
- stat->lru_mask, true));
+ seq_puts(m, "hierarchical_");
+ seq_puts(m, stat->name);
+ seq_put_decimal_ull(m, "=",
+ (u64)mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
+ true));
+ for_each_node_state(nid, N_MEMORY) {
+ seq_put_decimal_ull(m, " N", nid);
+ seq_put_decimal_ull(m, "=",
+ (u64)mem_cgroup_node_nr_lru_pages(memcg, nid,
+ stat->lru_mask,
+ true));
+ }
seq_putc(m, '\n');
}
@@ -1879,17 +1887,17 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
unsigned long nr;
nr = memcg_page_state_local_output(memcg, memcg1_stats[i]);
- seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr);
+ memcg_seq_buf_put_name_val(s, memcg1_stat_names[i], (u64)nr);
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
- memcg_events_local(memcg, memcg1_events[i]));
+ memcg_seq_buf_put_name_val(s, vm_event_name(memcg1_events[i]),
+ (u64)memcg_events_local(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
- memcg_page_state_local(memcg, NR_LRU_BASE + i) *
- PAGE_SIZE);
+ memcg_seq_buf_put_name_val(s, lru_list_name(i),
+ memcg_page_state_local(memcg, NR_LRU_BASE + i) *
+ PAGE_SIZE);
/* Hierarchical information */
memory = memsw = PAGE_COUNTER_MAX;
@@ -1897,28 +1905,33 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
memory = min(memory, READ_ONCE(mi->memory.max));
memsw = min(memsw, READ_ONCE(mi->memsw.max));
}
- seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
- (u64)memory * PAGE_SIZE);
- seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
- (u64)memsw * PAGE_SIZE);
+ memcg_seq_buf_put_name_val(s, "hierarchical_memory_limit",
+ (u64)memory * PAGE_SIZE);
+ memcg_seq_buf_put_name_val(s, "hierarchical_memsw_limit",
+ (u64)memsw * PAGE_SIZE);
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
unsigned long nr;
- nr = memcg_page_state_output(memcg, memcg1_stats[i]);
- seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
- (u64)nr);
+ nr = memcg_page_state_output(memcg, memcg1_stats[i]);
+ seq_buf_puts(s, "total_");
+ memcg_seq_buf_put_name_val(s, memcg1_stat_names[i],
+ (u64)nr);
}
- for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_buf_printf(s, "total_%s %llu\n",
- vm_event_name(memcg1_events[i]),
- (u64)memcg_events(memcg, memcg1_events[i]));
+ for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) {
+ seq_buf_puts(s, "total_");
+ memcg_seq_buf_put_name_val(s,
+ vm_event_name(memcg1_events[i]),
+ (u64)memcg_events(memcg, memcg1_events[i]));
+ }
- for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
- (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
- PAGE_SIZE);
+ for (i = 0; i < NR_LRU_LISTS; i++) {
+ seq_buf_puts(s, "total_");
+ memcg_seq_buf_put_name_val(s, lru_list_name(i),
+ (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
+ (PAGE_SIZE));
+ }
#ifdef CONFIG_DEBUG_VM
{
@@ -1933,8 +1946,8 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
anon_cost += mz->lruvec.anon_cost;
file_cost += mz->lruvec.file_cost;
}
- seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
- seq_buf_printf(s, "file_cost %lu\n", file_cost);
+ memcg_seq_buf_put_name_val(s, "anon_cost", (u64)anon_cost);
+ memcg_seq_buf_put_name_val(s, "file_cost", (u64)file_cost);
}
#endif
}
@@ -1969,10 +1982,10 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(sf);
- seq_printf(sf, "oom_kill_disable %d\n", READ_ONCE(memcg->oom_kill_disable));
- seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
- seq_printf(sf, "oom_kill %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
+ memcg_seq_put_name_val(sf, "oom_kill_disable", READ_ONCE(memcg->oom_kill_disable));
+ memcg_seq_put_name_val(sf, "under_oom", (bool)memcg->under_oom);
+ memcg_seq_put_name_val(sf, "oom_kill",
+ (u64)atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
return 0;
}
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 6358464bb416..a4ac2d869506 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -4,6 +4,9 @@
#define __MM_MEMCONTROL_V1_H
#include <linux/cgroup-defs.h>
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/sprintf.h>
/* Cgroup v1 and v2 common declarations */
@@ -33,6 +36,37 @@ int memory_stat_show(struct seq_file *m, void *v);
void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n);
struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg);
+/* Max 2^64 - 1 = 18446744073709551615 (20 digits) */
+#define MEMCG_DEC_U64_MAX_LEN 20
+
+static inline void memcg_seq_put_name_val(struct seq_file *m, const char *name,
+ u64 val)
+{
+ seq_puts(m, name);
+ /* need a space between name and value */
+ seq_put_decimal_ull(m, " ", val);
+ seq_putc(m, '\n');
+}
+
+static inline void memcg_seq_buf_put_name_val(struct seq_buf *s,
+ const char *name, u64 val)
+{
+ char num_buf[MEMCG_DEC_U64_MAX_LEN];
+ int num_len;
+
+ num_len = num_to_str(num_buf, sizeof(num_buf), val, 0);
+ if (num_len <= 0)
+ return;
+
+ if (seq_buf_puts(s, name))
+ return;
+ if (seq_buf_putc(s, ' '))
+ return;
+ if (seq_buf_putmem(s, num_buf, num_len))
+ return;
+ seq_buf_putc(s, '\n');
+}
+
/* Cgroup v1-specific declarations */
#ifdef CONFIG_MEMCG_V1
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 86f43b7e5f71..62652a3c8681 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -42,6 +42,7 @@
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
#include <linux/limits.h>
+#include <linux/sprintf.h>
#include <linux/export.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -1485,26 +1486,26 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
continue;
#endif
size = memcg_page_state_output(memcg, memory_stats[i].idx);
- seq_buf_printf(s, "%s %llu\n", memory_stats[i].name, size);
+ memcg_seq_buf_put_name_val(s, memory_stats[i].name, size);
if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) {
size += memcg_page_state_output(memcg,
NR_SLAB_RECLAIMABLE_B);
- seq_buf_printf(s, "slab %llu\n", size);
+ memcg_seq_buf_put_name_val(s, "slab", size);
}
}
/* Accumulated memory events */
- seq_buf_printf(s, "pgscan %lu\n",
- memcg_events(memcg, PGSCAN_KSWAPD) +
- memcg_events(memcg, PGSCAN_DIRECT) +
- memcg_events(memcg, PGSCAN_PROACTIVE) +
- memcg_events(memcg, PGSCAN_KHUGEPAGED));
- seq_buf_printf(s, "pgsteal %lu\n",
- memcg_events(memcg, PGSTEAL_KSWAPD) +
- memcg_events(memcg, PGSTEAL_DIRECT) +
- memcg_events(memcg, PGSTEAL_PROACTIVE) +
- memcg_events(memcg, PGSTEAL_KHUGEPAGED));
+ memcg_seq_buf_put_name_val(s, "pgscan",
+ memcg_events(memcg, PGSCAN_KSWAPD) +
+ memcg_events(memcg, PGSCAN_DIRECT) +
+ memcg_events(memcg, PGSCAN_PROACTIVE) +
+ memcg_events(memcg, PGSCAN_KHUGEPAGED));
+ memcg_seq_buf_put_name_val(s, "pgsteal",
+ memcg_events(memcg, PGSTEAL_KSWAPD) +
+ memcg_events(memcg, PGSTEAL_DIRECT) +
+ memcg_events(memcg, PGSTEAL_PROACTIVE) +
+ memcg_events(memcg, PGSTEAL_KHUGEPAGED));
for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
#ifdef CONFIG_MEMCG_V1
@@ -1512,9 +1513,9 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
memcg_vm_event_stat[i] == PGPGOUT)
continue;
#endif
- seq_buf_printf(s, "%s %lu\n",
- vm_event_name(memcg_vm_event_stat[i]),
- memcg_events(memcg, memcg_vm_event_stat[i]));
+ memcg_seq_buf_put_name_val(s,
+ vm_event_name(memcg_vm_event_stat[i]),
+ memcg_events(memcg, memcg_vm_event_stat[i]));
}
}
@@ -4218,10 +4219,12 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset)
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
- if (value == PAGE_COUNTER_MAX)
+ if (value == PAGE_COUNTER_MAX) {
seq_puts(m, "max\n");
- else
- seq_printf(m, "%llu\n", (u64)value * PAGE_SIZE);
+ } else {
+ seq_put_decimal_ull(m, "", (u64)value * PAGE_SIZE);
+ seq_putc(m, '\n');
+ }
return 0;
}
@@ -4247,7 +4250,8 @@ static int peak_show(struct seq_file *sf, void *v, struct page_counter *pc)
else
peak = max(fd_peak, READ_ONCE(pc->local_watermark));
- seq_printf(sf, "%llu\n", peak * PAGE_SIZE);
+ seq_put_decimal_ull(sf, "", peak * PAGE_SIZE);
+ seq_putc(sf, '\n');
return 0;
}
@@ -4480,16 +4484,16 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
*/
static void __memory_events_show(struct seq_file *m, atomic_long_t *events)
{
- seq_printf(m, "low %lu\n", atomic_long_read(&events[MEMCG_LOW]));
- seq_printf(m, "high %lu\n", atomic_long_read(&events[MEMCG_HIGH]));
- seq_printf(m, "max %lu\n", atomic_long_read(&events[MEMCG_MAX]));
- seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM]));
- seq_printf(m, "oom_kill %lu\n",
- atomic_long_read(&events[MEMCG_OOM_KILL]));
- seq_printf(m, "oom_group_kill %lu\n",
- atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]));
- seq_printf(m, "sock_throttled %lu\n",
- atomic_long_read(&events[MEMCG_SOCK_THROTTLED]));
+ memcg_seq_put_name_val(m, "low", atomic_long_read(&events[MEMCG_LOW]));
+ memcg_seq_put_name_val(m, "high", atomic_long_read(&events[MEMCG_HIGH]));
+ memcg_seq_put_name_val(m, "max", atomic_long_read(&events[MEMCG_MAX]));
+ memcg_seq_put_name_val(m, "oom", atomic_long_read(&events[MEMCG_OOM]));
+ memcg_seq_put_name_val(m, "oom_kill",
+ atomic_long_read(&events[MEMCG_OOM_KILL]));
+ memcg_seq_put_name_val(m, "oom_group_kill",
+ atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]));
+ memcg_seq_put_name_val(m, "sock_throttled",
+ atomic_long_read(&events[MEMCG_SOCK_THROTTLED]));
}
static int memory_events_show(struct seq_file *m, void *v)
@@ -4544,7 +4548,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
if (memory_stats[i].idx >= NR_VM_NODE_STAT_ITEMS)
continue;
- seq_printf(m, "%s", memory_stats[i].name);
+ seq_puts(m, memory_stats[i].name);
for_each_node_state(nid, N_MEMORY) {
u64 size;
struct lruvec *lruvec;
@@ -4552,7 +4556,10 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
size = lruvec_page_state_output(lruvec,
memory_stats[i].idx);
- seq_printf(m, " N%d=%llu", nid, size);
+
+ seq_put_decimal_ull(m, " N", nid);
+ seq_putc(m, '=');
+ seq_put_decimal_ull(m, "", size);
}
seq_putc(m, '\n');
}
@@ -4565,7 +4572,8 @@ static int memory_oom_group_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
- seq_printf(m, "%d\n", READ_ONCE(memcg->oom_group));
+ seq_put_decimal_ll(m, "", READ_ONCE(memcg->oom_group));
+ seq_putc(m, '\n');
return 0;
}
@@ -5373,12 +5381,12 @@ static int swap_events_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
- seq_printf(m, "high %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]));
- seq_printf(m, "max %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
- seq_printf(m, "fail %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]));
+ memcg_seq_put_name_val(m, "high",
+ atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]));
+ memcg_seq_put_name_val(m, "max",
+ atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
+ memcg_seq_put_name_val(m, "fail",
+ atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]));
return 0;
}
@@ -5564,7 +5572,8 @@ static int zswap_writeback_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
- seq_printf(m, "%d\n", READ_ONCE(memcg->zswap_writeback));
+ seq_put_decimal_ll(m, "", READ_ONCE(memcg->zswap_writeback));
+ seq_putc(m, '\n');
return 0;
}
--
2.43.0
On Thu, 8 Jan 2026 17:37:29 +0800 Jianyue Wu <wujianyue000@gmail.com> wrote:
> From: Jianyue Wu <wujianyue000@gmail.com>
>
> Replace seq_printf/seq_buf_printf with lightweight helpers to avoid
> printf parsing in memcg stats output.
>
> Key changes:
> - Add memcg_seq_put_name_val() for seq_file "name value\n" formatting
> - Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting
> - Update __memory_events_show(), swap_events_show(),
> memory_stat_format(), memory_numa_stat_show(), and related helpers
>
> Performance:
> - 1M reads of memory.stat+memory.numa_stat
> - Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s
> - After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop)
>
> Tests:
> - Script:
> for ((i=1; i<=1000000; i++)); do
> : > /dev/null < /sys/fs/cgroup/memory.stat
> : > /dev/null < /sys/fs/cgroup/memory.numa_stat
> done
>
I suspect there are workloads which read these files frequently.
I'd be interested in learning "how frequently". Perhaps
ascii-through-sysfs simply isn't an appropriate API for this data?
> @@ -1795,25 +1795,33 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
> mem_cgroup_flush_stats(memcg);
>
> for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
> - seq_printf(m, "%s=%lu", stat->name,
> - mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
> - false));
> - for_each_node_state(nid, N_MEMORY)
> - seq_printf(m, " N%d=%lu", nid,
> - mem_cgroup_node_nr_lru_pages(memcg, nid,
> - stat->lru_mask, false));
> + seq_puts(m, stat->name);
> + seq_put_decimal_ull(m, "=",
> + (u64)mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
> + false));
> + for_each_node_state(nid, N_MEMORY) {
> + seq_put_decimal_ull(m, " N", nid);
> + seq_put_decimal_ull(m, "=",
> + (u64)mem_cgroup_node_nr_lru_pages(memcg, nid,
> + stat->lru_mask, false));
The indenting went wrong here.
The patch does do a lot of ugly tricks to constrain the number of
columns used. Perhaps introduce some new local variables to clean this
up?
Andrew Morton <akpm@linux-foundation.org> writes:
> On Thu, 8 Jan 2026 17:37:29 +0800 Jianyue Wu <wujianyue000@gmail.com> wrote:
>
>> From: Jianyue Wu <wujianyue000@gmail.com>
>>
>> Replace seq_printf/seq_buf_printf with lightweight helpers to avoid
>> printf parsing in memcg stats output.
>>
>> Key changes:
>> - Add memcg_seq_put_name_val() for seq_file "name value\n" formatting
>> - Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting
>> - Update __memory_events_show(), swap_events_show(),
>> memory_stat_format(), memory_numa_stat_show(), and related helpers
>>
>> Performance:
>> - 1M reads of memory.stat+memory.numa_stat
>> - Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s
>> - After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop)
>>
>> Tests:
>> - Script:
>> for ((i=1; i<=1000000; i++)); do
>> : > /dev/null < /sys/fs/cgroup/memory.stat
>> : > /dev/null < /sys/fs/cgroup/memory.numa_stat
>> done
>>
>
> I suspect there are workloads which read these files frequently.
>
> I'd be interested in learning "how frequently". Perhaps
> ascii-through-sysfs simply isn't an appropriate API for this data?
We just got a bpf interface for this data merged, exactly to speed
things up: commit 99430ab8b804 ("mm: introduce BPF kfuncs to access
memcg statistics and events") in bpf-next.
Just checked the BPF kfuncs patch — nice, it speeds things up a lot. I think it is better to read this data via bpf instead of a normal file read.
The workload normally reads these files about every 2s, but there could be
several different services that each need to poll them.
Yes, previously the indentation was adjusted by hand :)
On Fri, Jan 9, 2026 at 6:50 AM Roman Gushchin <roman.gushchin@linux.dev> wrote:
>
> Andrew Morton <akpm@linux-foundation.org> writes:
>
> > On Thu, 8 Jan 2026 17:37:29 +0800 Jianyue Wu <wujianyue000@gmail.com> wrote:
> >
> >> From: Jianyue Wu <wujianyue000@gmail.com>
> >>
> >> Replace seq_printf/seq_buf_printf with lightweight helpers to avoid
> >> printf parsing in memcg stats output.
> >>
> >> Key changes:
> >> - Add memcg_seq_put_name_val() for seq_file "name value\n" formatting
> >> - Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting
> >> - Update __memory_events_show(), swap_events_show(),
> >> memory_stat_format(), memory_numa_stat_show(), and related helpers
> >>
> >> Performance:
> >> - 1M reads of memory.stat+memory.numa_stat
> >> - Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s
> >> - After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop)
> >>
> >> Tests:
> >> - Script:
> >> for ((i=1; i<=1000000; i++)); do
> >> : > /dev/null < /sys/fs/cgroup/memory.stat
> >> : > /dev/null < /sys/fs/cgroup/memory.numa_stat
> >> done
> >>
> >
> > I suspect there are workloads which read these files frequently.
> >
> > I'd be interested in learning "how frequently". Perhaps
> > ascii-through-sysfs simply isn't an appropriate API for this data?
>
> We just got a bpf interface for this data merged, exactly to speed
> things up: commit 99430ab8b804 ("mm: introduce BPF kfuncs to access
> memcg statistics and events") in bpf-next.
From: Jianyue Wu <wujianyue000@gmail.com>
Replace seq_printf/seq_buf_printf with lightweight helpers to avoid
printf parsing in memcg stats output.
Key changes:
- Add memcg_seq_put_name_val() for seq_file "name value\n" formatting
- Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting
- Update __memory_events_show(), swap_events_show(),
memory_stat_format(), memory_numa_stat_show(), and related helpers
- Introduce local variables to improve readability and reduce line length
Performance:
- 1M reads of memory.stat+memory.numa_stat
- Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s
- After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop)
Tests:
- Script:
for ((i=1; i<=1000000; i++)); do
: > /dev/null < /sys/fs/cgroup/memory.stat
: > /dev/null < /sys/fs/cgroup/memory.numa_stat
done
Signed-off-by: Jianyue Wu <wujianyue000@gmail.com>
---
Changes in v2:
- Fixed indentation issues
- Added local variables in helper functions to improve code readability
and avoid overly long parameter lines
v1: Initial submission
mm/memcontrol-v1.c | 120 ++++++++++++++++++++++++++++-----------------
mm/memcontrol-v1.h | 34 +++++++++++++
mm/memcontrol.c | 102 +++++++++++++++++++++++---------------
3 files changed, 173 insertions(+), 83 deletions(-)
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 6eed14bff742..482475333876 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -10,7 +10,7 @@
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/file.h>
-#include <linux/seq_buf.h>
+#include <linux/string.h>
#include "internal.h"
#include "swap.h"
@@ -1795,25 +1795,36 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
mem_cgroup_flush_stats(memcg);
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
- seq_printf(m, "%s=%lu", stat->name,
- mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
- false));
- for_each_node_state(nid, N_MEMORY)
- seq_printf(m, " N%d=%lu", nid,
- mem_cgroup_node_nr_lru_pages(memcg, nid,
- stat->lru_mask, false));
+ u64 nr_pages;
+
+ seq_puts(m, stat->name);
+ nr_pages = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
+ false);
+ seq_put_decimal_ull(m, "=", nr_pages);
+ for_each_node_state(nid, N_MEMORY) {
+ nr_pages = mem_cgroup_node_nr_lru_pages(memcg, nid,
+ stat->lru_mask,
+ false);
+ seq_put_decimal_ull(m, " N", nid);
+ seq_put_decimal_ull(m, "=", nr_pages);
+ }
seq_putc(m, '\n');
}
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
-
- seq_printf(m, "hierarchical_%s=%lu", stat->name,
- mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
- true));
- for_each_node_state(nid, N_MEMORY)
- seq_printf(m, " N%d=%lu", nid,
- mem_cgroup_node_nr_lru_pages(memcg, nid,
- stat->lru_mask, true));
+ u64 nr_pages;
+
+ seq_puts(m, "hierarchical_");
+ seq_puts(m, stat->name);
+ nr_pages = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, true);
+ seq_put_decimal_ull(m, "=", nr_pages);
+ for_each_node_state(nid, N_MEMORY) {
+ nr_pages = mem_cgroup_node_nr_lru_pages(memcg, nid,
+ stat->lru_mask,
+ true);
+ seq_put_decimal_ull(m, " N", nid);
+ seq_put_decimal_ull(m, "=", nr_pages);
+ }
seq_putc(m, '\n');
}
@@ -1870,6 +1881,7 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
unsigned long memory, memsw;
struct mem_cgroup *mi;
unsigned int i;
+ u64 memory_limit, memsw_limit;
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
@@ -1879,17 +1891,24 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
unsigned long nr;
nr = memcg_page_state_local_output(memcg, memcg1_stats[i]);
- seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr);
+ memcg_seq_buf_put_name_val(s, memcg1_stat_names[i], (u64)nr);
}
- for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
- memcg_events_local(memcg, memcg1_events[i]));
+ for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) {
+ u64 events;
- for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
- memcg_page_state_local(memcg, NR_LRU_BASE + i) *
- PAGE_SIZE);
+ events = memcg_events_local(memcg, memcg1_events[i]);
+ memcg_seq_buf_put_name_val(s, vm_event_name(memcg1_events[i]),
+ events);
+ }
+
+ for (i = 0; i < NR_LRU_LISTS; i++) {
+ u64 nr_pages;
+
+ nr_pages = memcg_page_state_local(memcg, NR_LRU_BASE + i) *
+ PAGE_SIZE;
+ memcg_seq_buf_put_name_val(s, lru_list_name(i), nr_pages);
+ }
/* Hierarchical information */
memory = memsw = PAGE_COUNTER_MAX;
@@ -1897,28 +1916,38 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
memory = min(memory, READ_ONCE(mi->memory.max));
memsw = min(memsw, READ_ONCE(mi->memsw.max));
}
- seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
- (u64)memory * PAGE_SIZE);
- seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
- (u64)memsw * PAGE_SIZE);
+ memory_limit = (u64)memory * PAGE_SIZE;
+ memsw_limit = (u64)memsw * PAGE_SIZE;
+
+ memcg_seq_buf_put_name_val(s, "hierarchical_memory_limit",
+ memory_limit);
+ memcg_seq_buf_put_name_val(s, "hierarchical_memsw_limit",
+ memsw_limit);
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
unsigned long nr;
nr = memcg_page_state_output(memcg, memcg1_stats[i]);
- seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
- (u64)nr);
+ seq_buf_puts(s, "total_");
+ memcg_seq_buf_put_name_val(s, memcg1_stat_names[i], (u64)nr);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) {
+ u64 events;
+
+ events = memcg_events(memcg, memcg1_events[i]);
+ seq_buf_puts(s, "total_");
+ memcg_seq_buf_put_name_val(s, vm_event_name(memcg1_events[i]),
+ events);
}
- for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_buf_printf(s, "total_%s %llu\n",
- vm_event_name(memcg1_events[i]),
- (u64)memcg_events(memcg, memcg1_events[i]));
+ for (i = 0; i < NR_LRU_LISTS; i++) {
+ u64 nr_pages;
- for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
- (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
- PAGE_SIZE);
+ nr_pages = memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE;
+ seq_buf_puts(s, "total_");
+ memcg_seq_buf_put_name_val(s, lru_list_name(i), nr_pages);
+ }
#ifdef CONFIG_DEBUG_VM
{
@@ -1933,8 +1962,8 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
anon_cost += mz->lruvec.anon_cost;
file_cost += mz->lruvec.file_cost;
}
- seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
- seq_buf_printf(s, "file_cost %lu\n", file_cost);
+ memcg_seq_buf_put_name_val(s, "anon_cost", (u64)anon_cost);
+ memcg_seq_buf_put_name_val(s, "file_cost", (u64)file_cost);
}
#endif
}
@@ -1968,11 +1997,14 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(sf);
+ u64 oom_kill;
+
+ memcg_seq_put_name_val(sf, "oom_kill_disable",
+ READ_ONCE(memcg->oom_kill_disable));
+ memcg_seq_put_name_val(sf, "under_oom", (bool)memcg->under_oom);
- seq_printf(sf, "oom_kill_disable %d\n", READ_ONCE(memcg->oom_kill_disable));
- seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
- seq_printf(sf, "oom_kill %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
+ oom_kill = atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]);
+ memcg_seq_put_name_val(sf, "oom_kill", oom_kill);
return 0;
}
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 6358464bb416..a4ac2d869506 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -4,6 +4,9 @@
#define __MM_MEMCONTROL_V1_H
#include <linux/cgroup-defs.h>
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/sprintf.h>
/* Cgroup v1 and v2 common declarations */
@@ -33,6 +36,37 @@ int memory_stat_show(struct seq_file *m, void *v);
void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n);
struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg);
+/* Max 2^64 - 1 = 18446744073709551615 (20 digits) */
+#define MEMCG_DEC_U64_MAX_LEN 20
+
+static inline void memcg_seq_put_name_val(struct seq_file *m, const char *name,
+ u64 val)
+{
+ seq_puts(m, name);
+ /* need a space between name and value */
+ seq_put_decimal_ull(m, " ", val);
+ seq_putc(m, '\n');
+}
+
+static inline void memcg_seq_buf_put_name_val(struct seq_buf *s,
+ const char *name, u64 val)
+{
+ char num_buf[MEMCG_DEC_U64_MAX_LEN];
+ int num_len;
+
+ num_len = num_to_str(num_buf, sizeof(num_buf), val, 0);
+ if (num_len <= 0)
+ return;
+
+ if (seq_buf_puts(s, name))
+ return;
+ if (seq_buf_putc(s, ' '))
+ return;
+ if (seq_buf_putmem(s, num_buf, num_len))
+ return;
+ seq_buf_putc(s, '\n');
+}
+
/* Cgroup v1-specific declarations */
#ifdef CONFIG_MEMCG_V1
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 86f43b7e5f71..52dde3b8e386 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -42,6 +42,7 @@
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
#include <linux/limits.h>
+#include <linux/sprintf.h>
#include <linux/export.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -1463,6 +1464,7 @@ static bool memcg_accounts_hugetlb(void)
static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
{
int i;
+ u64 pgscan, pgsteal;
/*
* Provide statistics on the state of the memory subsystem as
@@ -1485,36 +1487,40 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
continue;
#endif
size = memcg_page_state_output(memcg, memory_stats[i].idx);
- seq_buf_printf(s, "%s %llu\n", memory_stats[i].name, size);
+ memcg_seq_buf_put_name_val(s, memory_stats[i].name, size);
if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) {
size += memcg_page_state_output(memcg,
NR_SLAB_RECLAIMABLE_B);
- seq_buf_printf(s, "slab %llu\n", size);
+ memcg_seq_buf_put_name_val(s, "slab", size);
}
}
/* Accumulated memory events */
- seq_buf_printf(s, "pgscan %lu\n",
- memcg_events(memcg, PGSCAN_KSWAPD) +
- memcg_events(memcg, PGSCAN_DIRECT) +
- memcg_events(memcg, PGSCAN_PROACTIVE) +
- memcg_events(memcg, PGSCAN_KHUGEPAGED));
- seq_buf_printf(s, "pgsteal %lu\n",
- memcg_events(memcg, PGSTEAL_KSWAPD) +
- memcg_events(memcg, PGSTEAL_DIRECT) +
- memcg_events(memcg, PGSTEAL_PROACTIVE) +
- memcg_events(memcg, PGSTEAL_KHUGEPAGED));
+ pgscan = memcg_events(memcg, PGSCAN_KSWAPD) +
+ memcg_events(memcg, PGSCAN_DIRECT) +
+ memcg_events(memcg, PGSCAN_PROACTIVE) +
+ memcg_events(memcg, PGSCAN_KHUGEPAGED);
+ pgsteal = memcg_events(memcg, PGSTEAL_KSWAPD) +
+ memcg_events(memcg, PGSTEAL_DIRECT) +
+ memcg_events(memcg, PGSTEAL_PROACTIVE) +
+ memcg_events(memcg, PGSTEAL_KHUGEPAGED);
+
+ memcg_seq_buf_put_name_val(s, "pgscan", pgscan);
+ memcg_seq_buf_put_name_val(s, "pgsteal", pgsteal);
for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
+ u64 events;
+
#ifdef CONFIG_MEMCG_V1
if (memcg_vm_event_stat[i] == PGPGIN ||
memcg_vm_event_stat[i] == PGPGOUT)
continue;
#endif
- seq_buf_printf(s, "%s %lu\n",
- vm_event_name(memcg_vm_event_stat[i]),
- memcg_events(memcg, memcg_vm_event_stat[i]));
+ events = memcg_events(memcg, memcg_vm_event_stat[i]);
+ memcg_seq_buf_put_name_val(s,
+ vm_event_name(memcg_vm_event_stat[i]),
+ events);
}
}
@@ -4218,10 +4224,12 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset)
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
- if (value == PAGE_COUNTER_MAX)
+ if (value == PAGE_COUNTER_MAX) {
seq_puts(m, "max\n");
- else
- seq_printf(m, "%llu\n", (u64)value * PAGE_SIZE);
+ } else {
+ seq_put_decimal_ull(m, "", (u64)value * PAGE_SIZE);
+ seq_putc(m, '\n');
+ }
return 0;
}
@@ -4247,7 +4255,8 @@ static int peak_show(struct seq_file *sf, void *v, struct page_counter *pc)
else
peak = max(fd_peak, READ_ONCE(pc->local_watermark));
- seq_printf(sf, "%llu\n", peak * PAGE_SIZE);
+ seq_put_decimal_ull(sf, "", peak * PAGE_SIZE);
+ seq_putc(sf, '\n');
return 0;
}
@@ -4480,16 +4489,24 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
*/
static void __memory_events_show(struct seq_file *m, atomic_long_t *events)
{
- seq_printf(m, "low %lu\n", atomic_long_read(&events[MEMCG_LOW]));
- seq_printf(m, "high %lu\n", atomic_long_read(&events[MEMCG_HIGH]));
- seq_printf(m, "max %lu\n", atomic_long_read(&events[MEMCG_MAX]));
- seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM]));
- seq_printf(m, "oom_kill %lu\n",
- atomic_long_read(&events[MEMCG_OOM_KILL]));
- seq_printf(m, "oom_group_kill %lu\n",
- atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]));
- seq_printf(m, "sock_throttled %lu\n",
- atomic_long_read(&events[MEMCG_SOCK_THROTTLED]));
+ u64 low, high, max, oom, oom_kill;
+ u64 oom_group_kill, sock_throttled;
+
+ low = atomic_long_read(&events[MEMCG_LOW]);
+ high = atomic_long_read(&events[MEMCG_HIGH]);
+ max = atomic_long_read(&events[MEMCG_MAX]);
+ oom = atomic_long_read(&events[MEMCG_OOM]);
+ oom_kill = atomic_long_read(&events[MEMCG_OOM_KILL]);
+ oom_group_kill = atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]);
+ sock_throttled = atomic_long_read(&events[MEMCG_SOCK_THROTTLED]);
+
+ memcg_seq_put_name_val(m, "low", low);
+ memcg_seq_put_name_val(m, "high", high);
+ memcg_seq_put_name_val(m, "max", max);
+ memcg_seq_put_name_val(m, "oom", oom);
+ memcg_seq_put_name_val(m, "oom_kill", oom_kill);
+ memcg_seq_put_name_val(m, "oom_group_kill", oom_group_kill);
+ memcg_seq_put_name_val(m, "sock_throttled", sock_throttled);
}
static int memory_events_show(struct seq_file *m, void *v)
@@ -4544,7 +4561,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
if (memory_stats[i].idx >= NR_VM_NODE_STAT_ITEMS)
continue;
- seq_printf(m, "%s", memory_stats[i].name);
+ seq_puts(m, memory_stats[i].name);
for_each_node_state(nid, N_MEMORY) {
u64 size;
struct lruvec *lruvec;
@@ -4552,7 +4569,10 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
size = lruvec_page_state_output(lruvec,
memory_stats[i].idx);
- seq_printf(m, " N%d=%llu", nid, size);
+
+ seq_put_decimal_ull(m, " N", nid);
+ seq_putc(m, '=');
+ seq_put_decimal_ull(m, "", size);
}
seq_putc(m, '\n');
}
@@ -4565,7 +4585,8 @@ static int memory_oom_group_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
- seq_printf(m, "%d\n", READ_ONCE(memcg->oom_group));
+ seq_put_decimal_ll(m, "", READ_ONCE(memcg->oom_group));
+ seq_putc(m, '\n');
return 0;
}
@@ -5372,13 +5393,15 @@ static ssize_t swap_max_write(struct kernfs_open_file *of,
static int swap_events_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+ u64 swap_high, swap_max, swap_fail;
+
+ swap_high = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]);
+ swap_max = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]);
+ swap_fail = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]);
- seq_printf(m, "high %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]));
- seq_printf(m, "max %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
- seq_printf(m, "fail %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]));
+ memcg_seq_put_name_val(m, "high", swap_high);
+ memcg_seq_put_name_val(m, "max", swap_max);
+ memcg_seq_put_name_val(m, "fail", swap_fail);
return 0;
}
@@ -5564,7 +5587,8 @@ static int zswap_writeback_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
- seq_printf(m, "%d\n", READ_ONCE(memcg->zswap_writeback));
+ seq_put_decimal_ll(m, "", READ_ONCE(memcg->zswap_writeback));
+ seq_putc(m, '\n');
return 0;
}
--
2.43.0
On Sat, Jan 10, 2026 at 12:22:49PM +0800, Jianyue Wu wrote:
> From: Jianyue Wu <wujianyue000@gmail.com>
>
> Replace seq_printf/seq_buf_printf with lightweight helpers to avoid
> printf parsing in memcg stats output.
>
> Key changes:
> - Add memcg_seq_put_name_val() for seq_file "name value\n" formatting
> - Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting
> - Update __memory_events_show(), swap_events_show(),
> memory_stat_format(), memory_numa_stat_show(), and related helpers
> - Introduce local variables to improve readability and reduce line length
>
> Performance:
> - 1M reads of memory.stat+memory.numa_stat
> - Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s
> - After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop)
>
> Tests:
> - Script:
> for ((i=1; i<=1000000; i++)); do
> : > /dev/null < /sys/fs/cgroup/memory.stat
> : > /dev/null < /sys/fs/cgroup/memory.numa_stat
> done
>
> Signed-off-by: Jianyue Wu <wujianyue000@gmail.com>
With a nit below, you can add:
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
>
> +/* Max 2^64 - 1 = 18446744073709551615 (20 digits) */
> +#define MEMCG_DEC_U64_MAX_LEN 20
> +
> +static inline void memcg_seq_put_name_val(struct seq_file *m, const char *name,
> + u64 val)
> +{
> + seq_puts(m, name);
> + /* need a space between name and value */
> + seq_put_decimal_ull(m, " ", val);
> + seq_putc(m, '\n');
> +}
> +
> +static inline void memcg_seq_buf_put_name_val(struct seq_buf *s,
> + const char *name, u64 val)
> +{
> + char num_buf[MEMCG_DEC_U64_MAX_LEN];
> + int num_len;
> +
> + num_len = num_to_str(num_buf, sizeof(num_buf), val, 0);
> + if (num_len <= 0)
> + return;
> +
> + if (seq_buf_puts(s, name))
> + return;
> + if (seq_buf_putc(s, ' '))
> + return;
> + if (seq_buf_putmem(s, num_buf, num_len))
> + return;
> + seq_buf_putc(s, '\n');
> +}
Move the above functions to memcontrol.c and add documentation for them.
On Sat, 10 Jan 2026 12:22:49 +0800 Jianyue Wu <wujianyue000@gmail.com> wrote: > Replace seq_printf/seq_buf_printf with lightweight helpers to avoid > printf parsing in memcg stats output. > > Key changes: > - Add memcg_seq_put_name_val() for seq_file "name value\n" formatting > - Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting > - Update __memory_events_show(), swap_events_show(), > memory_stat_format(), memory_numa_stat_show(), and related helpers > - Introduce local variables to improve readability and reduce line length > > Performance: > - 1M reads of memory.stat+memory.numa_stat > - Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s > - After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop) > > Tests: > - Script: > for ((i=1; i<=1000000; i++)); do > : > /dev/null < /sys/fs/cgroup/memory.stat > : > /dev/null < /sys/fs/cgroup/memory.numa_stat > done I don't understand - your previous email led me to believe that the new BPF interface can be used to address this issue?
On 1/11/2026 7:33 AM, Andrew Morton wrote: > On Sat, 10 Jan 2026 12:22:49 +0800 Jianyue Wu <wujianyue000@gmail.com> wrote: > >> Replace seq_printf/seq_buf_printf with lightweight helpers to avoid >> printf parsing in memcg stats output. >> > I don't understand - your previous email led me to believe that the new > BPF interface can be used to address this issue? Yes, previously I thought the BPF interface could be used directly to speed this up. Later I realized this patch may still be needed, as platforms without BPF installed might still read these sysfs files.
On Sun, Jan 11, 2026 at 12:37:45PM +0800, Jianyue Wu wrote: > On 1/11/2026 7:33 AM, Andrew Morton wrote: > > On Sat, 10 Jan 2026 12:22:49 +0800 Jianyue Wu <wujianyue000@gmail.com> wrote: > > > > > Replace seq_printf/seq_buf_printf with lightweight helpers to avoid > > > printf parsing in memcg stats output. > > > > > I don't understand - your previous email led me to believe that the new > > BPF interface can be used to address this issue? > > Yes, previously I think can directly use BPF interface to speedup. Later I > think maybe this is still needed, as some platform didn't have BPF installed > might still use these sysfs files. > It seems like this patch adds measurable improvement for the traditional stat readers. The high performance ones can be switched to the bpf based interface. So, I see no harm in taking this patch in.
Replace seq_printf/seq_buf_printf with lightweight helpers to avoid
printf parsing in memcg stats output.
Key changes:
- Add memcg_seq_put_name_val() for seq_file "name value\n" formatting
- Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting
- Update __memory_events_show(), swap_events_show(),
memory_stat_format(), memory_numa_stat_show(), and related helpers
- Introduce local variables to improve readability and reduce line length
Performance:
- 1M reads of memory.stat+memory.numa_stat
- Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s
- After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop)
Tests:
- Script:
for ((i=1; i<=1000000; i++)); do
: > /dev/null < /sys/fs/cgroup/memory.stat
: > /dev/null < /sys/fs/cgroup/memory.numa_stat
done
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Jianyue Wu <wujianyue000@gmail.com>
---
Hi Shakeel,
Thanks for the review! I've addressed your comments in v3 by moving the
helper functions to memcontrol.c and adding kernel-doc documentation.
Thanks,
Jianyue
Changes in v3:
- Move memcg_seq_put_name_val() and memcg_seq_buf_put_name_val() from
header (inline) to memcontrol.c and add kernel-doc documentation
(Suggested by Shakeel Butt)
Changes in v2:
- Initial version with performance optimization
mm/memcontrol-v1.c | 120 +++++++++++++++++++++------------
mm/memcontrol-v1.h | 6 ++
mm/memcontrol.c | 162 ++++++++++++++++++++++++++++++++++-----------
3 files changed, 205 insertions(+), 83 deletions(-)
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 6eed14bff742..482475333876 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -10,7 +10,7 @@
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/file.h>
-#include <linux/seq_buf.h>
+#include <linux/string.h>
#include "internal.h"
#include "swap.h"
@@ -1795,25 +1795,36 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
mem_cgroup_flush_stats(memcg);
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
- seq_printf(m, "%s=%lu", stat->name,
- mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
- false));
- for_each_node_state(nid, N_MEMORY)
- seq_printf(m, " N%d=%lu", nid,
- mem_cgroup_node_nr_lru_pages(memcg, nid,
- stat->lru_mask, false));
+ u64 nr_pages;
+
+ seq_puts(m, stat->name);
+ nr_pages = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
+ false);
+ seq_put_decimal_ull(m, "=", nr_pages);
+ for_each_node_state(nid, N_MEMORY) {
+ nr_pages = mem_cgroup_node_nr_lru_pages(memcg, nid,
+ stat->lru_mask,
+ false);
+ seq_put_decimal_ull(m, " N", nid);
+ seq_put_decimal_ull(m, "=", nr_pages);
+ }
seq_putc(m, '\n');
}
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
-
- seq_printf(m, "hierarchical_%s=%lu", stat->name,
- mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
- true));
- for_each_node_state(nid, N_MEMORY)
- seq_printf(m, " N%d=%lu", nid,
- mem_cgroup_node_nr_lru_pages(memcg, nid,
- stat->lru_mask, true));
+ u64 nr_pages;
+
+ seq_puts(m, "hierarchical_");
+ seq_puts(m, stat->name);
+ nr_pages = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, true);
+ seq_put_decimal_ull(m, "=", nr_pages);
+ for_each_node_state(nid, N_MEMORY) {
+ nr_pages = mem_cgroup_node_nr_lru_pages(memcg, nid,
+ stat->lru_mask,
+ true);
+ seq_put_decimal_ull(m, " N", nid);
+ seq_put_decimal_ull(m, "=", nr_pages);
+ }
seq_putc(m, '\n');
}
@@ -1870,6 +1881,7 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
unsigned long memory, memsw;
struct mem_cgroup *mi;
unsigned int i;
+ u64 memory_limit, memsw_limit;
BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
@@ -1879,17 +1891,24 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
unsigned long nr;
nr = memcg_page_state_local_output(memcg, memcg1_stats[i]);
- seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr);
+ memcg_seq_buf_put_name_val(s, memcg1_stat_names[i], (u64)nr);
}
- for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
- memcg_events_local(memcg, memcg1_events[i]));
+ for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) {
+ u64 events;
- for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
- memcg_page_state_local(memcg, NR_LRU_BASE + i) *
- PAGE_SIZE);
+ events = memcg_events_local(memcg, memcg1_events[i]);
+ memcg_seq_buf_put_name_val(s, vm_event_name(memcg1_events[i]),
+ events);
+ }
+
+ for (i = 0; i < NR_LRU_LISTS; i++) {
+ u64 nr_pages;
+
+ nr_pages = memcg_page_state_local(memcg, NR_LRU_BASE + i) *
+ PAGE_SIZE;
+ memcg_seq_buf_put_name_val(s, lru_list_name(i), nr_pages);
+ }
/* Hierarchical information */
memory = memsw = PAGE_COUNTER_MAX;
@@ -1897,28 +1916,38 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
memory = min(memory, READ_ONCE(mi->memory.max));
memsw = min(memsw, READ_ONCE(mi->memsw.max));
}
- seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
- (u64)memory * PAGE_SIZE);
- seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
- (u64)memsw * PAGE_SIZE);
+ memory_limit = (u64)memory * PAGE_SIZE;
+ memsw_limit = (u64)memsw * PAGE_SIZE;
+
+ memcg_seq_buf_put_name_val(s, "hierarchical_memory_limit",
+ memory_limit);
+ memcg_seq_buf_put_name_val(s, "hierarchical_memsw_limit",
+ memsw_limit);
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
unsigned long nr;
nr = memcg_page_state_output(memcg, memcg1_stats[i]);
- seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
- (u64)nr);
+ seq_buf_puts(s, "total_");
+ memcg_seq_buf_put_name_val(s, memcg1_stat_names[i], (u64)nr);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) {
+ u64 events;
+
+ events = memcg_events(memcg, memcg1_events[i]);
+ seq_buf_puts(s, "total_");
+ memcg_seq_buf_put_name_val(s, vm_event_name(memcg1_events[i]),
+ events);
}
- for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_buf_printf(s, "total_%s %llu\n",
- vm_event_name(memcg1_events[i]),
- (u64)memcg_events(memcg, memcg1_events[i]));
+ for (i = 0; i < NR_LRU_LISTS; i++) {
+ u64 nr_pages;
- for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
- (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
- PAGE_SIZE);
+ nr_pages = memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE;
+ seq_buf_puts(s, "total_");
+ memcg_seq_buf_put_name_val(s, lru_list_name(i), nr_pages);
+ }
#ifdef CONFIG_DEBUG_VM
{
@@ -1933,8 +1962,8 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
anon_cost += mz->lruvec.anon_cost;
file_cost += mz->lruvec.file_cost;
}
- seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
- seq_buf_printf(s, "file_cost %lu\n", file_cost);
+ memcg_seq_buf_put_name_val(s, "anon_cost", (u64)anon_cost);
+ memcg_seq_buf_put_name_val(s, "file_cost", (u64)file_cost);
}
#endif
}
@@ -1968,11 +1997,14 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(sf);
+ u64 oom_kill;
+
+ memcg_seq_put_name_val(sf, "oom_kill_disable",
+ READ_ONCE(memcg->oom_kill_disable));
+ memcg_seq_put_name_val(sf, "under_oom", (bool)memcg->under_oom);
- seq_printf(sf, "oom_kill_disable %d\n", READ_ONCE(memcg->oom_kill_disable));
- seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
- seq_printf(sf, "oom_kill %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
+ oom_kill = atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]);
+ memcg_seq_put_name_val(sf, "oom_kill", oom_kill);
return 0;
}
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 6358464bb416..46f198a81761 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -4,6 +4,9 @@
#define __MM_MEMCONTROL_V1_H
#include <linux/cgroup-defs.h>
+#include <linux/seq_buf.h>
+#include <linux/seq_file.h>
+#include <linux/sprintf.h>
/* Cgroup v1 and v2 common declarations */
@@ -33,6 +36,9 @@ int memory_stat_show(struct seq_file *m, void *v);
void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n);
struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg);
+void memcg_seq_put_name_val(struct seq_file *m, const char *name, u64 val);
+void memcg_seq_buf_put_name_val(struct seq_buf *s, const char *name, u64 val);
+
/* Cgroup v1-specific declarations */
#ifdef CONFIG_MEMCG_V1
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 86f43b7e5f71..0bc244c5a570 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -42,6 +42,7 @@
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
#include <linux/limits.h>
+#include <linux/sprintf.h>
#include <linux/export.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -1460,9 +1461,70 @@ static bool memcg_accounts_hugetlb(void)
}
#endif /* CONFIG_HUGETLB_PAGE */
+/* Max 2^64 - 1 = 18446744073709551615 (20 digits) */
+#define MEMCG_DEC_U64_MAX_LEN 20
+
+/**
+ * memcg_seq_put_name_val - Write a name-value pair to a seq_file
+ * @m: The seq_file to write to
+ * @name: The NUL-terminated name string (written with seq_puts)
+ * @val: The u64 value to write
+ *
+ * This helper formats and writes a "name value\n" line to a seq_file,
+ * commonly used for cgroup statistics output. The value is efficiently
+ * converted to decimal using seq_put_decimal_ull.
+ *
+ * Output format: "<name> <value>\n"
+ * Example: "anon 1048576\n"
+ */
+void memcg_seq_put_name_val(struct seq_file *m, const char *name, u64 val)
+{
+ seq_puts(m, name);
+ /* need a space between name and value */
+ seq_put_decimal_ull(m, " ", val);
+ seq_putc(m, '\n');
+}
+
+/**
+ * memcg_seq_buf_put_name_val - Write a name-value pair to a seq_buf
+ * @s: The seq_buf to write to
+ * @name: The name string to write
+ * @val: The u64 value to write
+ *
+ * This helper formats and writes a "name value\n" line to a seq_buf.
+ * Unlike memcg_seq_put_name_val which uses seq_file's built-in formatting,
+ * this function manually converts the value to a string using num_to_str
+ * and writes it using seq_buf primitives for better performance when
+ * batching multiple writes to a seq_buf.
+ *
+ * The function checks for overflow at each step and returns early if
+ * any operation would cause the buffer to overflow.
+ *
+ * Output format: "<name> <value>\n"
+ * Example: "file 2097152\n"
+ */
+void memcg_seq_buf_put_name_val(struct seq_buf *s, const char *name, u64 val)
+{
+ char num_buf[MEMCG_DEC_U64_MAX_LEN];
+ int num_len;
+
+ num_len = num_to_str(num_buf, sizeof(num_buf), val, 0);
+ if (num_len <= 0)
+ return;
+
+ if (seq_buf_puts(s, name))
+ return;
+ if (seq_buf_putc(s, ' '))
+ return;
+ if (seq_buf_putmem(s, num_buf, num_len))
+ return;
+ seq_buf_putc(s, '\n');
+}
+
static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
{
int i;
+ u64 pgscan, pgsteal;
/*
* Provide statistics on the state of the memory subsystem as
@@ -1485,36 +1547,40 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
continue;
#endif
size = memcg_page_state_output(memcg, memory_stats[i].idx);
- seq_buf_printf(s, "%s %llu\n", memory_stats[i].name, size);
+ memcg_seq_buf_put_name_val(s, memory_stats[i].name, size);
if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) {
size += memcg_page_state_output(memcg,
NR_SLAB_RECLAIMABLE_B);
- seq_buf_printf(s, "slab %llu\n", size);
+ memcg_seq_buf_put_name_val(s, "slab", size);
}
}
/* Accumulated memory events */
- seq_buf_printf(s, "pgscan %lu\n",
- memcg_events(memcg, PGSCAN_KSWAPD) +
- memcg_events(memcg, PGSCAN_DIRECT) +
- memcg_events(memcg, PGSCAN_PROACTIVE) +
- memcg_events(memcg, PGSCAN_KHUGEPAGED));
- seq_buf_printf(s, "pgsteal %lu\n",
- memcg_events(memcg, PGSTEAL_KSWAPD) +
- memcg_events(memcg, PGSTEAL_DIRECT) +
- memcg_events(memcg, PGSTEAL_PROACTIVE) +
- memcg_events(memcg, PGSTEAL_KHUGEPAGED));
+ pgscan = memcg_events(memcg, PGSCAN_KSWAPD) +
+ memcg_events(memcg, PGSCAN_DIRECT) +
+ memcg_events(memcg, PGSCAN_PROACTIVE) +
+ memcg_events(memcg, PGSCAN_KHUGEPAGED);
+ pgsteal = memcg_events(memcg, PGSTEAL_KSWAPD) +
+ memcg_events(memcg, PGSTEAL_DIRECT) +
+ memcg_events(memcg, PGSTEAL_PROACTIVE) +
+ memcg_events(memcg, PGSTEAL_KHUGEPAGED);
+
+ memcg_seq_buf_put_name_val(s, "pgscan", pgscan);
+ memcg_seq_buf_put_name_val(s, "pgsteal", pgsteal);
for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
+ u64 events;
+
#ifdef CONFIG_MEMCG_V1
if (memcg_vm_event_stat[i] == PGPGIN ||
memcg_vm_event_stat[i] == PGPGOUT)
continue;
#endif
- seq_buf_printf(s, "%s %lu\n",
- vm_event_name(memcg_vm_event_stat[i]),
- memcg_events(memcg, memcg_vm_event_stat[i]));
+ events = memcg_events(memcg, memcg_vm_event_stat[i]);
+ memcg_seq_buf_put_name_val(s,
+ vm_event_name(memcg_vm_event_stat[i]),
+ events);
}
}
@@ -4218,10 +4284,12 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset)
static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
{
- if (value == PAGE_COUNTER_MAX)
+ if (value == PAGE_COUNTER_MAX) {
seq_puts(m, "max\n");
- else
- seq_printf(m, "%llu\n", (u64)value * PAGE_SIZE);
+ } else {
+ seq_put_decimal_ull(m, "", (u64)value * PAGE_SIZE);
+ seq_putc(m, '\n');
+ }
return 0;
}
@@ -4247,7 +4315,8 @@ static int peak_show(struct seq_file *sf, void *v, struct page_counter *pc)
else
peak = max(fd_peak, READ_ONCE(pc->local_watermark));
- seq_printf(sf, "%llu\n", peak * PAGE_SIZE);
+ seq_put_decimal_ull(sf, "", peak * PAGE_SIZE);
+ seq_putc(sf, '\n');
return 0;
}
@@ -4480,16 +4549,24 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
*/
static void __memory_events_show(struct seq_file *m, atomic_long_t *events)
{
- seq_printf(m, "low %lu\n", atomic_long_read(&events[MEMCG_LOW]));
- seq_printf(m, "high %lu\n", atomic_long_read(&events[MEMCG_HIGH]));
- seq_printf(m, "max %lu\n", atomic_long_read(&events[MEMCG_MAX]));
- seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM]));
- seq_printf(m, "oom_kill %lu\n",
- atomic_long_read(&events[MEMCG_OOM_KILL]));
- seq_printf(m, "oom_group_kill %lu\n",
- atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]));
- seq_printf(m, "sock_throttled %lu\n",
- atomic_long_read(&events[MEMCG_SOCK_THROTTLED]));
+ u64 low, high, max, oom, oom_kill;
+ u64 oom_group_kill, sock_throttled;
+
+ low = atomic_long_read(&events[MEMCG_LOW]);
+ high = atomic_long_read(&events[MEMCG_HIGH]);
+ max = atomic_long_read(&events[MEMCG_MAX]);
+ oom = atomic_long_read(&events[MEMCG_OOM]);
+ oom_kill = atomic_long_read(&events[MEMCG_OOM_KILL]);
+ oom_group_kill = atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]);
+ sock_throttled = atomic_long_read(&events[MEMCG_SOCK_THROTTLED]);
+
+ memcg_seq_put_name_val(m, "low", low);
+ memcg_seq_put_name_val(m, "high", high);
+ memcg_seq_put_name_val(m, "max", max);
+ memcg_seq_put_name_val(m, "oom", oom);
+ memcg_seq_put_name_val(m, "oom_kill", oom_kill);
+ memcg_seq_put_name_val(m, "oom_group_kill", oom_group_kill);
+ memcg_seq_put_name_val(m, "sock_throttled", sock_throttled);
}
static int memory_events_show(struct seq_file *m, void *v)
@@ -4544,7 +4621,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
if (memory_stats[i].idx >= NR_VM_NODE_STAT_ITEMS)
continue;
- seq_printf(m, "%s", memory_stats[i].name);
+ seq_puts(m, memory_stats[i].name);
for_each_node_state(nid, N_MEMORY) {
u64 size;
struct lruvec *lruvec;
@@ -4552,7 +4629,10 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
size = lruvec_page_state_output(lruvec,
memory_stats[i].idx);
- seq_printf(m, " N%d=%llu", nid, size);
+
+ seq_put_decimal_ull(m, " N", nid);
+ seq_putc(m, '=');
+ seq_put_decimal_ull(m, "", size);
}
seq_putc(m, '\n');
}
@@ -4565,7 +4645,8 @@ static int memory_oom_group_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
- seq_printf(m, "%d\n", READ_ONCE(memcg->oom_group));
+ seq_put_decimal_ll(m, "", READ_ONCE(memcg->oom_group));
+ seq_putc(m, '\n');
return 0;
}
@@ -5372,13 +5453,15 @@ static ssize_t swap_max_write(struct kernfs_open_file *of,
static int swap_events_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+ u64 swap_high, swap_max, swap_fail;
+
+ swap_high = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]);
+ swap_max = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]);
+ swap_fail = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]);
- seq_printf(m, "high %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]));
- seq_printf(m, "max %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
- seq_printf(m, "fail %lu\n",
- atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]));
+ memcg_seq_put_name_val(m, "high", swap_high);
+ memcg_seq_put_name_val(m, "max", swap_max);
+ memcg_seq_put_name_val(m, "fail", swap_fail);
return 0;
}
@@ -5564,7 +5647,8 @@ static int zswap_writeback_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
- seq_printf(m, "%d\n", READ_ONCE(memcg->zswap_writeback));
+ seq_put_decimal_ll(m, "", READ_ONCE(memcg->zswap_writeback));
+ seq_putc(m, '\n');
return 0;
}
--
2.43.0
On 1/22/26 3:42 AM, Jianyue Wu wrote:
> Replace seq_printf/seq_buf_printf with lightweight helpers to avoid
> printf parsing in memcg stats output.
>
> Key changes:
> - Add memcg_seq_put_name_val() for seq_file "name value\n" formatting
> - Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting
> - Update __memory_events_show(), swap_events_show(),
> memory_stat_format(), memory_numa_stat_show(), and related helpers
> - Introduce local variables to improve readability and reduce line length
>
> Performance:
> - 1M reads of memory.stat+memory.numa_stat
> - Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s
> - After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop)
>
> Tests:
> - Script:
> for ((i=1; i<=1000000; i++)); do
> : > /dev/null < /sys/fs/cgroup/memory.stat
> : > /dev/null < /sys/fs/cgroup/memory.numa_stat
> done
>
> Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
> Signed-off-by: Jianyue Wu <wujianyue000@gmail.com>
> ---
Hi Jianyue,
I gave this patch a run and can confirm the perf gain. I left comments
on reducing the amount of added lines so that it better resembles the
existing code.
Tested-by: JP Kobryn <inwardvessel@gmail.com>
>
> Hi Shakeel,
>
> Thanks for the review! I've addressed your comments in v3 by moving the
> helper functions to memcontrol.c and adding kernel-doc documentation.
>
> Thanks,
> Jianyue
>
> Changes in v3:
> - Move memcg_seq_put_name_val() and memcg_seq_buf_put_name_val() from
> header (inline) to memcontrol.c and add kernel-doc documentation
> (Suggested by Shakeel Butt)
>
> Changes in v2:
> - Initial version with performance optimization
>
> mm/memcontrol-v1.c | 120 +++++++++++++++++++++------------
> mm/memcontrol-v1.h | 6 ++
> mm/memcontrol.c | 162 ++++++++++++++++++++++++++++++++++-----------
> 3 files changed, 205 insertions(+), 83 deletions(-)
>
> diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
> index 6eed14bff742..482475333876 100644
> --- a/mm/memcontrol-v1.c
> +++ b/mm/memcontrol-v1.c
> @@ -10,7 +10,7 @@
> #include <linux/poll.h>
> #include <linux/sort.h>
> #include <linux/file.h>
> -#include <linux/seq_buf.h>
> +#include <linux/string.h>
>
> #include "internal.h"
> #include "swap.h"
> @@ -1795,25 +1795,36 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
> mem_cgroup_flush_stats(memcg);
>
> for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
> - seq_printf(m, "%s=%lu", stat->name,
> - mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
> - false));
> - for_each_node_state(nid, N_MEMORY)
> - seq_printf(m, " N%d=%lu", nid,
> - mem_cgroup_node_nr_lru_pages(memcg, nid,
> - stat->lru_mask, false));
> + u64 nr_pages;
> +
> + seq_puts(m, stat->name);
> + nr_pages = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
> + false);
> + seq_put_decimal_ull(m, "=", nr_pages);
> + for_each_node_state(nid, N_MEMORY) {
> + nr_pages = mem_cgroup_node_nr_lru_pages(memcg, nid,
> + stat->lru_mask,
> + false);
> + seq_put_decimal_ull(m, " N", nid);
> + seq_put_decimal_ull(m, "=", nr_pages);
> + }
> seq_putc(m, '\n');
There's a recurring pattern of 1) put name, 2) put separator, 3) put
value. Instead of adding so many new lines, I wonder if you could use a
function or macro that accepts: char *name, char sep, u64 val. You could
then use it as a replacement for seq_printf() and avoid the extra added
lines here and throughout this patch.
> }
>
> for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
> -
> - seq_printf(m, "hierarchical_%s=%lu", stat->name,
> - mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
> - true));
> - for_each_node_state(nid, N_MEMORY)
> - seq_printf(m, " N%d=%lu", nid,
> - mem_cgroup_node_nr_lru_pages(memcg, nid,
> - stat->lru_mask, true));
> + u64 nr_pages;
> +
> + seq_puts(m, "hierarchical_");
> + seq_puts(m, stat->name);
> + nr_pages = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask, true);
> + seq_put_decimal_ull(m, "=", nr_pages);
> + for_each_node_state(nid, N_MEMORY) {
> + nr_pages = mem_cgroup_node_nr_lru_pages(memcg, nid,
> + stat->lru_mask,
> + true);
> + seq_put_decimal_ull(m, " N", nid);
> + seq_put_decimal_ull(m, "=", nr_pages);
> + }
> seq_putc(m, '\n');
> }
>
> @@ -1870,6 +1881,7 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> unsigned long memory, memsw;
> struct mem_cgroup *mi;
> unsigned int i;
> + u64 memory_limit, memsw_limit;
>
> BUILD_BUG_ON(ARRAY_SIZE(memcg1_stat_names) != ARRAY_SIZE(memcg1_stats));
>
> @@ -1879,17 +1891,24 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> unsigned long nr;
>
> nr = memcg_page_state_local_output(memcg, memcg1_stats[i]);
> - seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr);
> + memcg_seq_buf_put_name_val(s, memcg1_stat_names[i], (u64)nr);
> }
>
> - for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
> - seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
> - memcg_events_local(memcg, memcg1_events[i]));
> + for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) {
> + u64 events;
>
> - for (i = 0; i < NR_LRU_LISTS; i++)
> - seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
> - memcg_page_state_local(memcg, NR_LRU_BASE + i) *
> - PAGE_SIZE);
> + events = memcg_events_local(memcg, memcg1_events[i]);
> + memcg_seq_buf_put_name_val(s, vm_event_name(memcg1_events[i]),
> + events);
> + }
> +
> + for (i = 0; i < NR_LRU_LISTS; i++) {
> + u64 nr_pages;
> +
> + nr_pages = memcg_page_state_local(memcg, NR_LRU_BASE + i) *
> + PAGE_SIZE;
> + memcg_seq_buf_put_name_val(s, lru_list_name(i), nr_pages);
> + }
>
> /* Hierarchical information */
> memory = memsw = PAGE_COUNTER_MAX;
> @@ -1897,28 +1916,38 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> memory = min(memory, READ_ONCE(mi->memory.max));
> memsw = min(memsw, READ_ONCE(mi->memsw.max));
> }
> - seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
> - (u64)memory * PAGE_SIZE);
> - seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
> - (u64)memsw * PAGE_SIZE);
> + memory_limit = (u64)memory * PAGE_SIZE;
> + memsw_limit = (u64)memsw * PAGE_SIZE;
I don't think in this case these new local variables are improving
readability.
> +
> + memcg_seq_buf_put_name_val(s, "hierarchical_memory_limit",
> + memory_limit);
> + memcg_seq_buf_put_name_val(s, "hierarchical_memsw_limit",
> + memsw_limit);
>
> for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
> unsigned long nr;
>
> nr = memcg_page_state_output(memcg, memcg1_stats[i]);
> - seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
> - (u64)nr);
> + seq_buf_puts(s, "total_");
> + memcg_seq_buf_put_name_val(s, memcg1_stat_names[i], (u64)nr);
I would try and combine these 2 calls into 1 if possible. If the diff
has close to a -1:+1 line change in places where seq_buf_printf() is
replaced with some helper, it would reduce the noisiness. This applies
to other areas where a prefix is put before calling a new helper.
> + }
> +
> + for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) {
> + u64 events;
> +
> + events = memcg_events(memcg, memcg1_events[i]);
> + seq_buf_puts(s, "total_");
> + memcg_seq_buf_put_name_val(s, vm_event_name(memcg1_events[i]),
> + events);
> }
>
> - for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
> - seq_buf_printf(s, "total_%s %llu\n",
> - vm_event_name(memcg1_events[i]),
> - (u64)memcg_events(memcg, memcg1_events[i]));
> + for (i = 0; i < NR_LRU_LISTS; i++) {
> + u64 nr_pages;
>
> - for (i = 0; i < NR_LRU_LISTS; i++)
> - seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
> - (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
> - PAGE_SIZE);
> + nr_pages = memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE;
> + seq_buf_puts(s, "total_");
> + memcg_seq_buf_put_name_val(s, lru_list_name(i), nr_pages);
> + }
>
> #ifdef CONFIG_DEBUG_VM
> {
> @@ -1933,8 +1962,8 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> anon_cost += mz->lruvec.anon_cost;
> file_cost += mz->lruvec.file_cost;
> }
> - seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
> - seq_buf_printf(s, "file_cost %lu\n", file_cost);
> + memcg_seq_buf_put_name_val(s, "anon_cost", (u64)anon_cost);
> + memcg_seq_buf_put_name_val(s, "file_cost", (u64)file_cost);
> }
> #endif
> }
> @@ -1968,11 +1997,14 @@ static int mem_cgroup_swappiness_write(struct cgroup_subsys_state *css,
> static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v)
> {
> struct mem_cgroup *memcg = mem_cgroup_from_seq(sf);
> + u64 oom_kill;
> +
> + memcg_seq_put_name_val(sf, "oom_kill_disable",
> + READ_ONCE(memcg->oom_kill_disable));
> + memcg_seq_put_name_val(sf, "under_oom", (bool)memcg->under_oom);
>
> - seq_printf(sf, "oom_kill_disable %d\n", READ_ONCE(memcg->oom_kill_disable));
> - seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
> - seq_printf(sf, "oom_kill %lu\n",
> - atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
> + oom_kill = atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]);
> + memcg_seq_put_name_val(sf, "oom_kill", oom_kill);
New local variable just adding extra lines.
> return 0;
> }
>
> diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
> index 6358464bb416..46f198a81761 100644
> --- a/mm/memcontrol-v1.h
> +++ b/mm/memcontrol-v1.h
> @@ -4,6 +4,9 @@
> #define __MM_MEMCONTROL_V1_H
>
> #include <linux/cgroup-defs.h>
> +#include <linux/seq_buf.h>
> +#include <linux/seq_file.h>
> +#include <linux/sprintf.h>
>
> /* Cgroup v1 and v2 common declarations */
>
> @@ -33,6 +36,9 @@ int memory_stat_show(struct seq_file *m, void *v);
> void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n);
> struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg);
>
> +void memcg_seq_put_name_val(struct seq_file *m, const char *name, u64 val);
> +void memcg_seq_buf_put_name_val(struct seq_buf *s, const char *name, u64 val);
> +
> /* Cgroup v1-specific declarations */
> #ifdef CONFIG_MEMCG_V1
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 86f43b7e5f71..0bc244c5a570 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -42,6 +42,7 @@
> #include <linux/bit_spinlock.h>
> #include <linux/rcupdate.h>
> #include <linux/limits.h>
> +#include <linux/sprintf.h>
> #include <linux/export.h>
> #include <linux/list.h>
> #include <linux/mutex.h>
> @@ -1460,9 +1461,70 @@ static bool memcg_accounts_hugetlb(void)
> }
> #endif /* CONFIG_HUGETLB_PAGE */
>
> +/* Max 2^64 - 1 = 18446744073709551615 (20 digits) */
> +#define MEMCG_DEC_U64_MAX_LEN 20
> +
> +/**
> + * memcg_seq_put_name_val - Write a name-value pair to a seq_file
> + * @m: The seq_file to write to
> + * @name: The name string (not null-terminated required, uses seq_puts)
> + * @val: The u64 value to write
> + *
> + * This helper formats and writes a "name value\n" line to a seq_file,
> + * commonly used for cgroup statistics output. The value is efficiently
> + * converted to decimal using seq_put_decimal_ull.
> + *
> + * Output format: "<name> <value>\n"
> + * Example: "anon 1048576\n"
> + */
> +void memcg_seq_put_name_val(struct seq_file *m, const char *name, u64 val)
> +{
> + seq_puts(m, name);
> + /* need a space between name and value */
> + seq_put_decimal_ull(m, " ", val);
> + seq_putc(m, '\n');
I think seq_put* calls normally don't imply a newline. Maybe change the
name to reflect, like something with "print"? Also, it's not really
memcg specific.
This function has a space as a separator. Earlier in your diff you were
using '='. A separator parameter could allow this func to be used
elsewhere, but you'd have to manage the newline somehow. Maybe a newline
wrapper?
> +}
> +
> +/**
> + * memcg_seq_buf_put_name_val - Write a name-value pair to a seq_buf
> + * @s: The seq_buf to write to
> + * @name: The name string to write
> + * @val: The u64 value to write
> + *
> + * This helper formats and writes a "name value\n" line to a seq_buf.
> + * Unlike memcg_seq_put_name_val which uses seq_file's built-in formatting,
> + * this function manually converts the value to a string using num_to_str
> + * and writes it using seq_buf primitives for better performance when
> + * batching multiple writes to a seq_buf.
> + *
> + * The function checks for overflow at each step and returns early if
> + * any operation would cause the buffer to overflow.
> + *
> + * Output format: "<name> <value>\n"
> + * Example: "file 2097152\n"
> + */
> +void memcg_seq_buf_put_name_val(struct seq_buf *s, const char *name, u64 val)
> +{
> + char num_buf[MEMCG_DEC_U64_MAX_LEN];
> + int num_len;
> +
> + num_len = num_to_str(num_buf, sizeof(num_buf), val, 0);
> + if (num_len <= 0)
> + return;
> +
> + if (seq_buf_puts(s, name))
> + return;
> + if (seq_buf_putc(s, ' '))
> + return;
Can num_buf[0] just be ' '? The length would have to be extended though.
Not sure if saving a few seq_buf_putc() calls makes a difference.
> + if (seq_buf_putmem(s, num_buf, num_len))
> + return;
> + seq_buf_putc(s, '\n');
Similarly, though I'm not sure if it even performs better, this call
could be removed and you can do num_buf[num_len + 1] = '\n' (extend the
buffer again).
If you make the two changes above you can call seq_buf_putmem() last.
> +}
> +
> static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> {
> int i;
> + u64 pgscan, pgsteal;
>
> /*
> * Provide statistics on the state of the memory subsystem as
> @@ -1485,36 +1547,40 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> continue;
> #endif
> size = memcg_page_state_output(memcg, memory_stats[i].idx);
> - seq_buf_printf(s, "%s %llu\n", memory_stats[i].name, size);
> + memcg_seq_buf_put_name_val(s, memory_stats[i].name, size);
>
> if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) {
> size += memcg_page_state_output(memcg,
> NR_SLAB_RECLAIMABLE_B);
> - seq_buf_printf(s, "slab %llu\n", size);
> + memcg_seq_buf_put_name_val(s, "slab", size);
> }
> }
>
> /* Accumulated memory events */
> - seq_buf_printf(s, "pgscan %lu\n",
> - memcg_events(memcg, PGSCAN_KSWAPD) +
> - memcg_events(memcg, PGSCAN_DIRECT) +
> - memcg_events(memcg, PGSCAN_PROACTIVE) +
> - memcg_events(memcg, PGSCAN_KHUGEPAGED));
> - seq_buf_printf(s, "pgsteal %lu\n",
> - memcg_events(memcg, PGSTEAL_KSWAPD) +
> - memcg_events(memcg, PGSTEAL_DIRECT) +
> - memcg_events(memcg, PGSTEAL_PROACTIVE) +
> - memcg_events(memcg, PGSTEAL_KHUGEPAGED));
> + pgscan = memcg_events(memcg, PGSCAN_KSWAPD) +
> + memcg_events(memcg, PGSCAN_DIRECT) +
> + memcg_events(memcg, PGSCAN_PROACTIVE) +
> + memcg_events(memcg, PGSCAN_KHUGEPAGED);
> + pgsteal = memcg_events(memcg, PGSTEAL_KSWAPD) +
> + memcg_events(memcg, PGSTEAL_DIRECT) +
> + memcg_events(memcg, PGSTEAL_PROACTIVE) +
> + memcg_events(memcg, PGSTEAL_KHUGEPAGED);
More extra local variables. You can save the lines instead.
> +
> + memcg_seq_buf_put_name_val(s, "pgscan", pgscan);
> + memcg_seq_buf_put_name_val(s, "pgsteal", pgsteal);
>
> for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
> + u64 events;
> +
> #ifdef CONFIG_MEMCG_V1
> if (memcg_vm_event_stat[i] == PGPGIN ||
> memcg_vm_event_stat[i] == PGPGOUT)
> continue;
> #endif
> - seq_buf_printf(s, "%s %lu\n",
> - vm_event_name(memcg_vm_event_stat[i]),
> - memcg_events(memcg, memcg_vm_event_stat[i]));
> + events = memcg_events(memcg, memcg_vm_event_stat[i]);
> + memcg_seq_buf_put_name_val(s,
> + vm_event_name(memcg_vm_event_stat[i]),
> + events);
> }
> }
>
> @@ -4218,10 +4284,12 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset)
>
> static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value)
> {
> - if (value == PAGE_COUNTER_MAX)
> + if (value == PAGE_COUNTER_MAX) {
> seq_puts(m, "max\n");
> - else
> - seq_printf(m, "%llu\n", (u64)value * PAGE_SIZE);
> + } else {
> + seq_put_decimal_ull(m, "", (u64)value * PAGE_SIZE);
> + seq_putc(m, '\n');
> + }
>
> return 0;
> }
> @@ -4247,7 +4315,8 @@ static int peak_show(struct seq_file *sf, void *v, struct page_counter *pc)
> else
> peak = max(fd_peak, READ_ONCE(pc->local_watermark));
>
> - seq_printf(sf, "%llu\n", peak * PAGE_SIZE);
> + seq_put_decimal_ull(sf, "", peak * PAGE_SIZE);
> + seq_putc(sf, '\n');
Your benchmark mentions reading memory and numa stat files, but this
function is not reached in those cases. Is this a hot path for you? If
not, maybe just leave this and any others like it alone.
> return 0;
> }
>
> @@ -4480,16 +4549,24 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
> */
> static void __memory_events_show(struct seq_file *m, atomic_long_t *events)
> {
> - seq_printf(m, "low %lu\n", atomic_long_read(&events[MEMCG_LOW]));
> - seq_printf(m, "high %lu\n", atomic_long_read(&events[MEMCG_HIGH]));
> - seq_printf(m, "max %lu\n", atomic_long_read(&events[MEMCG_MAX]));
> - seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM]));
> - seq_printf(m, "oom_kill %lu\n",
> - atomic_long_read(&events[MEMCG_OOM_KILL]));
> - seq_printf(m, "oom_group_kill %lu\n",
> - atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]));
> - seq_printf(m, "sock_throttled %lu\n",
> - atomic_long_read(&events[MEMCG_SOCK_THROTTLED]));
> + u64 low, high, max, oom, oom_kill;
> + u64 oom_group_kill, sock_throttled;
> +
> + low = atomic_long_read(&events[MEMCG_LOW]);
> + high = atomic_long_read(&events[MEMCG_HIGH]);
> + max = atomic_long_read(&events[MEMCG_MAX]);
> + oom = atomic_long_read(&events[MEMCG_OOM]);
> + oom_kill = atomic_long_read(&events[MEMCG_OOM_KILL]);
> + oom_group_kill = atomic_long_read(&events[MEMCG_OOM_GROUP_KILL]);
> + sock_throttled = atomic_long_read(&events[MEMCG_SOCK_THROTTLED]);
Same, more new locals.
> +
> + memcg_seq_put_name_val(m, "low", low);
> + memcg_seq_put_name_val(m, "high", high);
> + memcg_seq_put_name_val(m, "max", max);
> + memcg_seq_put_name_val(m, "oom", oom);
> + memcg_seq_put_name_val(m, "oom_kill", oom_kill);
> + memcg_seq_put_name_val(m, "oom_group_kill", oom_group_kill);
> + memcg_seq_put_name_val(m, "sock_throttled", sock_throttled);
> }
>
> static int memory_events_show(struct seq_file *m, void *v)
> @@ -4544,7 +4621,7 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
> if (memory_stats[i].idx >= NR_VM_NODE_STAT_ITEMS)
> continue;
>
> - seq_printf(m, "%s", memory_stats[i].name);
> + seq_puts(m, memory_stats[i].name);
> for_each_node_state(nid, N_MEMORY) {
> u64 size;
> struct lruvec *lruvec;
> @@ -4552,7 +4629,10 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
> lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
> size = lruvec_page_state_output(lruvec,
> memory_stats[i].idx);
> - seq_printf(m, " N%d=%llu", nid, size);
> +
> + seq_put_decimal_ull(m, " N", nid);
> + seq_putc(m, '=');
> + seq_put_decimal_ull(m, "", size);
> }
> seq_putc(m, '\n');
> }
> @@ -4565,7 +4645,8 @@ static int memory_oom_group_show(struct seq_file *m, void *v)
> {
> struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
>
> - seq_printf(m, "%d\n", READ_ONCE(memcg->oom_group));
> + seq_put_decimal_ll(m, "", READ_ONCE(memcg->oom_group));
> + seq_putc(m, '\n');
>
> return 0;
> }
> @@ -5372,13 +5453,15 @@ static ssize_t swap_max_write(struct kernfs_open_file *of,
> static int swap_events_show(struct seq_file *m, void *v)
> {
> struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
> + u64 swap_high, swap_max, swap_fail;
> +
> + swap_high = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]);
> + swap_max = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]);
> + swap_fail = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]);
Same, new local variables.
>
> - seq_printf(m, "high %lu\n",
> - atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]));
> - seq_printf(m, "max %lu\n",
> - atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]));
> - seq_printf(m, "fail %lu\n",
> - atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]));
> + memcg_seq_put_name_val(m, "high", swap_high);
> + memcg_seq_put_name_val(m, "max", swap_max);
> + memcg_seq_put_name_val(m, "fail", swap_fail);
>
> return 0;
> }
> @@ -5564,7 +5647,8 @@ static int zswap_writeback_show(struct seq_file *m, void *v)
> {
> struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
>
> - seq_printf(m, "%d\n", READ_ONCE(memcg->zswap_writeback));
> + seq_put_decimal_ll(m, "", READ_ONCE(memcg->zswap_writeback));
> + seq_putc(m, '\n');
> return 0;
> }
>
This patch optimizes memcg stat output by replacing seq_buf_printf() with a lightweight helper and replacing seq_printf() in numa stat functions with direct seq operations. Changes in v4: - Embed separator and newline in buffer to reduce function calls in memcg_seq_buf_print_stat() (suggested by JP Kobryn) - Optimize memory_numa_stat_show() and memcg_numa_stat_show() by replacing seq_printf() with seq_puts() and seq_put_decimal_ull() - Add comments explaining the optimization approach - Note: Did not add a separate API for the numa stat case because the output format "N0=value0 N1=value1" is too specific - the "N" prefix and node ID are separate values that don't fit a name=value pattern. Using seq_put_decimal_ull() directly is more flexible and clear. Changes in v3: - Rebased to latest mm-unstable - Updated commit message to clarify the optimization approach Changes in v2: - Initial version with seq_buf optimization Performance improvement (1M reads of memory.stat + memory.numa_stat): - Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s - After: real 0m8.909s, user 0m4.661s, sys 0m4.247s - Result: ~11% sys time reduction Jianyue Wu (1): mm: optimize stat output for 11% sys time reduce mm/memcontrol-v1.c | 84 ++++++++++++++++++++++++------------------- mm/memcontrol-v1.h | 4 +++ mm/memcontrol.c | 88 +++++++++++++++++++++++++++++++++++++--------- 3 files changed, 123 insertions(+), 53 deletions(-) -- 2.43.0
Replace seq_buf_printf() calls in memcg stat formatting with a
lightweight helper function and replace seq_printf() in numa stat
output with direct seq operations to avoid printf format parsing
overhead.
Key changes:
- Add memcg_seq_buf_print_stat() helper that embeds separator and
newline directly in the number buffer to reduce function calls
- Replace seq_buf_printf() in memcg_stat_format() with
memcg_seq_buf_print_stat()
- Replace seq_printf() in memory_numa_stat_show() and
memcg_numa_stat_show() with seq_puts() and seq_put_decimal_ull()
- Update memory_stat_format() and related v1 stats formatting
Performance improvement (1M reads of memory.stat + memory.numa_stat):
- Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s
- After: real 0m8.909s, user 0m4.661s, sys 0m4.247s
- Result: ~11% sys time reduction
Test script:
for ((i=1; i<=1000000; i++)); do
: > /dev/null < /sys/fs/cgroup/memory.stat
: > /dev/null < /sys/fs/cgroup/memory.numa_stat
done
Suggested-by: JP Kobryn <inwardvessel@gmail.com>
Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Tested-by: JP Kobryn <inwardvessel@gmail.com>
Signed-off-by: Jianyue Wu <wujianyue000@gmail.com>
---
mm/memcontrol-v1.c | 84 ++++++++++++++++++++++++-------------------
mm/memcontrol-v1.h | 4 +++
mm/memcontrol.c | 88 +++++++++++++++++++++++++++++++++++++---------
3 files changed, 123 insertions(+), 53 deletions(-)
diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c
index 6eed14bff742..5efea38da69e 100644
--- a/mm/memcontrol-v1.c
+++ b/mm/memcontrol-v1.c
@@ -10,7 +10,7 @@
#include <linux/poll.h>
#include <linux/sort.h>
#include <linux/file.h>
-#include <linux/seq_buf.h>
+#include <linux/string.h>
#include "internal.h"
#include "swap.h"
@@ -1794,26 +1794,39 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
mem_cgroup_flush_stats(memcg);
+ /*
+ * Output format: "stat_name=value N0=value0 N1=value1 ...\n"
+ * Use seq_puts and seq_put_decimal_ull to avoid printf format
+ * parsing overhead in this hot path.
+ */
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
- seq_printf(m, "%s=%lu", stat->name,
- mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
- false));
- for_each_node_state(nid, N_MEMORY)
- seq_printf(m, " N%d=%lu", nid,
- mem_cgroup_node_nr_lru_pages(memcg, nid,
- stat->lru_mask, false));
+ seq_puts(m, stat->name);
+ seq_put_decimal_ull(m, "=",
+ mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
+ false));
+ for_each_node_state(nid, N_MEMORY) {
+ seq_put_decimal_ull(m, " N", nid);
+ seq_put_decimal_ull(m, "=",
+ mem_cgroup_node_nr_lru_pages(memcg, nid,
+ stat->lru_mask,
+ false));
+ }
seq_putc(m, '\n');
}
for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
-
- seq_printf(m, "hierarchical_%s=%lu", stat->name,
- mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
- true));
- for_each_node_state(nid, N_MEMORY)
- seq_printf(m, " N%d=%lu", nid,
- mem_cgroup_node_nr_lru_pages(memcg, nid,
- stat->lru_mask, true));
+ seq_puts(m, "hierarchical_");
+ seq_puts(m, stat->name);
+ seq_put_decimal_ull(m, "=",
+ mem_cgroup_nr_lru_pages(memcg, stat->lru_mask,
+ true));
+ for_each_node_state(nid, N_MEMORY) {
+ seq_put_decimal_ull(m, " N", nid);
+ seq_put_decimal_ull(m, "=",
+ mem_cgroup_node_nr_lru_pages(memcg, nid,
+ stat->lru_mask,
+ true));
+ }
seq_putc(m, '\n');
}
@@ -1879,17 +1892,17 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
unsigned long nr;
nr = memcg_page_state_local_output(memcg, memcg1_stats[i]);
- seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr);
+ memcg_seq_buf_print_stat(s, NULL, memcg1_stat_names[i], ' ', (u64)nr);
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]),
- memcg_events_local(memcg, memcg1_events[i]));
+ memcg_seq_buf_print_stat(s, NULL, vm_event_name(memcg1_events[i]),
+ ' ', memcg_events_local(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(s, "%s %lu\n", lru_list_name(i),
- memcg_page_state_local(memcg, NR_LRU_BASE + i) *
- PAGE_SIZE);
+ memcg_seq_buf_print_stat(s, NULL, lru_list_name(i), ' ',
+ memcg_page_state_local(memcg, NR_LRU_BASE + i) *
+ PAGE_SIZE);
/* Hierarchical information */
memory = memsw = PAGE_COUNTER_MAX;
@@ -1897,28 +1910,27 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
memory = min(memory, READ_ONCE(mi->memory.max));
memsw = min(memsw, READ_ONCE(mi->memsw.max));
}
- seq_buf_printf(s, "hierarchical_memory_limit %llu\n",
- (u64)memory * PAGE_SIZE);
- seq_buf_printf(s, "hierarchical_memsw_limit %llu\n",
- (u64)memsw * PAGE_SIZE);
+ memcg_seq_buf_print_stat(s, NULL, "hierarchical_memory_limit", ' ',
+ (u64)memory * PAGE_SIZE);
+ memcg_seq_buf_print_stat(s, NULL, "hierarchical_memsw_limit", ' ',
+ (u64)memsw * PAGE_SIZE);
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) {
unsigned long nr;
nr = memcg_page_state_output(memcg, memcg1_stats[i]);
- seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i],
- (u64)nr);
+ memcg_seq_buf_print_stat(s, "total_", memcg1_stat_names[i], ' ',
+ (u64)nr);
}
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++)
- seq_buf_printf(s, "total_%s %llu\n",
- vm_event_name(memcg1_events[i]),
- (u64)memcg_events(memcg, memcg1_events[i]));
+ memcg_seq_buf_print_stat(s, "total_", vm_event_name(memcg1_events[i]),
+ ' ', (u64)memcg_events(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++)
- seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i),
- (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
- PAGE_SIZE);
+ memcg_seq_buf_print_stat(s, "total_", lru_list_name(i), ' ',
+ (u64)memcg_page_state(memcg, NR_LRU_BASE + i) *
+ PAGE_SIZE);
#ifdef CONFIG_DEBUG_VM
{
@@ -1933,8 +1945,8 @@ void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
anon_cost += mz->lruvec.anon_cost;
file_cost += mz->lruvec.file_cost;
}
- seq_buf_printf(s, "anon_cost %lu\n", anon_cost);
- seq_buf_printf(s, "file_cost %lu\n", file_cost);
+ memcg_seq_buf_print_stat(s, NULL, "anon_cost", ' ', (u64)anon_cost);
+ memcg_seq_buf_print_stat(s, NULL, "file_cost", ' ', (u64)file_cost);
}
#endif
}
diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h
index 6358464bb416..b1015cbe858f 100644
--- a/mm/memcontrol-v1.h
+++ b/mm/memcontrol-v1.h
@@ -4,6 +4,7 @@
#define __MM_MEMCONTROL_V1_H
#include <linux/cgroup-defs.h>
+#include <linux/seq_buf.h>
/* Cgroup v1 and v2 common declarations */
@@ -33,6 +34,9 @@ int memory_stat_show(struct seq_file *m, void *v);
void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n);
struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg);
+void memcg_seq_buf_print_stat(struct seq_buf *s, const char *prefix,
+ const char *name, char sep, u64 val);
+
/* Cgroup v1-specific declarations */
#ifdef CONFIG_MEMCG_V1
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 86f43b7e5f71..136c08462cd1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -42,6 +42,7 @@
#include <linux/bit_spinlock.h>
#include <linux/rcupdate.h>
#include <linux/limits.h>
+#include <linux/sprintf.h>
#include <linux/export.h>
#include <linux/list.h>
#include <linux/mutex.h>
@@ -1460,6 +1461,53 @@ static bool memcg_accounts_hugetlb(void)
}
#endif /* CONFIG_HUGETLB_PAGE */
+/* Max 2^64 - 1 = 18446744073709551615 (20 digits) */
+#define MEMCG_DEC_U64_MAX_LEN 20
+
+/**
+ * memcg_seq_buf_print_stat - Write a name-value pair to a seq_buf with newline
+ * @s: The seq_buf to write to
+ * @prefix: Optional prefix string (can be NULL or "")
+ * @name: The name string to write
+ * @sep: Separator character between name and value (typically ' ' or '=')
+ * @val: The u64 value to write
+ *
+ * This helper efficiently formats and writes "<prefix><name><sep><value>\n"
+ * to a seq_buf. It manually converts the value to a string using num_to_str
+ * and embeds the separator and newline in the buffer to minimize function
+ * calls for better performance.
+ *
+ * The function checks for overflow at each step and returns early if any
+ * operation would cause the buffer to overflow.
+ *
+ * Example: memcg_seq_buf_print_stat(s, "total_", "cache", ' ', 1048576)
+ * Output: "total_cache 1048576\n"
+ */
+void memcg_seq_buf_print_stat(struct seq_buf *s, const char *prefix,
+ const char *name, char sep, u64 val)
+{
+ char num_buf[MEMCG_DEC_U64_MAX_LEN + 2]; /* +2 for separator and newline */
+ int num_len;
+
+ /* Embed separator at the beginning */
+ num_buf[0] = sep;
+
+ /* Convert number starting at offset 1 */
+ num_len = num_to_str(num_buf + 1, sizeof(num_buf) - 2, val, 0);
+ if (num_len <= 0)
+ return;
+
+ /* Embed newline at the end */
+ num_buf[num_len + 1] = '\n';
+
+ if (prefix && *prefix && seq_buf_puts(s, prefix))
+ return;
+ if (seq_buf_puts(s, name))
+ return;
+ /* Output separator, value, and newline in one call */
+ seq_buf_putmem(s, num_buf, num_len + 2);
+}
+
static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
{
int i;
@@ -1485,26 +1533,26 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
continue;
#endif
size = memcg_page_state_output(memcg, memory_stats[i].idx);
- seq_buf_printf(s, "%s %llu\n", memory_stats[i].name, size);
+ memcg_seq_buf_print_stat(s, NULL, memory_stats[i].name, ' ', size);
if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) {
size += memcg_page_state_output(memcg,
NR_SLAB_RECLAIMABLE_B);
- seq_buf_printf(s, "slab %llu\n", size);
+ memcg_seq_buf_print_stat(s, NULL, "slab", ' ', size);
}
}
/* Accumulated memory events */
- seq_buf_printf(s, "pgscan %lu\n",
- memcg_events(memcg, PGSCAN_KSWAPD) +
- memcg_events(memcg, PGSCAN_DIRECT) +
- memcg_events(memcg, PGSCAN_PROACTIVE) +
- memcg_events(memcg, PGSCAN_KHUGEPAGED));
- seq_buf_printf(s, "pgsteal %lu\n",
- memcg_events(memcg, PGSTEAL_KSWAPD) +
- memcg_events(memcg, PGSTEAL_DIRECT) +
- memcg_events(memcg, PGSTEAL_PROACTIVE) +
- memcg_events(memcg, PGSTEAL_KHUGEPAGED));
+ memcg_seq_buf_print_stat(s, NULL, "pgscan", ' ',
+ memcg_events(memcg, PGSCAN_KSWAPD) +
+ memcg_events(memcg, PGSCAN_DIRECT) +
+ memcg_events(memcg, PGSCAN_PROACTIVE) +
+ memcg_events(memcg, PGSCAN_KHUGEPAGED));
+ memcg_seq_buf_print_stat(s, NULL, "pgsteal", ' ',
+ memcg_events(memcg, PGSTEAL_KSWAPD) +
+ memcg_events(memcg, PGSTEAL_DIRECT) +
+ memcg_events(memcg, PGSTEAL_PROACTIVE) +
+ memcg_events(memcg, PGSTEAL_KHUGEPAGED));
for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
#ifdef CONFIG_MEMCG_V1
@@ -1512,9 +1560,9 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
memcg_vm_event_stat[i] == PGPGOUT)
continue;
#endif
- seq_buf_printf(s, "%s %lu\n",
- vm_event_name(memcg_vm_event_stat[i]),
- memcg_events(memcg, memcg_vm_event_stat[i]));
+ memcg_seq_buf_print_stat(s, NULL,
+ vm_event_name(memcg_vm_event_stat[i]), ' ',
+ memcg_events(memcg, memcg_vm_event_stat[i]));
}
}
@@ -4544,7 +4592,12 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
if (memory_stats[i].idx >= NR_VM_NODE_STAT_ITEMS)
continue;
- seq_printf(m, "%s", memory_stats[i].name);
+ /*
+ * Output format: "stat_name N0=value0 N1=value1 ...\n"
+ * Use seq_puts and seq_put_decimal_ull to avoid printf
+ * format parsing overhead in this hot path.
+ */
+ seq_puts(m, memory_stats[i].name);
for_each_node_state(nid, N_MEMORY) {
u64 size;
struct lruvec *lruvec;
@@ -4552,7 +4605,8 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
size = lruvec_page_state_output(lruvec,
memory_stats[i].idx);
- seq_printf(m, " N%d=%llu", nid, size);
+ seq_put_decimal_ull(m, " N", nid);
+ seq_put_decimal_ull(m, "=", size);
}
seq_putc(m, '\n');
}
--
2.43.0
On Fri, Jan 23, 2026 at 11:01 PM Jianyue Wu <wujianyue000@gmail.com> wrote: > > Replace seq_buf_printf() calls in memcg stat formatting with a > lightweight helper function and replace seq_printf() in numa stat > output with direct seq operations to avoid printf format parsing > overhead. Seems I still need to add optimization for memcg_numa_stat_show(), though the change there looks a bit complex; otherwise it didn't get enough perf gain.
On Fri, Jan 23, 2026 at 4:14 PM JP Kobryn <inwardvessel@gmail.com> wrote:
>
> On 1/22/26 3:42 AM, Jianyue Wu wrote:
> > Replace seq_printf/seq_buf_printf with lightweight helpers to avoid
> > printf parsing in memcg stats output.
>
> Hi Jianyue,
> I gave this patch a run and can confirm the perf gain. I left comments
> on reducing the amount of added lines so that it better resembles the
> existing code.
>
> Tested-by: JP Kobryn <inwardvessel@gmail.com>
>
Great, and thanks for the detailed review!
>
> There's a recurring pattern of 1) put name, 2) put separator, 3) put
> value. Instead of adding so many new lines, I wonder if you could use a
> function or macro that accepts: char *name, char sep, u64 val. You could
> then use it as a replacement for seq_printf() and avoid the extra added
> lines here and throughout this patch.
>
That's a good idea! Yes, if we use sep, then many places can use the
same function.
> > + memory_limit = (u64)memory * PAGE_SIZE;
> > + memsw_limit = (u64)memsw * PAGE_SIZE;
>
> I don't think in this case these new local variables are improving
> readability.
>
Agree, will remove it. Previously I wanted to keep them inside 80 columns,
as a hack:) Indent is not so easy to change.
> > + seq_buf_puts(s, "total_");
> > + memcg_seq_buf_put_name_val(s, memcg1_stat_names[i], (u64)nr);
>
> I would try and combine these 2 calls into 1 if possible. If the diff
> has close to a -1:+1 line change in places where seq_buf_printf() is
> replaced with some helper, it would reduce the noisiness. This applies
> to other areas where a prefix is put before calling a new helper.
>
Agree, I think we can add a prefix param here: 0) put prefix, 1) put name,
2) put separator, 3) put value. That way we can use a common API.
> > + u64 oom_kill;
> > +
> > + memcg_seq_put_name_val(sf, "oom_kill_disable",
> > + READ_ONCE(memcg->oom_kill_disable));
> > + memcg_seq_put_name_val(sf, "under_oom", (bool)memcg->under_oom);
> >
> > - seq_printf(sf, "oom_kill_disable %d\n", READ_ONCE(memcg->oom_kill_disable));
> > - seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom);
> > - seq_printf(sf, "oom_kill %lu\n",
> > - atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]));
> > + oom_kill = atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL]);
> > + memcg_seq_put_name_val(sf, "oom_kill", oom_kill);
>
> New local variable just adding extra lines.
>
Agree, will remove it.
> > +void memcg_seq_put_name_val(struct seq_file *m, const char *name, u64 val)
> > +{
> > + seq_puts(m, name);
> > + /* need a space between name and value */
> > + seq_put_decimal_ull(m, " ", val);
> > + seq_putc(m, '\n');
>
> I think seq_put* calls normally don't imply a newline. Maybe change the
> name to reflect, like something with "print"? Also, it's not really
> memcg specific.
>
> This function has a space as a separator. Earlier in your diff you were
> using '='. A separator parameter could allow this func to be used
> elsewhere, but you'd have to manage the newline somehow. Maybe a newline
> wrapper?
I think a newline wrapper API might add extra complexity; maybe this time
I'll keep the changes only for call sites that end with a newline. Places
that still don't print a newline, like the memory.numa_stat output, will
keep using the seq_printf() API.
But I'm not sure how much perf gain that will be; I will test in the next version.
But not sure how perf gain will be, I will test in the next version.
> > +void memcg_seq_buf_put_name_val(struct seq_buf *s, const char *name, u64 val)
> > +{
> > + char num_buf[MEMCG_DEC_U64_MAX_LEN];
> > + int num_len;
> > +
> > + num_len = num_to_str(num_buf, sizeof(num_buf), val, 0);
> > + if (num_len <= 0)
> > + return;
> > +
> > + if (seq_buf_puts(s, name))
> > + return;
> > + if (seq_buf_putc(s, ' '))
> > + return;
>
> Can num_buf[0] just be ' '? The length would have to be extended though.
> Not sure if saving a few seq_buf_putc() calls make a difference.
>
> > + if (seq_buf_putmem(s, num_buf, num_len))
> > + return;
> > + seq_buf_putc(s, '\n');
>
> Similary, though I'm not sure if it even performs better, this call
> could be removed and can do num_buf[num_len+1] = '\n' (extend buf
> again).
>
> If you make the two changes above you can call seq_buf_putmem() last.
Good catch~ Embedding the separator and newline in num_buf reduces
function calls from 4 to 2, perfect!
> > +}
> > +
> > static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
> > {
> > int i;
> > + u64 pgscan, pgsteal;
> >
> More extra local variables. You can save the lines instead.
>
Agree, will remove it.
> > @@ -4247,7 +4315,8 @@ static int peak_show(struct seq_file *sf, void *v, struct page_counter *pc)
> > else
> > peak = max(fd_peak, READ_ONCE(pc->local_watermark));
> >
> > - seq_printf(sf, "%llu\n", peak * PAGE_SIZE);
> > + seq_put_decimal_ull(sf, "", peak * PAGE_SIZE);
> > + seq_putc(sf, '\n');
>
> Your benchmark mentions reading memory and numa stat files, but this
> function is not reached in those cases. Is this a hot path for you? If
> not, maybe just leave this and any others like it alone.
>
This path is not touched by memory.stat. Agree, will remove the change.
> > + u64 low, high, max, oom, oom_kill;
> > + u64 oom_group_kill, sock_throttled;
> > +
> Same, more new locals.
Will remove them.
> > + u64 swap_high, swap_max, swap_fail;
> > +
> > + swap_high = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_HIGH]);
> > + swap_max = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_MAX]);
> > + swap_fail = atomic_long_read(&memcg->memory_events[MEMCG_SWAP_FAIL]);
>
> Same, new local variables.
Will remove them.
On Thu, 22 Jan 2026 19:42:42 +0800 Jianyue Wu <wujianyue000@gmail.com> wrote: > Replace seq_printf/seq_buf_printf with lightweight helpers to avoid > printf parsing in memcg stats output. > > Key changes: > - Add memcg_seq_put_name_val() for seq_file "name value\n" formatting > - Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting > - Update __memory_events_show(), swap_events_show(), > memory_stat_format(), memory_numa_stat_show(), and related helpers > - Introduce local variables to improve readability and reduce line length > > Performance: > - 1M reads of memory.stat+memory.numa_stat > - Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s > - After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop) So the tl;dr here is "vfprintf() is slow". It's quite a large change, although not a complex one. Do we need to change so much? Would some subset of these changes provide most of the benefit? It does rather uglify things so there's a risk that helpful people will send "cleanups" which switch back to using *printf*. Explanatory code comments would help prevent that but we'd need a lot of them. I dunno, what do people think? Does the benefit justify the change? > Tests: > - Script: > for ((i=1; i<=1000000; i++)); do > : > /dev/null < /sys/fs/cgroup/memory.stat > : > /dev/null < /sys/fs/cgroup/memory.numa_stat > done
On Thu, Jan 22, 2026 at 09:13:51AM -0800, Andrew Morton wrote: > On Thu, 22 Jan 2026 19:42:42 +0800 Jianyue Wu <wujianyue000@gmail.com> wrote: > > > Replace seq_printf/seq_buf_printf with lightweight helpers to avoid > > printf parsing in memcg stats output. > > > > Key changes: > > - Add memcg_seq_put_name_val() for seq_file "name value\n" formatting > > - Add memcg_seq_buf_put_name_val() for seq_buf "name value\n" formatting > > - Update __memory_events_show(), swap_events_show(), > > memory_stat_format(), memory_numa_stat_show(), and related helpers > > - Introduce local variables to improve readability and reduce line length > > > > Performance: > > - 1M reads of memory.stat+memory.numa_stat > > - Before: real 0m9.663s, user 0m4.840s, sys 0m4.823s > > - After: real 0m9.051s, user 0m4.775s, sys 0m4.275s (~11.4% sys drop) > > So the tl;dr here is "vfprintf() is slow". > > It's quite a large change, although not a complex one. > > Do we need to change so much? Would some subset of these changes > provide most of the benefit? > > It does rather uglify things so there's a risk that helpful people will > send "cleanups" which switch back to using *printf*. Explanatory code > comments would help prevent that but we'd need a lot of them. > > I dunno, what do people think? Does the benefit justify the change? It does come with significant benefit but there is no urgency and we can definitely decrease the ugliness. JP told me he has some ideas to improve this. Andrew, let's skip this patch for the upcoming merge window and you can drop it from mm-tree if it is a burden.
On Fri, Jan 23, 2026 at 5:12 AM Shakeel Butt <shakeel.butt@linux.dev> wrote: > > On Thu, Jan 22, 2026 at 09:13:51AM -0800, Andrew Morton wrote: > > On Thu, 22 Jan 2026 19:42:42 +0800 Jianyue Wu <wujianyue000@gmail.com> wrote: > > So the tl;dr here is "vfprintf() is slow". > > > > It's quite a large change, although not a complex one. > > > > Do we need to change so much? Would some subset of these changes > > provide most of the benefit? > > > > It does rather uglify things so there's a risk that helpful people will > > send "cleanups" which switch back to using *printf*. Explanatory code > > comments would help prevent that but we'd need a lot of them. > > > > I dunno, what do people think? Does the benefit justify the change? > > It does come with significant benefit but there is no urgency and we can > definitely decrease the ugliness. JP told me he has some ideas to > improve this. > > Andrew, let's skip this patch for the upcoming merge window and you can > drop it from mm-tree if it is a burden. > Agree. This touches a lot of code and increases complexity, which is a significant downside. If there are ideas to improve it, that’d be great. I’m OK with dropping it for the upcoming merge window, and drop it if it is a burden.
© 2016 - 2026 Red Hat, Inc.