An accumulative allocation counter can be used to evaluate memory
allocation behavior/rate via delta(counter)/delta(time). It can help
analyze performance issues and identify the top modules with a high
rate of memory allocation activity.
Since this would incur extra performance and memory overhead, introduce
the kconfig option MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER to
gate it.
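For example, with two snapshots of /proc/allocinfo taken at times t1
and t2, the allocation rate of a callsite is simply:

    rate = (accu_calls@t2 - accu_calls@t1) / (t2 - t1)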
Signed-off-by: David Wang <00107082@163.com>
---
Changes in v2:
- Add kconfig MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER as
suggested by "Kent Overstreet <kent.overstreet@linux.dev>"
---
include/linux/alloc_tag.h | 18 +++++++++++++++---
lib/Kconfig.debug | 10 ++++++++++
lib/alloc_tag.c | 10 +++++++++-
3 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 8c61ccd161ba..5a94d61205b5 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -18,6 +18,9 @@
 struct alloc_tag_counters {
 	u64 bytes;
 	u64 calls;
+#ifdef CONFIG_MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER
+	u64 accu_calls;
+#endif
 };
 
 /*
@@ -103,13 +106,19 @@ static inline bool mem_alloc_profiling_enabled(void)
 static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
 {
 	struct alloc_tag_counters v = { 0, 0 };
+#ifdef CONFIG_MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER
+	v.accu_calls = 0;
+#endif
 	struct alloc_tag_counters *counter;
 	int cpu;
 
 	for_each_possible_cpu(cpu) {
-		counter = per_cpu_ptr(tag->counters, cpu);
-		v.bytes += counter->bytes;
-		v.calls += counter->calls;
+		counter = per_cpu_ptr(tag->counters, cpu);
+		v.bytes += counter->bytes;
+		v.calls += counter->calls;
+#ifdef CONFIG_MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER
+		v.accu_calls += counter->accu_calls;
+#endif
 	}
 
 	return v;
@@ -145,6 +154,9 @@ static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag
 	 * counter because when we free each part the counter will be decremented.
 	 */
 	this_cpu_inc(tag->counters->calls);
+#ifdef CONFIG_MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER
+	this_cpu_inc(tag->counters->accu_calls);
+#endif
 }
 
 static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a30c03a66172..1e9974d28510 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1000,6 +1000,16 @@ config MEM_ALLOC_PROFILING_DEBUG
 	  Adds warnings with helpful error messages for memory allocation
 	  profiling.
 
+config MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER
+	bool "Enable accumulative allocation counters"
+	default n
+	depends on MEM_ALLOC_PROFILING
+	help
+	  Record an accumulative call counter for each memory allocation. This
+	  incurs a small extra performance and memory overhead. The counter
+	  can be used to evaluate allocation activity/rate via
+	  delta(counter)/delta(time).
+
 source "lib/Kconfig.kasan"
 source "lib/Kconfig.kfence"
 source "lib/Kconfig.kmsan"
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 81e5f9a70f22..6b03edb04e7d 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -66,8 +66,12 @@ static void allocinfo_stop(struct seq_file *m, void *arg)
 static void print_allocinfo_header(struct seq_buf *buf)
 {
 	/* Output format version, so we can change it. */
-	seq_buf_printf(buf, "allocinfo - version: 1.0\n");
+	seq_buf_printf(buf, "allocinfo - version: 1.1\n");
+#ifdef CONFIG_MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER
+	seq_buf_printf(buf, "# <size> <calls> <tag info> <accumulative calls>\n");
+#else
 	seq_buf_printf(buf, "# <size> <calls> <tag info>\n");
+#endif
 }
 
 static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
@@ -78,8 +82,12 @@ static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
 
 	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
 	codetag_to_text(out, ct);
+#ifdef CONFIG_MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER
+	seq_buf_printf(out, " %llu\n", counter.accu_calls);
+#else
 	seq_buf_putc(out, ' ');
 	seq_buf_putc(out, '\n');
+#endif
 }
 
 static int allocinfo_show(struct seq_file *m, void *arg)
--
2.39.2
Hi,
I found another usage/benefit for accumulative counters:
On my system, /proc/allocinfo yields about 5065 lines, of which about
2/3 have an accumulative counter of *0*, meaning no memory activity
(right?). It is quite a waste to keep those items which are *not alive
yet*. With the additional change below, /proc/allocinfo has only 1684
lines on my system:
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -95,8 +95,11 @@ static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
 	struct alloc_tag_counters counter = alloc_tag_read(tag);
 	s64 bytes = counter.bytes;
 
+	if (counter.accu_calls == 0)
+		return;
 	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
I think this is quite an improvement worth pursuing.
(counter.calls could also be used to filter out "inactive" items, but
lines that keep disappearing/reappearing can confuse monitoring systems.)
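For reference, a minimal sketch of this kind of polling (illustrative
only, not part of the patch; it assumes the v1.1 format above, where
<accumulative calls> is the last field of each line):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	/* Sum the <accumulative calls> column of /proc/allocinfo. */
	static unsigned long long total_accu_calls(void)
	{
		char line[512];
		unsigned long long total = 0, v;
		FILE *f = fopen("/proc/allocinfo", "r");

		if (!f)
			return 0;
		/* Skip the version line and the column header. */
		if (!fgets(line, sizeof(line), f) || !fgets(line, sizeof(line), f)) {
			fclose(f);
			return 0;
		}
		while (fgets(line, sizeof(line), f)) {
			/* The accumulative counter follows the last space. */
			char *p = strrchr(line, ' ');

			if (p && sscanf(p + 1, "%llu", &v) == 1)
				total += v;
		}
		fclose(f);
		return total;
	}

	int main(void)
	{
		unsigned long long t1 = total_accu_calls();

		sleep(5);
		/* delta(counter)/delta(time) over a 5-second window */
		printf("allocs/sec: %llu\n", (total_accu_calls() - t1) / 5);
		return 0;
	}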
Thanks~
David
At 2024-09-13 13:57:29, "David Wang" <00107082@163.com> wrote:
>[...]
On Wed, Dec 18, 2024 at 4:49 AM David Wang <00107082@163.com> wrote:
>
> Hi,
>
> I found another usage/benefit for accumulative counters:
>
> On my system, /proc/allocinfo yields about 5065 lines, of which about
> 2/3 have an accumulative counter of *0*, meaning no memory activity
> (right?). It is quite a waste to keep those items which are *not alive
> yet*. With the additional change below, /proc/allocinfo has only 1684
> lines on my system:
>
> --- a/lib/alloc_tag.c
> +++ b/lib/alloc_tag.c
> @@ -95,8 +95,11 @@ static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
>  	struct alloc_tag_counters counter = alloc_tag_read(tag);
>  	s64 bytes = counter.bytes;
>  
> +	if (counter.accu_calls == 0)
> +		return;
>  	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);
>
> I think this is quite an improvement worth pursuing.
> (counter.calls could also be used to filter out "inactive" items, but
> lines that keep disappearing/reappearing can confuse monitoring systems.)
Please see discussion at
https://lore.kernel.org/all/20241211085616.2471901-1-quic_zhenhuah@quicinc.com/
>
> Thanks~
> David
>
> [...]
Hi,

At 2024-12-19 02:22:53, "Suren Baghdasaryan" <surenb@google.com> wrote:
>On Wed, Dec 18, 2024 at 4:49 AM David Wang <00107082@163.com> wrote:
>>
>> Hi,
>>
>> I found another usage/benefit for accumulative counters:
>> [...]
>
>Please see discussion at
>https://lore.kernel.org/all/20241211085616.2471901-1-quic_zhenhuah@quicinc.com/

Thanks for the information.

>My point is that with this change we lose information which can be
>useful. For example if I want to analyze all the places in the kernel
>where memory can be potentially allocated, your change would prevent
>me from doing that

Maybe the filter can be disabled when DEBUG is on?

>No, I disagree. Allocation that was never invoked is not the same as
>no allocation at all. How would we know the difference if we filter
>out the empty ones?

Totally agree with this; I think (bytes || counter.calls) does not make
a good filter. The accumulative counter is the answer. :)

>If you don't want to see all the unused sites, you can filter them in
>the userspace. I also suspect that for practical purposes you would
>want to filter small ones (below some threshold) as well.

I have set up a monitoring tool polling /proc/allocinfo every 5 seconds
on my system. Reading out all the content in one round takes ~11ms and
~100 read syscalls in total; with the (counter.accu_calls == 0) filter,
it takes ~4.4ms and 34 read syscalls in total. A ~60% performance
improvement would be nice...

Thanks
David
On 2024/12/19 10:31, David Wang wrote:
> Hi,
>
> At 2024-12-19 02:22:53, "Suren Baghdasaryan" <surenb@google.com> wrote:
>> [...]
>
>> No, I disagree. Allocation that was never invoked is not the same as
>> no allocation at all. How would we know the difference if we filter
>> out the empty ones?
>
> Totally agree with this; I think (bytes || counter.calls) does not make
> a good filter. The accumulative counter is the answer. :)

hmm... it really depends on the use case IMHO. If memory consumption is
a concern, using counter.calls should suffice. However, for
performance-related scenarios as you stated, it's definitely better to
use an accumulative counter.

Neither of these addresses Suren's comment, though: "if I want to
analyze all the places in the kernel where memory can be potentially
allocated, your change would prevent me from doing that".

> [...]
At 2024-12-19 12:06:07, "Zhenhua Huang" <quic_zhenhuah@quicinc.com> wrote:
>On 2024/12/19 10:31, David Wang wrote:
>> [...]
>
>hmm... it really depends on the use case IMHO. If memory consumption is
>a concern, using counter.calls should suffice. However, for
>performance-related scenarios as you stated, it's definitely better to
>use an accumulative counter.
>
>Neither of these addresses Suren's comment, though: "if I want to
>analyze all the places in the kernel where memory can be potentially
>allocated, your change would prevent me from doing that".

Oh, as I mentioned above, maybe CONFIG_MEM_ALLOC_PROFILING_DEBUG can
help: when DEBUG is on, nothing is filtered.

David
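P.S. A minimal sketch of what I mean (untested, on top of the earlier
filter; just gate it on the DEBUG option):

--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ static void alloc_tag_to_text(struct seq_buf *out, struct codetag *ct)
 	struct alloc_tag_counters counter = alloc_tag_read(tag);
 	s64 bytes = counter.bytes;
 
+#ifndef CONFIG_MEM_ALLOC_PROFILING_DEBUG
 	if (counter.accu_calls == 0)
 		return;
+#endif
 	seq_buf_printf(out, "%12lli %8llu ", bytes, counter.calls);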
Hi David,
kernel test robot noticed the following build warnings:
[auto build test WARNING on akpm-mm/mm-nonmm-unstable]
[also build test WARNING on linus/master v6.11 next-20240918]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/David-Wang/lib-alloc_tag-Add-accumulative-call-counter-for-memory-allocation-profiling/20240913-140040
base: https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-nonmm-unstable
patch link: https://lore.kernel.org/r/20240913055729.7208-1-00107082%40163.com
patch subject: [PATCH v2] lib/alloc_tag: Add accumulative call counter for memory allocation profiling
config: arm-randconfig-002-20240919 (https://download.01.org/0day-ci/archive/20240919/202409190453.pe1HrGL0-lkp@intel.com/config)
compiler: clang version 20.0.0git (https://github.com/llvm/llvm-project 8663a75fa2f31299ab8d1d90288d9df92aadee88)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240919/202409190453.pe1HrGL0-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202409190453.pe1HrGL0-lkp@intel.com/
All warnings (new ones prefixed by >>):
In file included from drivers/hwtracing/coresight/coresight-tpiu.c:8:
In file included from include/linux/acpi.h:13:
In file included from include/linux/resource_ext.h:11:
In file included from include/linux/slab.h:16:
In file included from include/linux/gfp.h:7:
In file included from include/linux/mmzone.h:22:
In file included from include/linux/mm_types.h:21:
In file included from include/linux/percpu_counter.h:14:
In file included from include/linux/percpu.h:5:
>> include/linux/alloc_tag.h:108:39: warning: missing field 'accu_calls' initializer [-Wmissing-field-initializers]
     108 |         struct alloc_tag_counters v = { 0, 0 };
         |                                              ^
In file included from drivers/hwtracing/coresight/coresight-tpiu.c:9:
In file included from include/linux/amba/bus.h:19:
In file included from include/linux/regulator/consumer.h:35:
In file included from include/linux/suspend.h:5:
In file included from include/linux/swap.h:9:
In file included from include/linux/memcontrol.h:21:
In file included from include/linux/mm.h:2228:
include/linux/vmstat.h:517:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
517 | return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
| ~~~~~~~~~~~ ^ ~~~
2 warnings generated.
vim +/accu_calls +108 include/linux/alloc_tag.h
22d407b164ff79 Suren Baghdasaryan 2024-03-21  105  
22d407b164ff79 Suren Baghdasaryan 2024-03-21  106  static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
22d407b164ff79 Suren Baghdasaryan 2024-03-21  107  {
22d407b164ff79 Suren Baghdasaryan 2024-03-21 @108  	struct alloc_tag_counters v = { 0, 0 };
38770cdc292e9e David Wang         2024-09-13  109  #ifdef CONFIG_MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER
38770cdc292e9e David Wang         2024-09-13  110  	v.accu_calls = 0;
38770cdc292e9e David Wang         2024-09-13  111  #endif
22d407b164ff79 Suren Baghdasaryan 2024-03-21  112  	struct alloc_tag_counters *counter;
22d407b164ff79 Suren Baghdasaryan 2024-03-21  113  	int cpu;
22d407b164ff79 Suren Baghdasaryan 2024-03-21  114  
22d407b164ff79 Suren Baghdasaryan 2024-03-21  115  	for_each_possible_cpu(cpu) {
22d407b164ff79 Suren Baghdasaryan 2024-03-21  116  		counter = per_cpu_ptr(tag->counters, cpu);
22d407b164ff79 Suren Baghdasaryan 2024-03-21  117  		v.bytes += counter->bytes;
22d407b164ff79 Suren Baghdasaryan 2024-03-21  118  		v.calls += counter->calls;
38770cdc292e9e David Wang         2024-09-13  119  #ifdef CONFIG_MEM_ALLOC_PROFILING_ACCUMULATIVE_CALL_COUNTER
38770cdc292e9e David Wang         2024-09-13  120  		v.accu_calls += counter->accu_calls;
38770cdc292e9e David Wang         2024-09-13  121  #endif
22d407b164ff79 Suren Baghdasaryan 2024-03-21  122  	}
22d407b164ff79 Suren Baghdasaryan 2024-03-21  123  
22d407b164ff79 Suren Baghdasaryan 2024-03-21  124  	return v;
22d407b164ff79 Suren Baghdasaryan 2024-03-21  125  }
22d407b164ff79 Suren Baghdasaryan 2024-03-21  126  
22d407b164ff79 Suren Baghdasaryan 2024-03-21 126
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki