Add record/report --latency flag that allows to capture and show
latency-centric profiles rather than the default CPU-consumption-centric
profiles. For latency profiles record captures context switch events,
and report shows Latency as the first column.
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
tools/perf/builtin-record.c | 20 +++++++++++++++++
tools/perf/builtin-report.c | 32 +++++++++++++++++++++++----
tools/perf/ui/hist.c | 41 ++++++++++++++++++++++++++++-------
tools/perf/util/hist.h | 1 +
tools/perf/util/sort.c | 33 +++++++++++++++++++++++-----
tools/perf/util/sort.h | 2 +-
tools/perf/util/symbol_conf.h | 4 +++-
7 files changed, 113 insertions(+), 20 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5db1aedf48df9..e219639ac401b 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -161,6 +161,7 @@ struct record {
struct evlist *sb_evlist;
pthread_t thread_id;
int realtime_prio;
+ bool latency;
bool switch_output_event_set;
bool no_buildid;
bool no_buildid_set;
@@ -3371,6 +3372,9 @@ static struct option __record_options[] = {
parse_events_option),
OPT_CALLBACK(0, "filter", &record.evlist, "filter",
"event filter", parse_filter),
+ OPT_BOOLEAN(0, "latency", &record.latency,
+ "Enable data collection for latency profiling.\n"
+ "\t\t\t Use perf report --latency for latency-centric profile."),
OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
NULL, "don't record events from perf itself",
exclude_perf),
@@ -4017,6 +4021,22 @@ int cmd_record(int argc, const char **argv)
}
+ if (record.latency) {
+ /*
+ * There is no fundamental reason why latency profiling
+ * can't work for system-wide mode, but exact semantics
+ * and details are to be defined.
+ * See the following thread for details:
+ * https://lore.kernel.org/all/Z4XDJyvjiie3howF@google.com/
+ */
+ if (record.opts.target.system_wide) {
+ pr_err("Failed: latency profiling is not supported with system-wide collection.\n");
+ err = -EINVAL;
+ goto out_opts;
+ }
+ record.opts.record_switch_events = true;
+ }
+
if (rec->buildid_mmap) {
if (!perf_can_record_build_id()) {
pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 2a19abdc869a1..69de6dbefecfa 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -112,6 +112,8 @@ struct report {
u64 nr_entries;
u64 queue_size;
u64 total_cycles;
+ u64 total_samples;
+ u64 singlethreaded_samples;
int socket_filter;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
struct branch_type_stat brtype_stat;
@@ -331,6 +333,10 @@ static int process_sample_event(const struct perf_tool *tool,
&rep->total_cycles, evsel);
}
+ rep->total_samples++;
+ if (al.parallelism == 1)
+ rep->singlethreaded_samples++;
+
ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
if (ret < 0)
pr_debug("problem adding hist entry, skipping event\n");
@@ -1079,6 +1085,11 @@ static int __cmd_report(struct report *rep)
return ret;
}
+ /* Don't show Latency column for non-parallel profiles by default. */
+ if (rep->singlethreaded_samples * 100 / rep->total_samples >= 99 &&
+ !symbol_conf.prefer_latency)
+ perf_hpp__cancel_latency();
+
evlist__check_mem_load_aux(session->evlist);
if (rep->stats_mode)
@@ -1468,6 +1479,10 @@ int cmd_report(int argc, const char **argv)
"Disable raw trace ordering"),
OPT_BOOLEAN(0, "skip-empty", &report.skip_empty,
"Do not display empty (or dummy) events in the output"),
+ OPT_BOOLEAN(0, "latency", &symbol_conf.prefer_latency,
+ "Show latency-centric profile rather than the default\n"
+ "\t\t\t CPU-consumption-centric profile\n"
+ "\t\t\t (requires perf record --latency flag)."),
OPT_END()
};
struct perf_data data = {
@@ -1722,16 +1737,25 @@ int cmd_report(int argc, const char **argv)
symbol_conf.annotate_data_sample = true;
}
+ symbol_conf.enable_latency = true;
if (report.disable_order || !perf_session__has_switch_events(session)) {
if (symbol_conf.parallelism_list_str ||
- (sort_order && strstr(sort_order, "parallelism")) ||
- (field_order && strstr(field_order, "parallelism"))) {
+ symbol_conf.prefer_latency ||
+ (sort_order && (strstr(sort_order, "latency") ||
+ strstr(sort_order, "parallelism"))) ||
+ (field_order && (strstr(field_order, "latency") ||
+ strstr(field_order, "parallelism")))) {
if (report.disable_order)
- ui__error("Use of parallelism is incompatible with --disable-order.\n");
+ ui__error("Use of latency profile or parallelism is incompatible with --disable-order.\n");
else
- ui__error("Use of parallelism requires --switch-events during record.\n");
+ ui__error("Use of latency profile or parallelism requires --latency flag during record.\n");
return -1;
}
+ /*
+ * If user did not ask for anything related to
+ * latency/parallelism explicitly, just don't show it.
+ */
+ symbol_conf.enable_latency = false;
}
if (sort_order && strstr(sort_order, "ipc")) {
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 22e31d835301e..d87046052b432 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -632,27 +632,36 @@ void perf_hpp__init(void)
return;
if (symbol_conf.cumulate_callchain) {
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_ACC);
+ /* Use idempotent addition to avoid more complex logic. */
+ if (symbol_conf.prefer_latency)
+ hpp_dimension__add_output(PERF_HPP__LATENCY_ACC, true);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_ACC, true);
+ if (symbol_conf.enable_latency)
+ hpp_dimension__add_output(PERF_HPP__LATENCY_ACC, true);
perf_hpp__format[PERF_HPP__OVERHEAD].name = "Self";
}
- hpp_dimension__add_output(PERF_HPP__OVERHEAD);
+ if (symbol_conf.prefer_latency)
+ hpp_dimension__add_output(PERF_HPP__LATENCY, true);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD, true);
+ if (symbol_conf.enable_latency)
+ hpp_dimension__add_output(PERF_HPP__LATENCY, true);
if (symbol_conf.show_cpu_utilization) {
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_SYS);
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_US);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_SYS, false);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_US, false);
if (perf_guest) {
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_SYS);
- hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_US);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_SYS, false);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_US, false);
}
}
if (symbol_conf.show_nr_samples)
- hpp_dimension__add_output(PERF_HPP__SAMPLES);
+ hpp_dimension__add_output(PERF_HPP__SAMPLES, false);
if (symbol_conf.show_total_period)
- hpp_dimension__add_output(PERF_HPP__PERIOD);
+ hpp_dimension__add_output(PERF_HPP__PERIOD, false);
}
void perf_hpp_list__column_register(struct perf_hpp_list *list,
@@ -701,6 +710,22 @@ void perf_hpp__cancel_cumulate(void)
}
}
+void perf_hpp__cancel_latency(void)
+{
+ struct perf_hpp_fmt *fmt, *lat, *acc, *tmp;
+
+ if (is_strict_order(field_order) || is_strict_order(sort_order))
+ return;
+
+ lat = &perf_hpp__format[PERF_HPP__LATENCY];
+ acc = &perf_hpp__format[PERF_HPP__LATENCY_ACC];
+
+ perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) {
+ if (fmt_equal(lat, fmt) || fmt_equal(acc, fmt))
+ perf_hpp__column_unregister(fmt);
+ }
+}
+
void perf_hpp__setup_output_field(struct perf_hpp_list *list)
{
struct perf_hpp_fmt *fmt;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 91159f16c60b2..29d4c7a3d1747 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -582,6 +582,7 @@ enum {
void perf_hpp__init(void);
void perf_hpp__cancel_cumulate(void);
+void perf_hpp__cancel_latency(void);
void perf_hpp__setup_output_field(struct perf_hpp_list *list);
void perf_hpp__reset_output_field(struct perf_hpp_list *list);
void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index bc4c3acfe7552..2b6023de7a53a 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2622,6 +2622,7 @@ struct hpp_dimension {
const char *name;
struct perf_hpp_fmt *fmt;
int taken;
+ int was_taken;
};
#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
@@ -3513,6 +3514,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
return -1;
hd->taken = 1;
+ hd->was_taken = 1;
perf_hpp_list__register_sort_field(list, fmt);
return 0;
}
@@ -3547,10 +3549,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list,
return 0;
}
-int hpp_dimension__add_output(unsigned col)
+int hpp_dimension__add_output(unsigned col, bool implicit)
{
+ struct hpp_dimension *hd;
+
BUG_ON(col >= PERF_HPP__MAX_INDEX);
- return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
+ hd = &hpp_sort_dimensions[col];
+ if (implicit && !hd->was_taken)
+ return 0;
+ return __hpp_dimension__add_output(&perf_hpp_list, hd);
}
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
@@ -3809,10 +3816,24 @@ static char *setup_overhead(char *keys)
if (sort__mode == SORT_MODE__DIFF)
return keys;
- keys = prefix_if_not_in("overhead", keys);
-
- if (symbol_conf.cumulate_callchain)
- keys = prefix_if_not_in("overhead_children", keys);
+ if (symbol_conf.prefer_latency) {
+ keys = prefix_if_not_in("overhead", keys);
+ keys = prefix_if_not_in("latency", keys);
+ if (symbol_conf.cumulate_callchain) {
+ keys = prefix_if_not_in("overhead_children", keys);
+ keys = prefix_if_not_in("latency_children", keys);
+ }
+ } else if (!keys || (!strstr(keys, "overhead") &&
+ !strstr(keys, "latency"))) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency", keys);
+ keys = prefix_if_not_in("overhead", keys);
+ if (symbol_conf.cumulate_callchain) {
+ if (symbol_conf.enable_latency)
+ keys = prefix_if_not_in("latency_children", keys);
+ keys = prefix_if_not_in("overhead_children", keys);
+ }
+ }
return keys;
}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 11fb15f914093..180d36a2bea35 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -141,7 +141,7 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i
bool is_strict_order(const char *order);
-int hpp_dimension__add_output(unsigned col);
+int hpp_dimension__add_output(unsigned col, bool implicit);
void reset_dimensions(void);
int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
struct evlist *evlist,
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index c5b2e56127e22..cd9aa82c7d5ad 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -49,7 +49,9 @@ struct symbol_conf {
keep_exited_threads,
annotate_data_member,
annotate_data_sample,
- skip_empty;
+ skip_empty,
+ enable_latency,
+ prefer_latency;
const char *vmlinux_name,
*kallsyms_name,
*source_prefix,
--
2.48.1.262.g85cc9f2d1e-goog
On Mon, Jan 27, 2025 at 10:58:53AM +0100, Dmitry Vyukov wrote:
> Add record/report --latency flag that allows to capture and show
> latency-centric profiles rather than the default CPU-consumption-centric
> profiles. For latency profiles record captures context switch events,
> and report shows Latency as the first column.
It'd be nice if you could add a small example output in the commit
message.
>
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Ian Rogers <irogers@google.com>
> Cc: linux-perf-users@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---
[SNIP]
> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> index bc4c3acfe7552..2b6023de7a53a 100644
> --- a/tools/perf/util/sort.c
> +++ b/tools/perf/util/sort.c
> @@ -2622,6 +2622,7 @@ struct hpp_dimension {
> const char *name;
> struct perf_hpp_fmt *fmt;
> int taken;
> + int was_taken;
> };
>
> #define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
> @@ -3513,6 +3514,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
> return -1;
>
> hd->taken = 1;
> + hd->was_taken = 1;
> perf_hpp_list__register_sort_field(list, fmt);
> return 0;
> }
> @@ -3547,10 +3549,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list,
> return 0;
> }
>
> -int hpp_dimension__add_output(unsigned col)
> +int hpp_dimension__add_output(unsigned col, bool implicit)
> {
> + struct hpp_dimension *hd;
> +
> BUG_ON(col >= PERF_HPP__MAX_INDEX);
> - return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
> + hd = &hpp_sort_dimensions[col];
> + if (implicit && !hd->was_taken)
> + return 0;
I don't understand why you need 'was_taken' field. Can it use the
'taken' field?
> + return __hpp_dimension__add_output(&perf_hpp_list, hd);
> }
>
> int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
> @@ -3809,10 +3816,24 @@ static char *setup_overhead(char *keys)
> if (sort__mode == SORT_MODE__DIFF)
> return keys;
>
> - keys = prefix_if_not_in("overhead", keys);
> -
> - if (symbol_conf.cumulate_callchain)
> - keys = prefix_if_not_in("overhead_children", keys);
> + if (symbol_conf.prefer_latency) {
> + keys = prefix_if_not_in("overhead", keys);
> + keys = prefix_if_not_in("latency", keys);
> + if (symbol_conf.cumulate_callchain) {
> + keys = prefix_if_not_in("overhead_children", keys);
> + keys = prefix_if_not_in("latency_children", keys);
> + }
> + } else if (!keys || (!strstr(keys, "overhead") &&
> + !strstr(keys, "latency"))) {
> + if (symbol_conf.enable_latency)
> + keys = prefix_if_not_in("latency", keys);
> + keys = prefix_if_not_in("overhead", keys);
> + if (symbol_conf.cumulate_callchain) {
> + if (symbol_conf.enable_latency)
> + keys = prefix_if_not_in("latency_children", keys);
> + keys = prefix_if_not_in("overhead_children", keys);
> + }
> + }
Do you really need this complexity? I think it's better to fix the order
of columns in both case.
Thanks,
Namhyung
>
> return keys;
> }
> diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> index 11fb15f914093..180d36a2bea35 100644
> --- a/tools/perf/util/sort.h
> +++ b/tools/perf/util/sort.h
> @@ -141,7 +141,7 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i
>
> bool is_strict_order(const char *order);
>
> -int hpp_dimension__add_output(unsigned col);
> +int hpp_dimension__add_output(unsigned col, bool implicit);
> void reset_dimensions(void);
> int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
> struct evlist *evlist,
> diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
> index c5b2e56127e22..cd9aa82c7d5ad 100644
> --- a/tools/perf/util/symbol_conf.h
> +++ b/tools/perf/util/symbol_conf.h
> @@ -49,7 +49,9 @@ struct symbol_conf {
> keep_exited_threads,
> annotate_data_member,
> annotate_data_sample,
> - skip_empty;
> + skip_empty,
> + enable_latency,
> + prefer_latency;
> const char *vmlinux_name,
> *kallsyms_name,
> *source_prefix,
> --
> 2.48.1.262.g85cc9f2d1e-goog
>
On Wed, 29 Jan 2025 at 06:03, Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Mon, Jan 27, 2025 at 10:58:53AM +0100, Dmitry Vyukov wrote:
> > Add record/report --latency flag that allows to capture and show
> > latency-centric profiles rather than the default CPU-consumption-centric
> > profiles. For latency profiles record captures context switch events,
> > and report shows Latency as the first column.
>
> It'd be nice if you could add a small example output in the commit
> message.
>
> >
> > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > Cc: Namhyung Kim <namhyung@kernel.org>
> > Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> > Cc: Ian Rogers <irogers@google.com>
> > Cc: linux-perf-users@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > ---
> [SNIP]
> > diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> > index bc4c3acfe7552..2b6023de7a53a 100644
> > --- a/tools/perf/util/sort.c
> > +++ b/tools/perf/util/sort.c
> > @@ -2622,6 +2622,7 @@ struct hpp_dimension {
> > const char *name;
> > struct perf_hpp_fmt *fmt;
> > int taken;
> > + int was_taken;
> > };
> >
> > #define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
> > @@ -3513,6 +3514,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
> > return -1;
> >
> > hd->taken = 1;
> > + hd->was_taken = 1;
> > perf_hpp_list__register_sort_field(list, fmt);
> > return 0;
> > }
> > @@ -3547,10 +3549,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list,
> > return 0;
> > }
> >
> > -int hpp_dimension__add_output(unsigned col)
> > +int hpp_dimension__add_output(unsigned col, bool implicit)
> > {
> > + struct hpp_dimension *hd;
> > +
> > BUG_ON(col >= PERF_HPP__MAX_INDEX);
> > - return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
> > + hd = &hpp_sort_dimensions[col];
> > + if (implicit && !hd->was_taken)
> > + return 0;
>
> I don't understand why you need 'was_taken' field. Can it use the
> 'taken' field?
I've started getting failed asserts in
perf_hpp__cancel_cumulate/latency when removing columns still linked
into sort order list.
I've figured out that columns we implicitly add in setup_overhead and
perf_hpp__init must match exactly. If we add one in setup_overhead,
but not in perf_hpp__init, then it starts failing.
Taken does not work b/c there is reset_dimensions call between these
functions, so they actually add the same columns twice to field/sort
lists (and that prevents failures in
perf_hpp__cancel_cumulate/latency).
Initially I've just tried to match logic in setup_overhead and in
perf_hpp__init. But it turned out quite messy, duplicate logic, and
e.g. in setup_overhead we look at sort_order, but in perf_hpp__init we
already can't look at it b/c we already altered it in setup_overhead.
That logic would also be quite fragile. Adding was_taken looks like
the simplest, most reliable, and less fragile with respect to future
changes approach.
> > + return __hpp_dimension__add_output(&perf_hpp_list, hd);
> > }
> >
> > int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
> > @@ -3809,10 +3816,24 @@ static char *setup_overhead(char *keys)
> > if (sort__mode == SORT_MODE__DIFF)
> > return keys;
> >
> > - keys = prefix_if_not_in("overhead", keys);
> > -
> > - if (symbol_conf.cumulate_callchain)
> > - keys = prefix_if_not_in("overhead_children", keys);
> > + if (symbol_conf.prefer_latency) {
> > + keys = prefix_if_not_in("overhead", keys);
> > + keys = prefix_if_not_in("latency", keys);
> > + if (symbol_conf.cumulate_callchain) {
> > + keys = prefix_if_not_in("overhead_children", keys);
> > + keys = prefix_if_not_in("latency_children", keys);
> > + }
> > + } else if (!keys || (!strstr(keys, "overhead") &&
> > + !strstr(keys, "latency"))) {
> > + if (symbol_conf.enable_latency)
> > + keys = prefix_if_not_in("latency", keys);
> > + keys = prefix_if_not_in("overhead", keys);
> > + if (symbol_conf.cumulate_callchain) {
> > + if (symbol_conf.enable_latency)
> > + keys = prefix_if_not_in("latency_children", keys);
> > + keys = prefix_if_not_in("overhead_children", keys);
> > + }
> > + }
>
> Do you really need this complexity? I think it's better to fix the order
> of columns in both case.
This is sort order which affects ordering of row, and order of rows is
basically the most important thing for a profiler. If we fix the
order, it will be showing completely different things.
"latency" and "overhead" are equivalent with respect to their
fundamentality for a profile. So we shouldn't be adding any of them,
if any of them are already explicitly specified by the user.
For example, the command from tips.txt:
perf report --hierarchy --sort latency,parallelism,comm,symbol
if we prepend "overhead", it all falls apart.
Or for 2 default modes (normals and latency) we want "overhead" or
"latency" to come first. Prepending "latency" for the current CPU mode
would lead to completely different ordering.
> Thanks,
> Namhyung
>
> >
> > return keys;
> > }
> > diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
> > index 11fb15f914093..180d36a2bea35 100644
> > --- a/tools/perf/util/sort.h
> > +++ b/tools/perf/util/sort.h
> > @@ -141,7 +141,7 @@ int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, i
> >
> > bool is_strict_order(const char *order);
> >
> > -int hpp_dimension__add_output(unsigned col);
> > +int hpp_dimension__add_output(unsigned col, bool implicit);
> > void reset_dimensions(void);
> > int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
> > struct evlist *evlist,
> > diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
> > index c5b2e56127e22..cd9aa82c7d5ad 100644
> > --- a/tools/perf/util/symbol_conf.h
> > +++ b/tools/perf/util/symbol_conf.h
> > @@ -49,7 +49,9 @@ struct symbol_conf {
> > keep_exited_threads,
> > annotate_data_member,
> > annotate_data_sample,
> > - skip_empty;
> > + skip_empty,
> > + enable_latency,
> > + prefer_latency;
> > const char *vmlinux_name,
> > *kallsyms_name,
> > *source_prefix,
> > --
> > 2.48.1.262.g85cc9f2d1e-goog
> >
On Wed, Jan 29, 2025 at 08:12:51AM +0100, Dmitry Vyukov wrote:
> On Wed, 29 Jan 2025 at 06:03, Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > On Mon, Jan 27, 2025 at 10:58:53AM +0100, Dmitry Vyukov wrote:
> > > Add record/report --latency flag that allows to capture and show
> > > latency-centric profiles rather than the default CPU-consumption-centric
> > > profiles. For latency profiles record captures context switch events,
> > > and report shows Latency as the first column.
> >
> > It'd be nice if you could add a small example output in the commit
> > message.
> >
> > >
> > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > > Cc: Namhyung Kim <namhyung@kernel.org>
> > > Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> > > Cc: Ian Rogers <irogers@google.com>
> > > Cc: linux-perf-users@vger.kernel.org
> > > Cc: linux-kernel@vger.kernel.org
> > > ---
> > [SNIP]
> > > diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> > > index bc4c3acfe7552..2b6023de7a53a 100644
> > > --- a/tools/perf/util/sort.c
> > > +++ b/tools/perf/util/sort.c
> > > @@ -2622,6 +2622,7 @@ struct hpp_dimension {
> > > const char *name;
> > > struct perf_hpp_fmt *fmt;
> > > int taken;
> > > + int was_taken;
> > > };
> > >
> > > #define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
> > > @@ -3513,6 +3514,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
> > > return -1;
> > >
> > > hd->taken = 1;
> > > + hd->was_taken = 1;
> > > perf_hpp_list__register_sort_field(list, fmt);
> > > return 0;
> > > }
> > > @@ -3547,10 +3549,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list,
> > > return 0;
> > > }
> > >
> > > -int hpp_dimension__add_output(unsigned col)
> > > +int hpp_dimension__add_output(unsigned col, bool implicit)
> > > {
> > > + struct hpp_dimension *hd;
> > > +
> > > BUG_ON(col >= PERF_HPP__MAX_INDEX);
> > > - return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
> > > + hd = &hpp_sort_dimensions[col];
> > > + if (implicit && !hd->was_taken)
> > > + return 0;
> >
> > I don't understand why you need 'was_taken' field. Can it use the
> > 'taken' field?
>
> I've started getting failed asserts in
> perf_hpp__cancel_cumulate/latency when removing columns still linked
> into sort order list.
Ok.
>
> I've figured out that columns we implicitly add in setup_overhead and
> perf_hpp__init must match exactly. If we add one in setup_overhead,
> but not in perf_hpp__init, then it starts failing.
Right, I don't remember why we don't have unregister_sort_field or let
the perf_hpp__column_unregister() remove it from the sort_list too.
>
> Taken does not work b/c there is reset_dimensions call between these
> functions, so they actually add the same columns twice to field/sort
> lists (and that prevents failures in
> perf_hpp__cancel_cumulate/latency).
I see.
>
> Initially I've just tried to match logic in setup_overhead and in
> perf_hpp__init. But it turned out quite messy, duplicate logic, and
> e.g. in setup_overhead we look at sort_order, but in perf_hpp__init we
> already can't look at it b/c we already altered it in setup_overhead.
> That logic would also be quite fragile. Adding was_taken looks like
> the simplest, most reliable, and less fragile with respect to future
> changes approach.
Maybe just remove it from the sort_list. There should be no reason to
keep it in the list.
>
>
> > > + return __hpp_dimension__add_output(&perf_hpp_list, hd);
> > > }
> > >
> > > int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
> > > @@ -3809,10 +3816,24 @@ static char *setup_overhead(char *keys)
> > > if (sort__mode == SORT_MODE__DIFF)
> > > return keys;
> > >
> > > - keys = prefix_if_not_in("overhead", keys);
> > > -
> > > - if (symbol_conf.cumulate_callchain)
> > > - keys = prefix_if_not_in("overhead_children", keys);
> > > + if (symbol_conf.prefer_latency) {
> > > + keys = prefix_if_not_in("overhead", keys);
> > > + keys = prefix_if_not_in("latency", keys);
> > > + if (symbol_conf.cumulate_callchain) {
> > > + keys = prefix_if_not_in("overhead_children", keys);
> > > + keys = prefix_if_not_in("latency_children", keys);
> > > + }
> > > + } else if (!keys || (!strstr(keys, "overhead") &&
> > > + !strstr(keys, "latency"))) {
> > > + if (symbol_conf.enable_latency)
> > > + keys = prefix_if_not_in("latency", keys);
> > > + keys = prefix_if_not_in("overhead", keys);
> > > + if (symbol_conf.cumulate_callchain) {
> > > + if (symbol_conf.enable_latency)
> > > + keys = prefix_if_not_in("latency_children", keys);
> > > + keys = prefix_if_not_in("overhead_children", keys);
> > > + }
> > > + }
> >
> > Do you really need this complexity? I think it's better to fix the order
> > of columns in both case.
>
> This is sort order which affects ordering of row, and order of rows is
> basically the most important thing for a profiler. If we fix the
> order, it will be showing completely different things.
>
> "latency" and "overhead" are equivalent with respect to their
> fundamentality for a profile. So we shouldn't be adding any of them,
> if any of them are already explicitly specified by the user.
>
> For example, the command from tips.txt:
>
> perf report --hierarchy --sort latency,parallelism,comm,symbol
>
> if we prepend "overhead", it all falls apart.
>
> Or for 2 default modes (normals and latency) we want "overhead" or
> "latency" to come first. Prepending "latency" for the current CPU mode
> would lead to completely different ordering.
I see. You want to sort by overhead in the default mode even if it has
implicitly-added latency field, and to sort by latency if --latency is
given explicitly.
I think it can be done with the following command line.
$ perf report -F overhead,latency,parallelism,comm,sym -s overhead
or
$ perf report -F overhead,latency,parallelism,comm,sym -s latency
IOW, you can keep the same output column ordering and sort the result
differently. But I'm not sure if it'd make the code simpler. :)
Thanks,
Namhyung
On Thu, 30 Jan 2025 at 07:30, Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Wed, Jan 29, 2025 at 08:12:51AM +0100, Dmitry Vyukov wrote:
> > On Wed, 29 Jan 2025 at 06:03, Namhyung Kim <namhyung@kernel.org> wrote:
> > >
> > > On Mon, Jan 27, 2025 at 10:58:53AM +0100, Dmitry Vyukov wrote:
> > > > Add record/report --latency flag that allows to capture and show
> > > > latency-centric profiles rather than the default CPU-consumption-centric
> > > > profiles. For latency profiles record captures context switch events,
> > > > and report shows Latency as the first column.
> > >
> > > It'd be nice if you could add a small example output in the commit
> > > message.
> > >
> > > >
> > > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > > > Cc: Namhyung Kim <namhyung@kernel.org>
> > > > Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> > > > Cc: Ian Rogers <irogers@google.com>
> > > > Cc: linux-perf-users@vger.kernel.org
> > > > Cc: linux-kernel@vger.kernel.org
> > > > ---
> > > [SNIP]
> > > > diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
> > > > index bc4c3acfe7552..2b6023de7a53a 100644
> > > > --- a/tools/perf/util/sort.c
> > > > +++ b/tools/perf/util/sort.c
> > > > @@ -2622,6 +2622,7 @@ struct hpp_dimension {
> > > > const char *name;
> > > > struct perf_hpp_fmt *fmt;
> > > > int taken;
> > > > + int was_taken;
> > > > };
> > > >
> > > > #define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
> > > > @@ -3513,6 +3514,7 @@ static int __hpp_dimension__add(struct hpp_dimension *hd,
> > > > return -1;
> > > >
> > > > hd->taken = 1;
> > > > + hd->was_taken = 1;
> > > > perf_hpp_list__register_sort_field(list, fmt);
> > > > return 0;
> > > > }
> > > > @@ -3547,10 +3549,15 @@ static int __hpp_dimension__add_output(struct perf_hpp_list *list,
> > > > return 0;
> > > > }
> > > >
> > > > -int hpp_dimension__add_output(unsigned col)
> > > > +int hpp_dimension__add_output(unsigned col, bool implicit)
> > > > {
> > > > + struct hpp_dimension *hd;
> > > > +
> > > > BUG_ON(col >= PERF_HPP__MAX_INDEX);
> > > > - return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
> > > > + hd = &hpp_sort_dimensions[col];
> > > > + if (implicit && !hd->was_taken)
> > > > + return 0;
> > >
> > > I don't understand why you need 'was_taken' field. Can it use the
> > > 'taken' field?
> >
> > I've started getting failed asserts in
> > perf_hpp__cancel_cumulate/latency when removing columns still linked
> > into sort order list.
>
> Ok.
>
> >
> > I've figured out that columns we implicitly add in setup_overhead and
> > perf_hpp__init must match exactly. If we add one in setup_overhead,
> > but not in perf_hpp__init, then it starts failing.
>
> Right, I don't remember why we don't have unregister_sort_field or let
> the perf_hpp__column_unregister() remove it from the sort_list too.
>
> >
> > Taken does not work b/c there is reset_dimensions call between these
> > functions, so they actually add the same columns twice to field/sort
> > lists (and that prevents failures in
> > perf_hpp__cancel_cumulate/latency).
>
> I see.
>
> >
> > Initially I've just tried to match logic in setup_overhead and in
> > perf_hpp__init. But it turned out quite messy, duplicate logic, and
> > e.g. in setup_overhead we look at sort_order, but in perf_hpp__init we
> > already can't look at it b/c we already altered it in setup_overhead.
> > That logic would also be quite fragile. Adding was_taken looks like
> > the simplest, most reliable, and less fragile with respect to future
> > changes approach.
>
> Maybe just remove it from the sort_list. There should be no reason to
> keep it in the list.
I've tried that (and removing setup_overhead entirely), but none of
that yielded the same result.
E.g.:
perf report --sort latency,symbol
currently gives intended:
Latency Symbol
4.37% [k] native_queued_spin_lock_slowpath
2.71% [.] _PyEval_EvalFrameDefault
2.18% [.] iterative_hash
2.00% [k] clear_page_erms
but if I remove was_taken logic and make perf_hpp__column_unregister()
remove from the sort list, I get:
Overhead Latency Symbol
38.17% 4.37% [k] native_queued_spin_lock_slowpath
0.86% 2.71% [.] _PyEval_EvalFrameDefault
0.47% 2.18% [.] iterative_hash
0.68% 2.00% [k] clear_page_erms
So, so far the current version of the code is unfortunately the
simplest version with intended behavior that I come up with.
> > > > + return __hpp_dimension__add_output(&perf_hpp_list, hd);
> > > > }
> > > >
> > > > int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
> > > > @@ -3809,10 +3816,24 @@ static char *setup_overhead(char *keys)
> > > > if (sort__mode == SORT_MODE__DIFF)
> > > > return keys;
> > > >
> > > > - keys = prefix_if_not_in("overhead", keys);
> > > > -
> > > > - if (symbol_conf.cumulate_callchain)
> > > > - keys = prefix_if_not_in("overhead_children", keys);
> > > > + if (symbol_conf.prefer_latency) {
> > > > + keys = prefix_if_not_in("overhead", keys);
> > > > + keys = prefix_if_not_in("latency", keys);
> > > > + if (symbol_conf.cumulate_callchain) {
> > > > + keys = prefix_if_not_in("overhead_children", keys);
> > > > + keys = prefix_if_not_in("latency_children", keys);
> > > > + }
> > > > + } else if (!keys || (!strstr(keys, "overhead") &&
> > > > + !strstr(keys, "latency"))) {
> > > > + if (symbol_conf.enable_latency)
> > > > + keys = prefix_if_not_in("latency", keys);
> > > > + keys = prefix_if_not_in("overhead", keys);
> > > > + if (symbol_conf.cumulate_callchain) {
> > > > + if (symbol_conf.enable_latency)
> > > > + keys = prefix_if_not_in("latency_children", keys);
> > > > + keys = prefix_if_not_in("overhead_children", keys);
> > > > + }
> > > > + }
> > >
> > > Do you really need this complexity? I think it's better to fix the order
> > > of columns in both case.
> >
> > This is sort order which affects ordering of row, and order of rows is
> > basically the most important thing for a profiler. If we fix the
> > order, it will be showing completely different things.
> >
> > "latency" and "overhead" are equivalent with respect to their
> > fundamentality for a profile. So we shouldn't be adding any of them,
> > if any of them are already explicitly specified by the user.
> >
> > For example, the command from tips.txt:
> >
> > perf report --hierarchy --sort latency,parallelism,comm,symbol
> >
> > if we prepend "overhead", it all falls apart.
> >
> > Or for 2 default modes (normals and latency) we want "overhead" or
> > "latency" to come first. Prepending "latency" for the current CPU mode
> > would lead to completely different ordering.
>
> I see. You want to sort by overhead in the default mode even if it has
> implicitly-added latency field, and to sort by latency if --latency is
> given explicitly.
>
> I think it can be done with the following command line.
>
> $ perf report -F overhead,latency,parallelism,comm,sym -s overhead
>
> or
>
> $ perf report -F overhead,latency,parallelism,comm,sym -s latency
>
> IOW, you can keep the same output column ordering and sort the result
> differently. But I'm not sure if it'd make the code simpler. :)
>
> Thanks,
> Namhyung
>
© 2016 - 2026 Red Hat, Inc.