Show parallelism level in profiles if requested by user.
Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
tools/perf/builtin-report.c | 11 +++++++++++
tools/perf/util/hist.c | 2 ++
tools/perf/util/hist.h | 3 +++
tools/perf/util/session.c | 12 ++++++++++++
tools/perf/util/session.h | 1 +
tools/perf/util/sort.c | 23 +++++++++++++++++++++++
tools/perf/util/sort.h | 1 +
7 files changed, 53 insertions(+)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 0d9bd090eda71..14d49f0625881 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1720,6 +1720,17 @@ int cmd_report(int argc, const char **argv)
symbol_conf.annotate_data_sample = true;
}
+ if (report.disable_order || !perf_session__has_switch_events(session)) {
+ if ((sort_order && strstr(sort_order, "parallelism")) ||
+ (field_order && strstr(field_order, "parallelism"))) {
+ if (report.disable_order)
+ ui__error("Use of parallelism is incompatible with --disable-order.\n");
+ else
+ ui__error("Use of parallelism requires --switch-events during record.\n");
+ return -1;
+ }
+ }
+
if (sort_order && strstr(sort_order, "ipc")) {
parse_options_usage(report_usage, options, "s", 1);
goto error;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 0f30f843c566d..cafd693568189 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -207,6 +207,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__new_col_len(hists, HISTC_CGROUP, 6);
hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+ hists__new_col_len(hists, HISTC_PARALLELISM, 11);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -741,6 +742,7 @@ __hists__add_entry(struct hists *hists,
.ip = al->addr,
.level = al->level,
.code_page_size = sample->code_page_size,
+ .parallelism = al->parallelism,
.stat = {
.nr_events = 1,
.period = sample->period,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 46c8373e31465..a6e662d77dc24 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -42,6 +42,7 @@ enum hist_column {
HISTC_CGROUP_ID,
HISTC_CGROUP,
HISTC_PARENT,
+ HISTC_PARALLELISM,
HISTC_CPU,
HISTC_SOCKET,
HISTC_SRCLINE,
@@ -228,6 +229,7 @@ struct hist_entry {
u64 transaction;
s32 socket;
s32 cpu;
+ int parallelism;
u64 code_page_size;
u64 weight;
u64 ins_lat;
@@ -580,6 +582,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt);
struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c06e3020a9769..00fcf8d8ac255 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2403,6 +2403,18 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg)
return false;
}
+bool perf_session__has_switch_events(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->core.attr.context_switch)
+ return true;
+ }
+
+ return false;
+}
+
int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
{
char *bracket;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index bcf1bcf06959b..db1c120a9e67f 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -141,6 +141,7 @@ int perf_session__resolve_callchain(struct perf_session *session,
struct symbol **parent);
bool perf_session__has_traces(struct perf_session *session, const char *msg);
+bool perf_session__has_switch_events(struct perf_session *session);
void perf_event__attr_swap(struct perf_event_attr *attr);
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 3dd33721823f3..7eef43f5be360 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -892,6 +892,27 @@ struct sort_entry sort_cpu = {
.se_width_idx = HISTC_CPU,
};
+/* --sort parallelism */
+
+static int64_t
+sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->parallelism - left->parallelism;
+}
+
+static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%*d", width, he->parallelism);
+}
+
+struct sort_entry sort_parallelism = {
+ .se_header = "Parallelism",
+ .se_cmp = sort__parallelism_cmp,
+ .se_snprintf = hist_entry__parallelism_snprintf,
+ .se_width_idx = HISTC_PARALLELISM,
+};
+
/* --sort cgroup_id */
static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
@@ -2534,6 +2555,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset),
DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset),
DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline),
+ DIM(SORT_PARALLELISM, "parallelism", sort_parallelism),
};
#undef DIM
@@ -2735,6 +2757,7 @@ MK_SORT_ENTRY_CHK(thread)
MK_SORT_ENTRY_CHK(comm)
MK_SORT_ENTRY_CHK(dso)
MK_SORT_ENTRY_CHK(sym)
+MK_SORT_ENTRY_CHK(parallelism)
static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index a8572574e1686..11fb15f914093 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -72,6 +72,7 @@ enum sort_type {
SORT_ANNOTATE_DATA_TYPE_OFFSET,
SORT_SYM_OFFSET,
SORT_ANNOTATE_DATA_TYPE_CACHELINE,
+ SORT_PARALLELISM,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
--
2.48.1.262.g85cc9f2d1e-goog
On Mon, Jan 27, 2025 at 10:58:49AM +0100, Dmitry Vyukov wrote: > Show parallelism level in profiles if requested by user. > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com> > Cc: Namhyung Kim <namhyung@kernel.org> > Cc: Arnaldo Carvalho de Melo <acme@kernel.org> > Cc: Ian Rogers <irogers@google.com> > Cc: linux-perf-users@vger.kernel.org > Cc: linux-kernel@vger.kernel.org > --- > tools/perf/builtin-report.c | 11 +++++++++++ > tools/perf/util/hist.c | 2 ++ > tools/perf/util/hist.h | 3 +++ > tools/perf/util/session.c | 12 ++++++++++++ > tools/perf/util/session.h | 1 + > tools/perf/util/sort.c | 23 +++++++++++++++++++++++ > tools/perf/util/sort.h | 1 + > 7 files changed, 53 insertions(+) > > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c > index 0d9bd090eda71..14d49f0625881 100644 > --- a/tools/perf/builtin-report.c > +++ b/tools/perf/builtin-report.c > @@ -1720,6 +1720,17 @@ int cmd_report(int argc, const char **argv) > symbol_conf.annotate_data_sample = true; > } > > + if (report.disable_order || !perf_session__has_switch_events(session)) { > + if ((sort_order && strstr(sort_order, "parallelism")) || > + (field_order && strstr(field_order, "parallelism"))) { > + if (report.disable_order) > + ui__error("Use of parallelism is incompatible with --disable-order.\n"); > + else > + ui__error("Use of parallelism requires --switch-events during record.\n"); > + return -1; > + } > + } > + > if (sort_order && strstr(sort_order, "ipc")) { > parse_options_usage(report_usage, options, "s", 1); > goto error; > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > index 0f30f843c566d..cafd693568189 100644 > --- a/tools/perf/util/hist.c > +++ b/tools/perf/util/hist.c > @@ -207,6 +207,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) > > hists__new_col_len(hists, HISTC_CGROUP, 6); > hists__new_col_len(hists, HISTC_CGROUP_ID, 20); > + hists__new_col_len(hists, HISTC_PARALLELISM, 11); > hists__new_col_len(hists, HISTC_CPU, 3); > hists__new_col_len(hists, HISTC_SOCKET, 6); > hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); > @@ -741,6 +742,7 @@ __hists__add_entry(struct hists *hists, > .ip = al->addr, > .level = al->level, > .code_page_size = sample->code_page_size, > + .parallelism = al->parallelism, > .stat = { > .nr_events = 1, > .period = sample->period, > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h > index 46c8373e31465..a6e662d77dc24 100644 > --- a/tools/perf/util/hist.h > +++ b/tools/perf/util/hist.h > @@ -42,6 +42,7 @@ enum hist_column { > HISTC_CGROUP_ID, > HISTC_CGROUP, > HISTC_PARENT, > + HISTC_PARALLELISM, > HISTC_CPU, > HISTC_SOCKET, > HISTC_SRCLINE, > @@ -228,6 +229,7 @@ struct hist_entry { > u64 transaction; > s32 socket; > s32 cpu; > + int parallelism; Can you make it u16 and move to around cpumode to remove paddings? Thanks, Namhyung > u64 code_page_size; > u64 weight; > u64 ins_lat; > @@ -580,6 +582,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); > bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); > bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); > bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt); > +bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt); > > struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
On Wed, 29 Jan 2025 at 05:42, Namhyung Kim <namhyung@kernel.org> wrote: > > On Mon, Jan 27, 2025 at 10:58:49AM +0100, Dmitry Vyukov wrote: > > Show parallelism level in profiles if requested by user. > > > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com> > > Cc: Namhyung Kim <namhyung@kernel.org> > > Cc: Arnaldo Carvalho de Melo <acme@kernel.org> > > Cc: Ian Rogers <irogers@google.com> > > Cc: linux-perf-users@vger.kernel.org > > Cc: linux-kernel@vger.kernel.org > > --- > > tools/perf/builtin-report.c | 11 +++++++++++ > > tools/perf/util/hist.c | 2 ++ > > tools/perf/util/hist.h | 3 +++ > > tools/perf/util/session.c | 12 ++++++++++++ > > tools/perf/util/session.h | 1 + > > tools/perf/util/sort.c | 23 +++++++++++++++++++++++ > > tools/perf/util/sort.h | 1 + > > 7 files changed, 53 insertions(+) > > > > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c > > index 0d9bd090eda71..14d49f0625881 100644 > > --- a/tools/perf/builtin-report.c > > +++ b/tools/perf/builtin-report.c > > @@ -1720,6 +1720,17 @@ int cmd_report(int argc, const char **argv) > > symbol_conf.annotate_data_sample = true; > > } > > > > + if (report.disable_order || !perf_session__has_switch_events(session)) { > > + if ((sort_order && strstr(sort_order, "parallelism")) || > > + (field_order && strstr(field_order, "parallelism"))) { > > + if (report.disable_order) > > + ui__error("Use of parallelism is incompatible with --disable-order.\n"); > > + else > > + ui__error("Use of parallelism requires --switch-events during record.\n"); > > + return -1; > > + } > > + } > > + > > if (sort_order && strstr(sort_order, "ipc")) { > > parse_options_usage(report_usage, options, "s", 1); > > goto error; > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > > index 0f30f843c566d..cafd693568189 100644 > > --- a/tools/perf/util/hist.c > > +++ b/tools/perf/util/hist.c > > @@ -207,6 +207,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) > > > > hists__new_col_len(hists, HISTC_CGROUP, 6); > > hists__new_col_len(hists, HISTC_CGROUP_ID, 20); > > + hists__new_col_len(hists, HISTC_PARALLELISM, 11); > > hists__new_col_len(hists, HISTC_CPU, 3); > > hists__new_col_len(hists, HISTC_SOCKET, 6); > > hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); > > @@ -741,6 +742,7 @@ __hists__add_entry(struct hists *hists, > > .ip = al->addr, > > .level = al->level, > > .code_page_size = sample->code_page_size, > > + .parallelism = al->parallelism, > > .stat = { > > .nr_events = 1, > > .period = sample->period, > > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h > > index 46c8373e31465..a6e662d77dc24 100644 > > --- a/tools/perf/util/hist.h > > +++ b/tools/perf/util/hist.h > > @@ -42,6 +42,7 @@ enum hist_column { > > HISTC_CGROUP_ID, > > HISTC_CGROUP, > > HISTC_PARENT, > > + HISTC_PARALLELISM, > > HISTC_CPU, > > HISTC_SOCKET, > > HISTC_SRCLINE, > > @@ -228,6 +229,7 @@ struct hist_entry { > > u64 transaction; > > s32 socket; > > s32 cpu; > > + int parallelism; > > Can you make it u16 and move to around cpumode to remove paddings? It generally should be the same size as cpu/socket/etc. And these are 32-bits throughout the codebase (the previous fields). Are there any checks that MAX_NR_CPUS<64K? > Thanks, > Namhyung > > > > u64 code_page_size; > > u64 weight; > > u64 ins_lat; > > @@ -580,6 +582,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); > > bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); > > bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); > > bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt); > > +bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt); > > > > struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
On Wed, Jan 29, 2025 at 08:18:34AM +0100, Dmitry Vyukov wrote: > On Wed, 29 Jan 2025 at 05:42, Namhyung Kim <namhyung@kernel.org> wrote: > > > > On Mon, Jan 27, 2025 at 10:58:49AM +0100, Dmitry Vyukov wrote: > > > Show parallelism level in profiles if requested by user. > > > > > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com> > > > Cc: Namhyung Kim <namhyung@kernel.org> > > > Cc: Arnaldo Carvalho de Melo <acme@kernel.org> > > > Cc: Ian Rogers <irogers@google.com> > > > Cc: linux-perf-users@vger.kernel.org > > > Cc: linux-kernel@vger.kernel.org > > > --- > > > tools/perf/builtin-report.c | 11 +++++++++++ > > > tools/perf/util/hist.c | 2 ++ > > > tools/perf/util/hist.h | 3 +++ > > > tools/perf/util/session.c | 12 ++++++++++++ > > > tools/perf/util/session.h | 1 + > > > tools/perf/util/sort.c | 23 +++++++++++++++++++++++ > > > tools/perf/util/sort.h | 1 + > > > 7 files changed, 53 insertions(+) > > > > > > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c > > > index 0d9bd090eda71..14d49f0625881 100644 > > > --- a/tools/perf/builtin-report.c > > > +++ b/tools/perf/builtin-report.c > > > @@ -1720,6 +1720,17 @@ int cmd_report(int argc, const char **argv) > > > symbol_conf.annotate_data_sample = true; > > > } > > > > > > + if (report.disable_order || !perf_session__has_switch_events(session)) { > > > + if ((sort_order && strstr(sort_order, "parallelism")) || > > > + (field_order && strstr(field_order, "parallelism"))) { > > > + if (report.disable_order) > > > + ui__error("Use of parallelism is incompatible with --disable-order.\n"); > > > + else > > > + ui__error("Use of parallelism requires --switch-events during record.\n"); > > > + return -1; > > > + } > > > + } > > > + > > > if (sort_order && strstr(sort_order, "ipc")) { > > > parse_options_usage(report_usage, options, "s", 1); > > > goto error; > > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c > > > index 0f30f843c566d..cafd693568189 100644 > > > --- a/tools/perf/util/hist.c > > > +++ b/tools/perf/util/hist.c > > > @@ -207,6 +207,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) > > > > > > hists__new_col_len(hists, HISTC_CGROUP, 6); > > > hists__new_col_len(hists, HISTC_CGROUP_ID, 20); > > > + hists__new_col_len(hists, HISTC_PARALLELISM, 11); > > > hists__new_col_len(hists, HISTC_CPU, 3); > > > hists__new_col_len(hists, HISTC_SOCKET, 6); > > > hists__new_col_len(hists, HISTC_MEM_LOCKED, 6); > > > @@ -741,6 +742,7 @@ __hists__add_entry(struct hists *hists, > > > .ip = al->addr, > > > .level = al->level, > > > .code_page_size = sample->code_page_size, > > > + .parallelism = al->parallelism, > > > .stat = { > > > .nr_events = 1, > > > .period = sample->period, > > > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h > > > index 46c8373e31465..a6e662d77dc24 100644 > > > --- a/tools/perf/util/hist.h > > > +++ b/tools/perf/util/hist.h > > > @@ -42,6 +42,7 @@ enum hist_column { > > > HISTC_CGROUP_ID, > > > HISTC_CGROUP, > > > HISTC_PARENT, > > > + HISTC_PARALLELISM, > > > HISTC_CPU, > > > HISTC_SOCKET, > > > HISTC_SRCLINE, > > > @@ -228,6 +229,7 @@ struct hist_entry { > > > u64 transaction; > > > s32 socket; > > > s32 cpu; > > > + int parallelism; > > > > Can you make it u16 and move to around cpumode to remove paddings? > > It generally should be the same size as cpu/socket/etc. And these are > 32-bits throughout the codebase (the previous fields). Are there any > checks that MAX_NR_CPUS<64K? I don't think there's such a check. But practically it used be 4K and you can add the check. :) Anyway hist_entry is getting bigger and we may need to shrink it later. I don't stringly insist on u16 though. It's up to you. Thanks, Namhyung > > > > > > > u64 code_page_size; > > > u64 weight; > > > u64 ins_lat; > > > @@ -580,6 +582,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt); > > > bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt); > > > bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt); > > > bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt); > > > +bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt); > > > > > > struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
© 2016 - 2025 Red Hat, Inc.