[PATCH v3 2/7] perf report: Add parallelism sort key

Dmitry Vyukov posted 7 patches 3 days, 8 hours ago
[PATCH v3 2/7] perf report: Add parallelism sort key
Posted by Dmitry Vyukov 3 days, 8 hours ago
Show parallelism level in profiles if requested by user.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
---
 tools/perf/builtin-report.c | 11 +++++++++++
 tools/perf/util/hist.c      |  2 ++
 tools/perf/util/hist.h      |  3 +++
 tools/perf/util/session.c   | 12 ++++++++++++
 tools/perf/util/session.h   |  1 +
 tools/perf/util/sort.c      | 23 +++++++++++++++++++++++
 tools/perf/util/sort.h      |  1 +
 7 files changed, 53 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 0d9bd090eda71..14d49f0625881 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1720,6 +1720,17 @@ int cmd_report(int argc, const char **argv)
 		symbol_conf.annotate_data_sample = true;
 	}
 
+	if (report.disable_order || !perf_session__has_switch_events(session)) {
+		if ((sort_order && strstr(sort_order, "parallelism")) ||
+				(field_order && strstr(field_order, "parallelism"))) {
+			if (report.disable_order)
+				ui__error("Use of parallelism is incompatible with --disable-order.\n");
+			else
+				ui__error("Use of parallelism requires --switch-events during record.\n");
+			return -1;
+		}
+	}
+
 	if (sort_order && strstr(sort_order, "ipc")) {
 		parse_options_usage(report_usage, options, "s", 1);
 		goto error;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 0f30f843c566d..cafd693568189 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -207,6 +207,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
 
 	hists__new_col_len(hists, HISTC_CGROUP, 6);
 	hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+	hists__new_col_len(hists, HISTC_PARALLELISM, 11);
 	hists__new_col_len(hists, HISTC_CPU, 3);
 	hists__new_col_len(hists, HISTC_SOCKET, 6);
 	hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
@@ -741,6 +742,7 @@ __hists__add_entry(struct hists *hists,
 		.ip	 = al->addr,
 		.level	 = al->level,
 		.code_page_size = sample->code_page_size,
+		.parallelism	= al->parallelism,
 		.stat = {
 			.nr_events = 1,
 			.period	= sample->period,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 46c8373e31465..a6e662d77dc24 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -42,6 +42,7 @@ enum hist_column {
 	HISTC_CGROUP_ID,
 	HISTC_CGROUP,
 	HISTC_PARENT,
+	HISTC_PARALLELISM,
 	HISTC_CPU,
 	HISTC_SOCKET,
 	HISTC_SRCLINE,
@@ -228,6 +229,7 @@ struct hist_entry {
 	u64			transaction;
 	s32			socket;
 	s32			cpu;
+	int			parallelism;
 	u64			code_page_size;
 	u64			weight;
 	u64			ins_lat;
@@ -580,6 +582,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
 bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
 bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
 bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt);
 
 struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c06e3020a9769..00fcf8d8ac255 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2403,6 +2403,18 @@ bool perf_session__has_traces(struct perf_session *session, const char *msg)
 	return false;
 }
 
+bool perf_session__has_switch_events(struct perf_session *session)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(session->evlist, evsel) {
+		if (evsel->core.attr.context_switch)
+			return true;
+	}
+
+	return false;
+}
+
 int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
 {
 	char *bracket;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index bcf1bcf06959b..db1c120a9e67f 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -141,6 +141,7 @@ int perf_session__resolve_callchain(struct perf_session *session,
 				    struct symbol **parent);
 
 bool perf_session__has_traces(struct perf_session *session, const char *msg);
+bool perf_session__has_switch_events(struct perf_session *session);
 
 void perf_event__attr_swap(struct perf_event_attr *attr);
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 3dd33721823f3..7eef43f5be360 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -892,6 +892,27 @@ struct sort_entry sort_cpu = {
 	.se_width_idx	= HISTC_CPU,
 };
 
+/* --sort parallelism */
+
+static int64_t
+sort__parallelism_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+	return right->parallelism - left->parallelism;
+}
+
+static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf,
+				    size_t size, unsigned int width)
+{
+	return repsep_snprintf(bf, size, "%*d", width, he->parallelism);
+}
+
+struct sort_entry sort_parallelism = {
+	.se_header      = "Parallelism",
+	.se_cmp	        = sort__parallelism_cmp,
+	.se_snprintf    = hist_entry__parallelism_snprintf,
+	.se_width_idx	= HISTC_PARALLELISM,
+};
+
 /* --sort cgroup_id */
 
 static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
@@ -2534,6 +2555,7 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_ANNOTATE_DATA_TYPE_OFFSET, "typeoff", sort_type_offset),
 	DIM(SORT_SYM_OFFSET, "symoff", sort_sym_offset),
 	DIM(SORT_ANNOTATE_DATA_TYPE_CACHELINE, "typecln", sort_type_cacheline),
+	DIM(SORT_PARALLELISM, "parallelism", sort_parallelism),
 };
 
 #undef DIM
@@ -2735,6 +2757,7 @@ MK_SORT_ENTRY_CHK(thread)
 MK_SORT_ENTRY_CHK(comm)
 MK_SORT_ENTRY_CHK(dso)
 MK_SORT_ENTRY_CHK(sym)
+MK_SORT_ENTRY_CHK(parallelism)
 
 
 static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index a8572574e1686..11fb15f914093 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -72,6 +72,7 @@ enum sort_type {
 	SORT_ANNOTATE_DATA_TYPE_OFFSET,
 	SORT_SYM_OFFSET,
 	SORT_ANNOTATE_DATA_TYPE_CACHELINE,
+	SORT_PARALLELISM,
 
 	/* branch stack specific sort keys */
 	__SORT_BRANCH_STACK,
-- 
2.48.1.262.g85cc9f2d1e-goog
Re: [PATCH v3 2/7] perf report: Add parallelism sort key
Posted by Namhyung Kim 1 day, 14 hours ago
On Mon, Jan 27, 2025 at 10:58:49AM +0100, Dmitry Vyukov wrote:
> Show parallelism level in profiles if requested by user.
> 
> Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> Cc: Namhyung Kim <namhyung@kernel.org>
> Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> Cc: Ian Rogers <irogers@google.com>
> Cc: linux-perf-users@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---
>  tools/perf/builtin-report.c | 11 +++++++++++
>  tools/perf/util/hist.c      |  2 ++
>  tools/perf/util/hist.h      |  3 +++
>  tools/perf/util/session.c   | 12 ++++++++++++
>  tools/perf/util/session.h   |  1 +
>  tools/perf/util/sort.c      | 23 +++++++++++++++++++++++
>  tools/perf/util/sort.h      |  1 +
>  7 files changed, 53 insertions(+)
> 
> diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> index 0d9bd090eda71..14d49f0625881 100644
> --- a/tools/perf/builtin-report.c
> +++ b/tools/perf/builtin-report.c
> @@ -1720,6 +1720,17 @@ int cmd_report(int argc, const char **argv)
>  		symbol_conf.annotate_data_sample = true;
>  	}
>  
> +	if (report.disable_order || !perf_session__has_switch_events(session)) {
> +		if ((sort_order && strstr(sort_order, "parallelism")) ||
> +				(field_order && strstr(field_order, "parallelism"))) {
> +			if (report.disable_order)
> +				ui__error("Use of parallelism is incompatible with --disable-order.\n");
> +			else
> +				ui__error("Use of parallelism requires --switch-events during record.\n");
> +			return -1;
> +		}
> +	}
> +
>  	if (sort_order && strstr(sort_order, "ipc")) {
>  		parse_options_usage(report_usage, options, "s", 1);
>  		goto error;
> diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> index 0f30f843c566d..cafd693568189 100644
> --- a/tools/perf/util/hist.c
> +++ b/tools/perf/util/hist.c
> @@ -207,6 +207,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
>  
>  	hists__new_col_len(hists, HISTC_CGROUP, 6);
>  	hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
> +	hists__new_col_len(hists, HISTC_PARALLELISM, 11);
>  	hists__new_col_len(hists, HISTC_CPU, 3);
>  	hists__new_col_len(hists, HISTC_SOCKET, 6);
>  	hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
> @@ -741,6 +742,7 @@ __hists__add_entry(struct hists *hists,
>  		.ip	 = al->addr,
>  		.level	 = al->level,
>  		.code_page_size = sample->code_page_size,
> +		.parallelism	= al->parallelism,
>  		.stat = {
>  			.nr_events = 1,
>  			.period	= sample->period,
> diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> index 46c8373e31465..a6e662d77dc24 100644
> --- a/tools/perf/util/hist.h
> +++ b/tools/perf/util/hist.h
> @@ -42,6 +42,7 @@ enum hist_column {
>  	HISTC_CGROUP_ID,
>  	HISTC_CGROUP,
>  	HISTC_PARENT,
> +	HISTC_PARALLELISM,
>  	HISTC_CPU,
>  	HISTC_SOCKET,
>  	HISTC_SRCLINE,
> @@ -228,6 +229,7 @@ struct hist_entry {
>  	u64			transaction;
>  	s32			socket;
>  	s32			cpu;
> +	int			parallelism;

Can you make it u16 and move to around cpumode to remove paddings?

Thanks,
Namhyung


>  	u64			code_page_size;
>  	u64			weight;
>  	u64			ins_lat;
> @@ -580,6 +582,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
>  bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
>  bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
>  bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
> +bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt);
>  
>  struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
Re: [PATCH v3 2/7] perf report: Add parallelism sort key
Posted by Dmitry Vyukov 1 day, 11 hours ago
On Wed, 29 Jan 2025 at 05:42, Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Mon, Jan 27, 2025 at 10:58:49AM +0100, Dmitry Vyukov wrote:
> > Show parallelism level in profiles if requested by user.
> >
> > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > Cc: Namhyung Kim <namhyung@kernel.org>
> > Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> > Cc: Ian Rogers <irogers@google.com>
> > Cc: linux-perf-users@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > ---
> >  tools/perf/builtin-report.c | 11 +++++++++++
> >  tools/perf/util/hist.c      |  2 ++
> >  tools/perf/util/hist.h      |  3 +++
> >  tools/perf/util/session.c   | 12 ++++++++++++
> >  tools/perf/util/session.h   |  1 +
> >  tools/perf/util/sort.c      | 23 +++++++++++++++++++++++
> >  tools/perf/util/sort.h      |  1 +
> >  7 files changed, 53 insertions(+)
> >
> > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > index 0d9bd090eda71..14d49f0625881 100644
> > --- a/tools/perf/builtin-report.c
> > +++ b/tools/perf/builtin-report.c
> > @@ -1720,6 +1720,17 @@ int cmd_report(int argc, const char **argv)
> >               symbol_conf.annotate_data_sample = true;
> >       }
> >
> > +     if (report.disable_order || !perf_session__has_switch_events(session)) {
> > +             if ((sort_order && strstr(sort_order, "parallelism")) ||
> > +                             (field_order && strstr(field_order, "parallelism"))) {
> > +                     if (report.disable_order)
> > +                             ui__error("Use of parallelism is incompatible with --disable-order.\n");
> > +                     else
> > +                             ui__error("Use of parallelism requires --switch-events during record.\n");
> > +                     return -1;
> > +             }
> > +     }
> > +
> >       if (sort_order && strstr(sort_order, "ipc")) {
> >               parse_options_usage(report_usage, options, "s", 1);
> >               goto error;
> > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> > index 0f30f843c566d..cafd693568189 100644
> > --- a/tools/perf/util/hist.c
> > +++ b/tools/perf/util/hist.c
> > @@ -207,6 +207,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
> >
> >       hists__new_col_len(hists, HISTC_CGROUP, 6);
> >       hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
> > +     hists__new_col_len(hists, HISTC_PARALLELISM, 11);
> >       hists__new_col_len(hists, HISTC_CPU, 3);
> >       hists__new_col_len(hists, HISTC_SOCKET, 6);
> >       hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
> > @@ -741,6 +742,7 @@ __hists__add_entry(struct hists *hists,
> >               .ip      = al->addr,
> >               .level   = al->level,
> >               .code_page_size = sample->code_page_size,
> > +             .parallelism    = al->parallelism,
> >               .stat = {
> >                       .nr_events = 1,
> >                       .period = sample->period,
> > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> > index 46c8373e31465..a6e662d77dc24 100644
> > --- a/tools/perf/util/hist.h
> > +++ b/tools/perf/util/hist.h
> > @@ -42,6 +42,7 @@ enum hist_column {
> >       HISTC_CGROUP_ID,
> >       HISTC_CGROUP,
> >       HISTC_PARENT,
> > +     HISTC_PARALLELISM,
> >       HISTC_CPU,
> >       HISTC_SOCKET,
> >       HISTC_SRCLINE,
> > @@ -228,6 +229,7 @@ struct hist_entry {
> >       u64                     transaction;
> >       s32                     socket;
> >       s32                     cpu;
> > +     int                     parallelism;
>
> Can you make it u16 and move to around cpumode to remove paddings?

It generally should be the same size as cpu/socket/etc. And these are
32-bits throughout the codebase (the previous fields). Are there any
checks that MAX_NR_CPUS<64K?


> Thanks,
> Namhyung
>
>
> >       u64                     code_page_size;
> >       u64                     weight;
> >       u64                     ins_lat;
> > @@ -580,6 +582,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
> >  bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
> >  bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
> >  bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
> > +bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt);
> >
> >  struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
Re: [PATCH v3 2/7] perf report: Add parallelism sort key
Posted by Namhyung Kim 13 hours ago
On Wed, Jan 29, 2025 at 08:18:34AM +0100, Dmitry Vyukov wrote:
> On Wed, 29 Jan 2025 at 05:42, Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > On Mon, Jan 27, 2025 at 10:58:49AM +0100, Dmitry Vyukov wrote:
> > > Show parallelism level in profiles if requested by user.
> > >
> > > Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
> > > Cc: Namhyung Kim <namhyung@kernel.org>
> > > Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
> > > Cc: Ian Rogers <irogers@google.com>
> > > Cc: linux-perf-users@vger.kernel.org
> > > Cc: linux-kernel@vger.kernel.org
> > > ---
> > >  tools/perf/builtin-report.c | 11 +++++++++++
> > >  tools/perf/util/hist.c      |  2 ++
> > >  tools/perf/util/hist.h      |  3 +++
> > >  tools/perf/util/session.c   | 12 ++++++++++++
> > >  tools/perf/util/session.h   |  1 +
> > >  tools/perf/util/sort.c      | 23 +++++++++++++++++++++++
> > >  tools/perf/util/sort.h      |  1 +
> > >  7 files changed, 53 insertions(+)
> > >
> > > diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
> > > index 0d9bd090eda71..14d49f0625881 100644
> > > --- a/tools/perf/builtin-report.c
> > > +++ b/tools/perf/builtin-report.c
> > > @@ -1720,6 +1720,17 @@ int cmd_report(int argc, const char **argv)
> > >               symbol_conf.annotate_data_sample = true;
> > >       }
> > >
> > > +     if (report.disable_order || !perf_session__has_switch_events(session)) {
> > > +             if ((sort_order && strstr(sort_order, "parallelism")) ||
> > > +                             (field_order && strstr(field_order, "parallelism"))) {
> > > +                     if (report.disable_order)
> > > +                             ui__error("Use of parallelism is incompatible with --disable-order.\n");
> > > +                     else
> > > +                             ui__error("Use of parallelism requires --switch-events during record.\n");
> > > +                     return -1;
> > > +             }
> > > +     }
> > > +
> > >       if (sort_order && strstr(sort_order, "ipc")) {
> > >               parse_options_usage(report_usage, options, "s", 1);
> > >               goto error;
> > > diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
> > > index 0f30f843c566d..cafd693568189 100644
> > > --- a/tools/perf/util/hist.c
> > > +++ b/tools/perf/util/hist.c
> > > @@ -207,6 +207,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
> > >
> > >       hists__new_col_len(hists, HISTC_CGROUP, 6);
> > >       hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
> > > +     hists__new_col_len(hists, HISTC_PARALLELISM, 11);
> > >       hists__new_col_len(hists, HISTC_CPU, 3);
> > >       hists__new_col_len(hists, HISTC_SOCKET, 6);
> > >       hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
> > > @@ -741,6 +742,7 @@ __hists__add_entry(struct hists *hists,
> > >               .ip      = al->addr,
> > >               .level   = al->level,
> > >               .code_page_size = sample->code_page_size,
> > > +             .parallelism    = al->parallelism,
> > >               .stat = {
> > >                       .nr_events = 1,
> > >                       .period = sample->period,
> > > diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
> > > index 46c8373e31465..a6e662d77dc24 100644
> > > --- a/tools/perf/util/hist.h
> > > +++ b/tools/perf/util/hist.h
> > > @@ -42,6 +42,7 @@ enum hist_column {
> > >       HISTC_CGROUP_ID,
> > >       HISTC_CGROUP,
> > >       HISTC_PARENT,
> > > +     HISTC_PARALLELISM,
> > >       HISTC_CPU,
> > >       HISTC_SOCKET,
> > >       HISTC_SRCLINE,
> > > @@ -228,6 +229,7 @@ struct hist_entry {
> > >       u64                     transaction;
> > >       s32                     socket;
> > >       s32                     cpu;
> > > +     int                     parallelism;
> >
> > Can you make it u16 and move to around cpumode to remove paddings?
> 
> It generally should be the same size as cpu/socket/etc. And these are
> 32-bits throughout the codebase (the previous fields). Are there any
> checks that MAX_NR_CPUS<64K?

I don't think there's such a check.  But practically it used be 4K and
you can add the check. :)

Anyway hist_entry is getting bigger and we may need to shrink it later.
I don't stringly insist on u16 though.  It's up to you.

Thanks,
Namhyung

> >
> >
> > >       u64                     code_page_size;
> > >       u64                     weight;
> > >       u64                     ins_lat;
> > > @@ -580,6 +582,7 @@ bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
> > >  bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
> > >  bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
> > >  bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
> > > +bool perf_hpp__is_parallelism_entry(struct perf_hpp_fmt *fmt);
> > >
> > >  struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);