[PATCH v3 32/46] perf stat: Make cputype filter generic

Ian Rogers posted 46 patches 2 years, 7 months ago
There is a newer version of this series
[PATCH v3 32/46] perf stat: Make cputype filter generic
Posted by Ian Rogers 2 years, 7 months ago
Rather than limit the --cputype argument for "perf list" and "perf
stat" to hybrid PMUs of just cpu_atom and cpu_core, allow any PMU.

Note, that if cpu_atom isn't mounted but a filter of cpu_atom is
requested, then this will now fail. As such a filter would never
succeed, no events can come from that unmounted PMU, then this
behavior could never have been useful and failing is clearer.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-list.c    | 19 +++++++++++--------
 tools/perf/builtin-stat.c    | 12 +++++++-----
 tools/perf/util/pmu-hybrid.c | 20 --------------------
 tools/perf/util/pmu-hybrid.h |  1 -
 tools/perf/util/pmus.c       | 25 ++++++++++++++++++++++++-
 tools/perf/util/pmus.h       |  3 +++
 6 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 1f5dbd5f0ba4..1b48cf214b6e 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -11,8 +11,8 @@
 #include "builtin.h"
 
 #include "util/print-events.h"
+#include "util/pmus.h"
 #include "util/pmu.h"
-#include "util/pmu-hybrid.h"
 #include "util/debug.h"
 #include "util/metricgroup.h"
 #include "util/string2.h"
@@ -429,7 +429,7 @@ int cmd_list(int argc, const char **argv)
 		.print_event = default_print_event,
 		.print_metric = default_print_metric,
 	};
-	const char *hybrid_name = NULL;
+	const char *cputype = NULL;
 	const char *unit_name = NULL;
 	bool json = false;
 	struct option list_options[] = {
@@ -443,8 +443,8 @@ int cmd_list(int argc, const char **argv)
 			    "Print information on the perf event names and expressions used internally by events."),
 		OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated,
 			    "Print deprecated events."),
-		OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type",
-			   "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."),
+		OPT_STRING(0, "cputype", &cputype, "cpu type",
+			   "Limit PMU or metric printing to the given PMU (e.g. cpu, core or atom)."),
 		OPT_STRING(0, "unit", &unit_name, "PMU name",
 			   "Limit PMU or metric printing to the specified PMU."),
 		OPT_INCR(0, "debug", &verbose,
@@ -484,10 +484,13 @@ int cmd_list(int argc, const char **argv)
 		assert(default_ps.visited_metrics);
 		if (unit_name)
 			default_ps.pmu_glob = strdup(unit_name);
-		else if (hybrid_name) {
-			default_ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name);
-			if (!default_ps.pmu_glob)
-				pr_warning("WARNING: hybrid cputype is not supported!\n");
+		else if (cputype) {
+			const struct perf_pmu *pmu = perf_pmus__pmu_for_pmu_filter(cputype);
+
+			if (!pmu)
+				pr_warning("WARNING: cputype is not supported!\n");
+
+			default_ps.pmu_glob = pmu->name;
 		}
 	}
 	print_cb.print_start(ps);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3b25fcab5cd1..06a1d71a49a5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -44,6 +44,7 @@
 #include "util/cgroup.h"
 #include <subcmd/parse-options.h>
 #include "util/parse-events.h"
+#include "util/pmus.h"
 #include "util/pmu.h"
 #include "util/event.h"
 #include "util/evlist.h"
@@ -69,7 +70,6 @@
 #include "util/pfm.h"
 #include "util/bpf_counter.h"
 #include "util/iostat.h"
-#include "util/pmu-hybrid.h"
 #include "util/util.h"
 #include "asm/bug.h"
 
@@ -1089,10 +1089,11 @@ static int parse_stat_cgroups(const struct option *opt,
 	return parse_cgroups(opt, str, unset);
 }
 
-static int parse_hybrid_type(const struct option *opt,
+static int parse_cputype(const struct option *opt,
 			     const char *str,
 			     int unset __maybe_unused)
 {
+	const struct perf_pmu *pmu;
 	struct evlist *evlist = *(struct evlist **)opt->value;
 
 	if (!list_empty(&evlist->core.entries)) {
@@ -1100,11 +1101,12 @@ static int parse_hybrid_type(const struct option *opt,
 		return -1;
 	}
 
-	parse_events_option_args.pmu_filter = perf_pmu__hybrid_type_to_pmu(str);
-	if (!parse_events_option_args.pmu_filter) {
+	pmu = perf_pmus__pmu_for_pmu_filter(str);
+	if (!pmu) {
 		fprintf(stderr, "--cputype %s is not supported!\n", str);
 		return -1;
 	}
+	parse_events_option_args.pmu_filter = pmu->name;
 
 	return 0;
 }
@@ -1230,7 +1232,7 @@ static struct option stat_options[] = {
 	OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
 		     "Only enable events on applying cpu with this type "
 		     "for hybrid platform (e.g. core or atom)",
-		     parse_hybrid_type),
+		     parse_cputype),
 #ifdef HAVE_LIBPFM
 	OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
 		"libpfm4 event selector. use 'perf list' to list available events",
diff --git a/tools/perf/util/pmu-hybrid.c b/tools/perf/util/pmu-hybrid.c
index 38628805a952..bc4cb0738c35 100644
--- a/tools/perf/util/pmu-hybrid.c
+++ b/tools/perf/util/pmu-hybrid.c
@@ -50,23 +50,3 @@ bool perf_pmu__is_hybrid(const char *name)
 {
 	return perf_pmu__find_hybrid_pmu(name) != NULL;
 }
-
-char *perf_pmu__hybrid_type_to_pmu(const char *type)
-{
-	char *pmu_name = NULL;
-
-	if (asprintf(&pmu_name, "cpu_%s", type) < 0)
-		return NULL;
-
-	if (perf_pmu__is_hybrid(pmu_name))
-		return pmu_name;
-
-	/*
-	 * pmu may be not scanned, check the sysfs.
-	 */
-	if (perf_pmu__hybrid_mounted(pmu_name))
-		return pmu_name;
-
-	free(pmu_name);
-	return NULL;
-}
diff --git a/tools/perf/util/pmu-hybrid.h b/tools/perf/util/pmu-hybrid.h
index 2b186c26a43e..206b94931531 100644
--- a/tools/perf/util/pmu-hybrid.h
+++ b/tools/perf/util/pmu-hybrid.h
@@ -17,7 +17,6 @@ bool perf_pmu__hybrid_mounted(const char *name);
 
 struct perf_pmu *perf_pmu__find_hybrid_pmu(const char *name);
 bool perf_pmu__is_hybrid(const char *name);
-char *perf_pmu__hybrid_type_to_pmu(const char *type);
 
 static inline int perf_pmu__hybrid_pmu_num(void)
 {
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index 7f3b93c4d229..140e11f00b29 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -1,5 +1,28 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/list.h>
-#include <pmus.h>
+#include <string.h>
+#include "pmus.h"
+#include "pmu.h"
 
 LIST_HEAD(pmus);
+
+const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
+{
+	struct perf_pmu *pmu = NULL;
+
+	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		if (!strcmp(pmu->name, str))
+			return pmu;
+		/* Ignore "uncore_" prefix. */
+		if (!strncmp(pmu->name, "uncore_", 7)) {
+			if (!strcmp(pmu->name + 7, str))
+				return pmu;
+		}
+		/* Ignore "cpu_" prefix on Intel hybrid PMUs. */
+		if (!strncmp(pmu->name, "cpu_", 4)) {
+			if (!strcmp(pmu->name + 4, str))
+				return pmu;
+		}
+	}
+	return NULL;
+}
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
index 5ec12007eb5c..d475e2960c10 100644
--- a/tools/perf/util/pmus.h
+++ b/tools/perf/util/pmus.h
@@ -3,7 +3,10 @@
 #define __PMUS_H
 
 extern struct list_head pmus;
+struct perf_pmu;
 
 #define perf_pmus__for_each_pmu(pmu) list_for_each_entry(pmu, &pmus, list)
 
+const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str);
+
 #endif /* __PMUS_H */
-- 
2.40.1.495.gc816e09b53d-goog
Re: [PATCH v3 32/46] perf stat: Make cputype filter generic
Posted by Ravi Bangoria 2 years, 7 months ago
On 29-Apr-23 11:04 AM, Ian Rogers wrote:
> Rather than limit the --cputype argument for "perf list" and "perf
> stat" to hybrid PMUs of just cpu_atom and cpu_core, allow any PMU.

I've couple of doubts:

1. Can you please explain intention to do this esp for perf list. Since, IIUC,
   `perf list --unit` option provide the same functionality.

2. Since we are already specifying pmu name for non-standerd/arch-specific
   events like `pmu/attributes/`, I'm not sure where `perf stat --cputype=pmu`
   is useful. Can you please explain perf stat usability aspect for non-hybrid
   pmus.

3. What am I missing here:

   $ sudo ./perf stat --cputype=amd_df -e amd_l3/event=0x4,umask=0xff/ -C 0 -- sleep 1
    Performance counter stats for 'CPU(s) 0':

           108,267      amd_l3/event=0x4,umask=0xff/                                         

       1.061290167 seconds time elapsed

3. Also, IMHO, using --cputype option to specify _pmu name_ is bit odd.

> 
> Note, that if cpu_atom isn't mounted but a filter of cpu_atom is
> requested, then this will now fail. As such a filter would never
> succeed, no events can come from that unmounted PMU, then this
> behavior could never have been useful and failing is clearer.

I'm hitting a segfault if I use non-existing pmu:

$ sudo ./perf list --cputype=random
WARNING: cputype is not supported!
Segmentation fault


> @@ -443,8 +443,8 @@ int cmd_list(int argc, const char **argv)
>  			    "Print information on the perf event names and expressions used internally by events."),
>  		OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated,
>  			    "Print deprecated events."),
> -		OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type",
> -			   "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."),
> +		OPT_STRING(0, "cputype", &cputype, "cpu type",
> +			   "Limit PMU or metric printing to the given PMU (e.g. cpu, core or atom)."),

man perf-list does not describe --cputype. I think we should add it as part
of this patch?

Similarly, man perf-stat also needs to be updated.


> +const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
> +{
> +	struct perf_pmu *pmu = NULL;
> +
> +	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
> +		if (!strcmp(pmu->name, str))
> +			return pmu;
> +		/* Ignore "uncore_" prefix. */
> +		if (!strncmp(pmu->name, "uncore_", 7)) {
> +			if (!strcmp(pmu->name + 7, str))
> +				return pmu;

Any specific reason to ignore "uncore_"? IMHO, ignoring prefix of some
pmus and not of others is bit confusing for naive user.
Re: [PATCH v3 32/46] perf stat: Make cputype filter generic
Posted by Ian Rogers 2 years, 7 months ago
On Tue, May 2, 2023 at 3:52 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> On 29-Apr-23 11:04 AM, Ian Rogers wrote:
> > Rather than limit the --cputype argument for "perf list" and "perf
> > stat" to hybrid PMUs of just cpu_atom and cpu_core, allow any PMU.
>
> I've couple of doubts:
>
> 1. Can you please explain intention to do this esp for perf list. Since, IIUC,
>    `perf list --unit` option provide the same functionality.
>
> 2. Since we are already specifying pmu name for non-standerd/arch-specific
>    events like `pmu/attributes/`, I'm not sure where `perf stat --cputype=pmu`
>    is useful. Can you please explain perf stat usability aspect for non-hybrid
>    pmus.
>
> 3. What am I missing here:
>
>    $ sudo ./perf stat --cputype=amd_df -e amd_l3/event=0x4,umask=0xff/ -C 0 -- sleep 1
>     Performance counter stats for 'CPU(s) 0':
>
>            108,267      amd_l3/event=0x4,umask=0xff/
>
>        1.061290167 seconds time elapsed
>
> 3. Also, IMHO, using --cputype option to specify _pmu name_ is bit odd.
>
> >
> > Note, that if cpu_atom isn't mounted but a filter of cpu_atom is
> > requested, then this will now fail. As such a filter would never
> > succeed, no events can come from that unmounted PMU, then this
> > behavior could never have been useful and failing is clearer.
>
> I'm hitting a segfault if I use non-existing pmu:
>
> $ sudo ./perf list --cputype=random
> WARNING: cputype is not supported!
> Segmentation fault
>
>
> > @@ -443,8 +443,8 @@ int cmd_list(int argc, const char **argv)
> >                           "Print information on the perf event names and expressions used internally by events."),
> >               OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated,
> >                           "Print deprecated events."),
> > -             OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type",
> > -                        "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."),
> > +             OPT_STRING(0, "cputype", &cputype, "cpu type",
> > +                        "Limit PMU or metric printing to the given PMU (e.g. cpu, core or atom)."),
>
> man perf-list does not describe --cputype. I think we should add it as part
> of this patch?
>
> Similarly, man perf-stat also needs to be updated.

Ok, these changes are just keeping existing functionality. I don't
disagree with making these changes but I think we can follow up with
them. Or probably just deprecate the option.

> > +const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
> > +{
> > +     struct perf_pmu *pmu = NULL;
> > +
> > +     while ((pmu = perf_pmu__scan(pmu)) != NULL) {
> > +             if (!strcmp(pmu->name, str))
> > +                     return pmu;
> > +             /* Ignore "uncore_" prefix. */
> > +             if (!strncmp(pmu->name, "uncore_", 7)) {
> > +                     if (!strcmp(pmu->name + 7, str))
> > +                             return pmu;
>
> Any specific reason to ignore "uncore_"? IMHO, ignoring prefix of some
> pmus and not of others is bit confusing for naive user.

It is trying to be consistent with the event parser:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/util/parse-events.y#n326
where uncore is consumed. Fun fact, as numbers are consumed after a
PMU name and the underscore is optional, i915 as a PMU can be matched
in parsing with a PMU name of just "i".

Thanks,
Ian
Re: [PATCH v3 32/46] perf stat: Make cputype filter generic
Posted by Ian Rogers 2 years, 7 months ago
On Tue, May 2, 2023 at 3:52 AM Ravi Bangoria <ravi.bangoria@amd.com> wrote:
>
> On 29-Apr-23 11:04 AM, Ian Rogers wrote:
> > Rather than limit the --cputype argument for "perf list" and "perf
> > stat" to hybrid PMUs of just cpu_atom and cpu_core, allow any PMU.
>
> I've couple of doubts:
>
> 1. Can you please explain intention to do this esp for perf list. Since, IIUC,
>    `perf list --unit` option provide the same functionality.

I agree with you. The option already exists and I think we should just
move this option to being deprecated/hidden:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/subcmd/parse-options.h#n46

> 2. Since we are already specifying pmu name for non-standerd/arch-specific
>    events like `pmu/attributes/`, I'm not sure where `perf stat --cputype=pmu`
>    is useful. Can you please explain perf stat usability aspect for non-hybrid
>    pmus.

Completely agreed. This patch series is trying to remove the
duplicated code introduced by the hybrid changes. In this case I
didn't want to remove an option, and potentially break users of that
option, as part of fixing things up. A lot of what you are saying here
I added as comments to the original patch series.

> 3. What am I missing here:
>
>    $ sudo ./perf stat --cputype=amd_df -e amd_l3/event=0x4,umask=0xff/ -C 0 -- sleep 1
>     Performance counter stats for 'CPU(s) 0':
>
>            108,267      amd_l3/event=0x4,umask=0xff/
>
>        1.061290167 seconds time elapsed

The cputype applies to wildcard-ed events. So on hybrid:
$ perf stat -e cycles true
will open cycles on cpu_atom and cpu_core, the
parse_events__filter_pmu function is used skip PMUs based on cputype.

> 3. Also, IMHO, using --cputype option to specify _pmu name_ is bit odd.

Right, when the "feature" was added I would have preferred it as PMU
rather than CPU.

> >
> > Note, that if cpu_atom isn't mounted but a filter of cpu_atom is
> > requested, then this will now fail. As such a filter would never
> > succeed, no events can come from that unmounted PMU, then this
> > behavior could never have been useful and failing is clearer.
>
> I'm hitting a segfault if I use non-existing pmu:
>
> $ sudo ./perf list --cputype=random
> WARNING: cputype is not supported!
> Segmentation fault

Will fix in v4. The warning should be fatal/exit rather than try to
read the PMU's name.

Thanks,
Ian

> > @@ -443,8 +443,8 @@ int cmd_list(int argc, const char **argv)
> >                           "Print information on the perf event names and expressions used internally by events."),
> >               OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated,
> >                           "Print deprecated events."),
> > -             OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type",
> > -                        "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."),
> > +             OPT_STRING(0, "cputype", &cputype, "cpu type",
> > +                        "Limit PMU or metric printing to the given PMU (e.g. cpu, core or atom)."),
>
> man perf-list does not describe --cputype. I think we should add it as part
> of this patch?
>
> Similarly, man perf-stat also needs to be updated.
>
>
> > +const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
> > +{
> > +     struct perf_pmu *pmu = NULL;
> > +
> > +     while ((pmu = perf_pmu__scan(pmu)) != NULL) {
> > +             if (!strcmp(pmu->name, str))
> > +                     return pmu;
> > +             /* Ignore "uncore_" prefix. */
> > +             if (!strncmp(pmu->name, "uncore_", 7)) {
> > +                     if (!strcmp(pmu->name + 7, str))
> > +                             return pmu;
>
> Any specific reason to ignore "uncore_"? IMHO, ignoring prefix of some
> pmus and not of others is bit confusing for naive user.