[PATCH v4 0/5] perf evsel fallback changes

Ian Rogers posted 5 patches 2 weeks, 6 days ago
There is a newer version of this series
tools/perf/builtin-record.c      | 64 ++---------------------------
tools/perf/builtin-top.c         |  2 +-
tools/perf/tests/event_update.c  |  4 +-
tools/perf/tests/expand-cgroup.c |  4 +-
tools/perf/tests/perf-record.c   |  7 +++-
tools/perf/tests/topology.c      |  4 +-
tools/perf/util/callchain.c      | 61 ++++++++++++++++++++++++++++
tools/perf/util/evlist.c         | 32 ++++++++++-----
tools/perf/util/evlist.h         |  2 +-
tools/perf/util/evsel.c          | 70 +++++++++++++++++++++-----------
tools/perf/util/evsel.h          | 10 +++--
tools/perf/util/target.h         | 12 +++---
12 files changed, 159 insertions(+), 113 deletions(-)
[PATCH v4 0/5] perf evsel fallback changes
Posted by Ian Rogers 2 weeks, 6 days ago
Discussion with Thomas Richter in:
https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
showed that the evsel__fallback wasn't working for s390. These patches
avoid the problematic frame pointer callchain on s390 and fix
evsel__fallback from a range of problems when falling back to a
software event. I simulated failures when developing the patches but
they are untested other than that.

v4: Changing the callchain parameter at configuration time means other
    options aren't set the same as they would for `--call-graph
    dwarf`, for example the stack size. Switch to setting the
    callchain option on s390 to parameter parse time. For '-g' use
    '--call-graph dwarf' for s390. Other --call-graph options are
    parsed as normal, but a warning is generated when setting
    `--call-graph fp` for s390. Also fix that sample IDs aren't wanted
    when there is only 1 event in the evlist.

v3: Incorporate feedback about event and callchain behavior for s390:
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
https://lore.kernel.org/lkml/20260313202811.2599195-1-irogers@google.com/

v2: try exclude_callchain_user for s390 rather than fully disabling
    the callchain. Fix a missed clearing of is_pmu_core if the
    software event fallback.
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/

v1: https://lore.kernel.org/lkml/20260312031928.1494864-1-irogers@google.com/

Ian Rogers (5):
  perf evsel: Improve falling back from cycles
  perf target: Constify simple check functions
  perf evsel: Constify option arguments to config functions
  perf callchain: Move callchain option parsing out of builtin
  perf evlist: Improve default event for s390

 tools/perf/builtin-record.c      | 64 ++---------------------------
 tools/perf/builtin-top.c         |  2 +-
 tools/perf/tests/event_update.c  |  4 +-
 tools/perf/tests/expand-cgroup.c |  4 +-
 tools/perf/tests/perf-record.c   |  7 +++-
 tools/perf/tests/topology.c      |  4 +-
 tools/perf/util/callchain.c      | 61 ++++++++++++++++++++++++++++
 tools/perf/util/evlist.c         | 32 ++++++++++-----
 tools/perf/util/evlist.h         |  2 +-
 tools/perf/util/evsel.c          | 70 +++++++++++++++++++++-----------
 tools/perf/util/evsel.h          | 10 +++--
 tools/perf/util/target.h         | 12 +++---
 12 files changed, 159 insertions(+), 113 deletions(-)

-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v5 0/5] perf evsel fallback changes
Posted by Ian Rogers 2 weeks, 6 days ago
Discussion with Thomas Richter in:
https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
showed that the evsel__fallback wasn't working for s390. These patches
avoid the problematic frame pointer callchain on s390 and fix
evsel__fallback from a range of problems when falling back to a
software event. I simulated failures when developing the patches but
they are untested other than that.

v5: Fix the value for the top option to match that of record. Tidy the
    callchain parsing option callbacks. Based on AI review feedback:
https://sashiko.dev/#/patchset/20260317030601.567422-1-irogers%40google.com

v4: Changing the callchain parameter at configuration time means other
    options aren't set the same as they would for `--call-graph
    dwarf`, for example the stack size. Switch to setting the
    callchain option on s390 to parameter parse time. For '-g' use
    '--call-graph dwarf' for s390. Other --call-graph options are
    parsed as normal, but a warning is generated when setting
    `--call-graph fp` for s390. Also fix that sample IDs aren't wanted
    when there is only 1 event in the evlist.
https://lore.kernel.org/lkml/20260317030601.567422-1-irogers@google.com/

v3: Incorporate feedback about event and callchain behavior for s390:
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
https://lore.kernel.org/lkml/20260313202811.2599195-1-irogers@google.com/

v2: try exclude_callchain_user for s390 rather than fully disabling
    the callchain. Fix a missed clearing of is_pmu_core if the
    software event fallback.
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/

v1: https://lore.kernel.org/lkml/20260312031928.1494864-1-irogers@google.com/

Ian Rogers (5):
  perf evsel: Improve falling back from cycles
  perf target: Constify simple check functions
  perf evsel: Constify option arguments to config functions
  perf callchain: Refactor callchain option parsing
  perf evlist: Improve default event for s390

 tools/perf/builtin-record.c      | 64 ++++++-----------------------
 tools/perf/builtin-top.c         | 17 ++++----
 tools/perf/builtin-trace.c       |  9 +++-
 tools/perf/tests/event_update.c  |  4 +-
 tools/perf/tests/expand-cgroup.c |  4 +-
 tools/perf/tests/perf-record.c   |  7 +++-
 tools/perf/tests/topology.c      |  4 +-
 tools/perf/util/callchain.c      | 39 ++++++++++++++++++
 tools/perf/util/callchain.h      | 12 ++----
 tools/perf/util/evlist.c         | 32 ++++++++++-----
 tools/perf/util/evlist.h         |  2 +-
 tools/perf/util/evsel.c          | 70 +++++++++++++++++++++-----------
 tools/perf/util/evsel.h          | 10 +++--
 tools/perf/util/target.h         | 12 +++---
 14 files changed, 165 insertions(+), 121 deletions(-)

-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v5 1/5] perf evsel: Improve falling back from cycles
Posted by Ian Rogers 2 weeks, 6 days ago
Switch to using evsel__match rather than comparing perf_event_attr
values, this is robust on hybrid architectures.
Ensure evsel->pmu matches the evsel->core.attr.
Remove exclude bits that get set in other fallback attempts when
switching the event.
Log the event name with modifiers when switching the event on fallback.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/evsel.c | 45 ++++++++++++++++++++++++++++-------------
 tools/perf/util/evsel.h |  2 ++
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f59228c1a39e..bd14d9bbc91f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3785,25 +3785,42 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 {
 	int paranoid;
 
-	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
-	    evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
-	    evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+	if ((err == ENODEV || err == ENOENT || err == ENXIO) &&
+	    evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		/*
-		 * If it's cycles then fall back to hrtimer based cpu-clock sw
-		 * counter, which is always available even if no PMU support.
-		 *
-		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
-		 * b0a873e).
+		 * If it's the legacy hardware cycles event fails then fall back
+		 * to hrtimer based cpu-clock sw counter, which is always
+		 * available even if no PMU support. PPC returned ENXIO rather
+		 * than ENODEV or ENOENT until 2.6.37.
 		 */
-		evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
+		evsel->pmu = perf_pmus__find_by_type(PERF_TYPE_SOFTWARE);
+		assert(evsel->pmu); /* software is a "well-known" and can't fail PMU type. */
+
+		/* Configure the event. */
+		evsel->core.attr.type = PERF_TYPE_SOFTWARE;
 		evsel->core.attr.config = target__has_cpu(target)
 			? PERF_COUNT_SW_CPU_CLOCK
 			: PERF_COUNT_SW_TASK_CLOCK;
-		scnprintf(msg, msgsize,
-			"The cycles event is not supported, trying to fall back to %s",
-			target__has_cpu(target) ? "cpu-clock" : "task-clock");
+		evsel->core.is_pmu_core = false;
+
+		/* Remove excludes for new event. */
+		if (evsel->fallenback_eacces) {
+			evsel->core.attr.exclude_kernel = 0;
+			evsel->core.attr.exclude_hv     = 0;
+			evsel->fallenback_eacces = false;
+		}
+		if (evsel->fallenback_eopnotsupp) {
+			evsel->core.attr.exclude_guest = 0;
+			evsel->fallenback_eopnotsupp = false;
+		}
 
+		/* Name is recomputed by evsel__name. */
 		zfree(&evsel->name);
+
+		/* Log message. */
+		scnprintf(msg, msgsize,
+			  "The cycles event is not supported, trying to fall back to %s",
+			  evsel__name(evsel));
 		return true;
 	} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
 		   (paranoid = perf_event_paranoid()) > 1) {
@@ -3830,7 +3847,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 			  " samples", paranoid);
 		evsel->core.attr.exclude_kernel = 1;
 		evsel->core.attr.exclude_hv     = 1;
-
+		evsel->fallenback_eacces = true;
 		return true;
 	} else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest &&
 		   !evsel->exclude_GH) {
@@ -3851,7 +3868,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 		/* Apple M1 requires exclude_guest */
 		scnprintf(msg, msgsize, "Trying to fall back to excluding guest samples");
 		evsel->core.attr.exclude_guest = 1;
-
+		evsel->fallenback_eopnotsupp = true;
 		return true;
 	}
 no_fallback:
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a3d754c029a0..97f57fab28ce 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -124,6 +124,8 @@ struct evsel {
 	bool			default_metricgroup; /* A member of the Default metricgroup */
 	bool			default_show_events; /* If a default group member, show the event */
 	bool			needs_uniquify;
+	bool			fallenback_eacces;
+	bool			fallenback_eopnotsupp;
 	struct hashmap		*per_pkg_mask;
 	int			err;
 	int			script_output_type;
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v5 2/5] perf target: Constify simple check functions
Posted by Ian Rogers 2 weeks, 6 days ago
Allow the target to be const in callers.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/target.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 84ebb9c940c6..bc2bff9c6842 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -49,22 +49,22 @@ uid_t parse_uid(const char *str);
 
 int target__strerror(struct target *target, int errnum, char *buf, size_t buflen);
 
-static inline bool target__has_task(struct target *target)
+static inline bool target__has_task(const struct target *target)
 {
 	return target->tid || target->pid;
 }
 
-static inline bool target__has_cpu(struct target *target)
+static inline bool target__has_cpu(const struct target *target)
 {
 	return target->system_wide || target->cpu_list;
 }
 
-static inline bool target__none(struct target *target)
+static inline bool target__none(const struct target *target)
 {
 	return !target__has_task(target) && !target__has_cpu(target);
 }
 
-static inline bool target__enable_on_exec(struct target *target)
+static inline bool target__enable_on_exec(const struct target *target)
 {
 	/*
 	 * Normally enable_on_exec should be set if:
@@ -75,12 +75,12 @@ static inline bool target__enable_on_exec(struct target *target)
 	return target__none(target) && !target->initial_delay;
 }
 
-static inline bool target__has_per_thread(struct target *target)
+static inline bool target__has_per_thread(const struct target *target)
 {
 	return target->system_wide && target->per_thread;
 }
 
-static inline bool target__uses_dummy_map(struct target *target)
+static inline bool target__uses_dummy_map(const struct target *target)
 {
 	bool use_dummy = false;
 
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v5 3/5] perf evsel: Constify option arguments to config functions
Posted by Ian Rogers 2 weeks, 6 days ago
The options are used to configure the evsel but are not themselves
configured. Make the arguments const to better capture this.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/evsel.c | 20 ++++++++++----------
 tools/perf/util/evsel.h |  8 ++++----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bd14d9bbc91f..54c8922a8e47 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1015,8 +1015,8 @@ uint16_t evsel__e_machine(struct evsel *evsel, uint32_t *e_flags)
 	return perf_session__e_machine(session, e_flags);
 }
 
-static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-				      struct callchain_param *param)
+static void __evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+				      const struct callchain_param *param)
 {
 	bool function = evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
@@ -1080,14 +1080,14 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
 		attr->defer_callchain = 1;
 }
 
-void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-			     struct callchain_param *param)
+void evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+			     const struct callchain_param *param)
 {
 	if (param->enabled)
 		return __evsel__config_callchain(evsel, opts, param);
 }
 
-static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
+static void evsel__reset_callgraph(struct evsel *evsel, const struct callchain_param *param)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
 
@@ -1106,7 +1106,7 @@ static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *
 
 static void evsel__apply_ratio_to_prev(struct evsel *evsel,
 				       struct perf_event_attr *attr,
-				       struct record_opts *opts,
+				       const struct record_opts *opts,
 				       const char *buf)
 {
 	struct perf_event_attr *prev_attr = NULL;
@@ -1170,7 +1170,7 @@ static void evsel__apply_ratio_to_prev(struct evsel *evsel,
 }
 
 static void evsel__apply_config_terms(struct evsel *evsel,
-				      struct record_opts *opts, bool track)
+				      const struct record_opts *opts, bool track)
 {
 	struct evsel_config_term *term;
 	struct list_head *config_terms = &evsel->config_terms;
@@ -1445,7 +1445,7 @@ void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused,
 {
 }
 
-static void evsel__set_default_freq_period(struct record_opts *opts,
+static void evsel__set_default_freq_period(const struct record_opts *opts,
 					   struct perf_event_attr *attr)
 {
 	if (opts->freq) {
@@ -1490,8 +1490,8 @@ bool evsel__is_offcpu_event(struct evsel *evsel)
  *     enable/disable events specifically, as there's no
  *     initial traced exec call.
  */
-void evsel__config(struct evsel *evsel, struct record_opts *opts,
-		   struct callchain_param *callchain)
+void evsel__config(struct evsel *evsel, const struct record_opts *opts,
+		   const struct callchain_param *callchain)
 {
 	struct evsel *leader = evsel__leader(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 97f57fab28ce..339b5c08a33d 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -287,10 +287,10 @@ void evsel__set_priv_destructor(void (*destructor)(void *priv));
 
 struct callchain_param;
 
-void evsel__config(struct evsel *evsel, struct record_opts *opts,
-		   struct callchain_param *callchain);
-void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-			     struct callchain_param *callchain);
+void evsel__config(struct evsel *evsel, const struct record_opts *opts,
+		   const struct callchain_param *callchain);
+void evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+			     const  struct callchain_param *callchain);
 
 int __evsel__sample_size(u64 sample_type);
 void evsel__calc_id_pos(struct evsel *evsel);
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v5 4/5] perf callchain: Refactor callchain option parsing
Posted by Ian Rogers 2 weeks, 6 days ago
record_opts__parse_callchain is shared by builtin-record and
builtin-trace, it is declared in callchain.h. Move the declaration to
callchain.c for consistency with the header. In other cases make the
option callback a small static stub that then calls into callchain.c.
Make the no argument '-g' callchain option just a short-cut for
'--call-graph fp' so that there is consistency in how the arguments
are handled.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-record.c | 58 +++++--------------------------------
 tools/perf/builtin-top.c    | 14 ++++-----
 tools/perf/builtin-trace.c  |  9 +++++-
 tools/perf/util/callchain.c | 39 +++++++++++++++++++++++++
 tools/perf/util/callchain.h | 12 ++------
 5 files changed, 64 insertions(+), 68 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 40917a0be238..af1fe6b7c65c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2975,65 +2975,21 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	return status;
 }
 
-static void callchain_debug(struct callchain_param *callchain)
-{
-	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
-
-	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
-
-	if (callchain->record_mode == CALLCHAIN_DWARF)
-		pr_debug("callchain: stack dump size %d\n",
-			 callchain->dump_size);
-}
-
-int record_opts__parse_callchain(struct record_opts *record,
-				 struct callchain_param *callchain,
-				 const char *arg, bool unset)
-{
-	int ret;
-	callchain->enabled = !unset;
-
-	/* --no-call-graph */
-	if (unset) {
-		callchain->record_mode = CALLCHAIN_NONE;
-		pr_debug("callchain: disabled\n");
-		return 0;
-	}
-
-	ret = parse_callchain_record_opt(arg, callchain);
-	if (!ret) {
-		/* Enable data address sampling for DWARF unwind. */
-		if (callchain->record_mode == CALLCHAIN_DWARF &&
-		    !record->record_data_mmap_set)
-			record->record_data_mmap = true;
-		callchain_debug(callchain);
-	}
-
-	return ret;
-}
-
-int record_parse_callchain_opt(const struct option *opt,
+static int record_parse_callchain_opt(const struct option *opt,
 			       const char *arg,
 			       int unset)
 {
 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
 }
 
-int record_callchain_opt(const struct option *opt,
-			 const char *arg __maybe_unused,
-			 int unset __maybe_unused)
+static int record_callchain_opt(const struct option *opt,
+				const char *arg __maybe_unused,
+				int unset)
 {
-	struct callchain_param *callchain = opt->value;
-
-	callchain->enabled = true;
-
-	if (callchain->record_mode == CALLCHAIN_NONE)
-		callchain->record_mode = CALLCHAIN_FP;
-
-	callchain_debug(callchain);
-	return 0;
+	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
 }
 
+
 static int perf_record_config(const char *var, const char *value, void *cb)
 {
 	struct record *rec = cb;
@@ -3525,7 +3481,7 @@ static struct option __record_options[] = {
 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
 		     record__mmap_flush_parse),
-	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
+	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
 			   NULL, "enables call-graph recording" ,
 			   &record_callchain_opt),
 	OPT_CALLBACK(0, "call-graph", &record.opts,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 710604c4f6f6..2a949d956d0b 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1386,13 +1386,6 @@ static int __cmd_top(struct perf_top *top)
 	return ret;
 }
 
-static int
-callchain_opt(const struct option *opt, const char *arg, int unset)
-{
-	symbol_conf.use_callchain = true;
-	return record_callchain_opt(opt, arg, unset);
-}
-
 static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
@@ -1413,6 +1406,13 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 	return parse_callchain_top_opt(arg);
 }
 
+static int
+callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset)
+{
+	return parse_callchain_opt(opt, "fp", unset);
+}
+
+
 static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
 {
 	if (!strcmp(var, "top.call-graph")) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1c38f3d16a31..f487fbaa0ad6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -5300,6 +5300,13 @@ static int trace__parse_summary_mode(const struct option *opt, const char *str,
 	return 0;
 }
 
+static int trace_parse_callchain_opt(const struct option *opt,
+				     const char *arg,
+				     int unset)
+{
+	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
+}
+
 static int trace__config(const char *var, const char *value, void *arg)
 {
 	struct trace *trace = arg;
@@ -5447,7 +5454,7 @@ int cmd_trace(int argc, const char **argv)
 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
 	OPT_CALLBACK(0, "call-graph", &trace.opts,
 		     "record_mode[,record_size]", record_callchain_help,
-		     &record_parse_callchain_opt),
+		     &trace_parse_callchain_opt),
 	OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
 		    "Use libtraceevent to print the tracepoint arguments."),
 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8ff0898799ee..d97421fbf159 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -30,6 +30,7 @@
 #include "map.h"
 #include "callchain.h"
 #include "branch.h"
+#include "record.h"
 #include "symbol.h"
 #include "thread.h"
 #include "util.h"
@@ -328,6 +329,44 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
 	return ret;
 }
 
+static void callchain_debug(const struct callchain_param *callchain)
+{
+	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
+
+	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
+
+	if (callchain->record_mode == CALLCHAIN_DWARF)
+		pr_debug("callchain: stack dump size %d\n",
+			 callchain->dump_size);
+}
+
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset)
+{
+	int ret;
+
+	callchain->enabled = !unset;
+
+	/* --no-call-graph */
+	if (unset) {
+		callchain->record_mode = CALLCHAIN_NONE;
+		pr_debug("callchain: disabled\n");
+		return 0;
+	}
+
+	ret = parse_callchain_record_opt(arg, callchain);
+	if (!ret) {
+		/* Enable data address sampling for DWARF unwind. */
+		if (callchain->record_mode == CALLCHAIN_DWARF &&
+		    !record->record_data_mmap_set)
+			record->record_data_mmap = true;
+		callchain_debug(callchain);
+	}
+
+	return ret;
+}
+
 int perf_callchain_config(const char *var, const char *value)
 {
 	char *endptr;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index df54ddb8c0cb..06d463ccc7a0 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -9,11 +9,13 @@
 
 struct addr_location;
 struct evsel;
+struct hist_entry;
+struct hists;
 struct ip_callchain;
 struct map;
 struct perf_sample;
+struct record_opts;
 struct thread;
-struct hists;
 
 #define HELP_PAD "\t\t\t\t"
 
@@ -237,14 +239,6 @@ struct callchain_cursor *get_tls_callchain_cursor(void);
 int callchain_cursor__copy(struct callchain_cursor *dst,
 			   struct callchain_cursor *src);
 
-struct option;
-struct hist_entry;
-
-int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
-int record_callchain_opt(const struct option *opt, const char *arg, int unset);
-
-struct record_opts;
-
 int record_opts__parse_callchain(struct record_opts *record,
 				 struct callchain_param *callchain,
 				 const char *arg, bool unset);
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v5 5/5] perf evlist: Improve default event for s390
Posted by Ian Rogers 2 weeks, 6 days ago
Frame pointer callchains are not supported on s390 and dwarf
callchains are only supported on software events.

Switch the default event from cycles to cpu-clock or task-clock on
s390 if callchains are enabled.

If frame pointer callchains are requested on s390 show a
warning. Modify the '-g' option of `perf top` and `perf record` to
default to dwarf callchains on s390.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-record.c      |  8 ++++++--
 tools/perf/builtin-top.c         |  5 +++--
 tools/perf/tests/event_update.c  |  4 +++-
 tools/perf/tests/expand-cgroup.c |  4 +++-
 tools/perf/tests/perf-record.c   |  7 +++++--
 tools/perf/tests/topology.c      |  4 +++-
 tools/perf/util/evlist.c         | 32 +++++++++++++++++++++-----------
 tools/perf/util/evlist.h         |  2 +-
 tools/perf/util/evsel.c          |  5 +++++
 9 files changed, 50 insertions(+), 21 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index af1fe6b7c65c..ef97c7a54088 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -55,6 +55,7 @@
 #include "asm/bug.h"
 #include "perf.h"
 #include "cputopo.h"
+#include "dwarf-regs.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -2986,7 +2987,9 @@ static int record_callchain_opt(const struct option *opt,
 				const char *arg __maybe_unused,
 				int unset)
 {
-	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
+	return record_opts__parse_callchain(opt->value, &callchain_param,
+					    EM_HOST != EM_S390 ? "fp" : "dwarf",
+					    unset);
 }
 
 
@@ -4265,7 +4268,8 @@ int cmd_record(int argc, const char **argv)
 		record.opts.tail_synthesize = true;
 
 	if (rec->evlist->core.nr_entries == 0) {
-		struct evlist *def_evlist = evlist__new_default();
+		struct evlist *def_evlist = evlist__new_default(&rec->opts.target,
+								callchain_param.enabled);
 
 		if (!def_evlist)
 			goto out;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 2a949d956d0b..84211a78977e 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -56,6 +56,7 @@
 #include "util/debug.h"
 #include "util/ordered-events.h"
 #include "util/pfm.h"
+#include "dwarf-regs.h"
 
 #include <assert.h>
 #include <elf.h>
@@ -1409,7 +1410,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 static int
 callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset)
 {
-	return parse_callchain_opt(opt, "fp", unset);
+	return parse_callchain_opt(opt, EM_HOST != EM_S390 ? "fp" : "dwarf", unset);
 }
 
 
@@ -1695,7 +1696,7 @@ int cmd_top(int argc, const char **argv)
 		goto out_delete_evlist;
 
 	if (!top.evlist->core.nr_entries) {
-		struct evlist *def_evlist = evlist__new_default();
+		struct evlist *def_evlist = evlist__new_default(target, callchain_param.enabled);
 
 		if (!def_evlist)
 			goto out_delete_evlist;
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index cb9e6de2e033..facc65e29f20 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -8,6 +8,7 @@
 #include "header.h"
 #include "machine.h"
 #include "util/synthetic-events.h"
+#include "target.h"
 #include "tool.h"
 #include "tests.h"
 #include "debug.h"
@@ -81,7 +82,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes
 {
 	struct evsel *evsel;
 	struct event_name tmp;
-	struct evlist *evlist = evlist__new_default();
+	struct target target = {};
+	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
index c7b32a220ca1..dd547f2f77cc 100644
--- a/tools/perf/tests/expand-cgroup.c
+++ b/tools/perf/tests/expand-cgroup.c
@@ -8,6 +8,7 @@
 #include "parse-events.h"
 #include "pmu-events/pmu-events.h"
 #include "pfm.h"
+#include "target.h"
 #include <subcmd/parse-options.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -99,7 +100,8 @@ out:	for (i = 0; i < nr_events; i++)
 static int expand_default_events(void)
 {
 	int ret;
-	struct evlist *evlist = evlist__new_default();
+	struct target target = {};
+	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index efbd9cd60c63..c6e31ab8a6b8 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -84,8 +84,11 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
 	CPU_ZERO_S(cpu_mask_size, cpu_mask);
 
 	perf_sample__init(&sample, /*all=*/false);
-	if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
-		evlist = evlist__new_default();
+	if (evlist == NULL) { /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
+		struct target target = {};
+
+		evlist = evlist__new_default(&target, /*sample_callchains=*/false);
+	}
 
 	if (evlist == NULL) {
 		pr_debug("Not enough memory to create evlist\n");
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index ec01150d208d..a34a7ab19a80 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -9,6 +9,7 @@
 #include "evlist.h"
 #include "debug.h"
 #include "pmus.h"
+#include "target.h"
 #include <linux/err.h>
 
 #define TEMPL "/tmp/perf-test-XXXXXX"
@@ -37,11 +38,12 @@ static int session_write_header(char *path)
 		.path = path,
 		.mode = PERF_DATA_MODE_WRITE,
 	};
+	struct target target = {};
 
 	session = perf_session__new(&data, NULL);
 	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
-	session->evlist = evlist__new_default();
+	session->evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 	TEST_ASSERT_VAL("can't get evlist", session->evlist);
 	session->evlist->session = session;
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 591bdf0b3e2a..c702741a9173 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -13,6 +13,7 @@
 #include "util/mmap.h"
 #include "thread_map.h"
 #include "target.h"
+#include "dwarf-regs.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "record.h"
@@ -98,38 +99,47 @@ struct evlist *evlist__new(void)
 	return evlist;
 }
 
-struct evlist *evlist__new_default(void)
+struct evlist *evlist__new_default(const struct target *target, bool sample_callchains)
 {
 	struct evlist *evlist = evlist__new();
 	bool can_profile_kernel;
 	struct perf_pmu *pmu = NULL;
+	struct evsel *evsel;
+	char buf[256];
+	int err;
 
 	if (!evlist)
 		return NULL;
 
 	can_profile_kernel = perf_event_paranoid_check(1);
 
-	while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
-		char buf[256];
-		int err;
-
-		snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
+	if (EM_HOST == EM_S390 && sample_callchains) {
+		snprintf(buf, sizeof(buf), "software/%s/%s",
+			 target__has_cpu(target) ? "cpu-clock" : "task-clock",
 			 can_profile_kernel ? "P" : "Pu");
 		err = parse_event(evlist, buf);
-		if (err) {
-			evlist__delete(evlist);
-			return NULL;
+		if (err)
+			goto out_err;
+	} else {
+		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+			snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
+				can_profile_kernel ? "P" : "Pu");
+			err = parse_event(evlist, buf);
+			if (err)
+				goto out_err;
 		}
 	}
 
+	/* If there is only 1 event a sample identifier isn't necessary. */
 	if (evlist->core.nr_entries > 1) {
-		struct evsel *evsel;
-
 		evlist__for_each_entry(evlist, evsel)
 			evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
 	}
 
 	return evlist;
+out_err:
+	evlist__delete(evlist);
+	return NULL;
 }
 
 struct evlist *evlist__new_dummy(void)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index d17c3b57a409..e507f5f20ef6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -104,7 +104,7 @@ struct evsel_str_handler {
 };
 
 struct evlist *evlist__new(void);
-struct evlist *evlist__new_default(void);
+struct evlist *evlist__new_default(const struct target *target, bool sample_callchains);
 struct evlist *evlist__new_dummy(void);
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
 		  struct perf_thread_map *threads);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 54c8922a8e47..5a294595a677 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1021,6 +1021,11 @@ static void __evsel__config_callchain(struct evsel *evsel, const struct record_o
 	bool function = evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
 
+	if (EM_HOST == EM_S390 && param->record_mode == CALLCHAIN_FP) {
+		pr_warning_once(
+			"Framepointer unwinding lacks kernel support. Use '--call-graph dwarf'\n");
+	}
+
 	evsel__set_sample_bit(evsel, CALLCHAIN);
 
 	attr->sample_max_stack = param->max_stack;
-- 
2.53.0.851.ga537e3e6e9-goog
Re: [PATCH v5 5/5] perf evlist: Improve default event for s390
Posted by Thomas Richter 2 weeks, 6 days ago
On 3/17/26 06:53, Ian Rogers wrote:
> Frame pointer callchains are not supported on s390 and dwarf
> callchains are only supported on software events.
> 
> Switch the default event from cycles to cpu-clock or task-clock on
> s390 if callchains are enabled.
> 
> If frame pointer callchains are requested on s390 show a
> warning. Modify the '-g' option of `perf top` and `perf record` to
> default to dwarf callchains on s390.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> ---
>  tools/perf/builtin-record.c      |  8 ++++++--
>  tools/perf/builtin-top.c         |  5 +++--
>  tools/perf/tests/event_update.c  |  4 +++-
>  tools/perf/tests/expand-cgroup.c |  4 +++-
>  tools/perf/tests/perf-record.c   |  7 +++++--
>  tools/perf/tests/topology.c      |  4 +++-
>  tools/perf/util/evlist.c         | 32 +++++++++++++++++++++-----------
>  tools/perf/util/evlist.h         |  2 +-
>  tools/perf/util/evsel.c          |  5 +++++
>  9 files changed, 50 insertions(+), 21 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index af1fe6b7c65c..ef97c7a54088 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -55,6 +55,7 @@
>  #include "asm/bug.h"
>  #include "perf.h"
>  #include "cputopo.h"
> +#include "dwarf-regs.h"
>  
>  #include <errno.h>
>  #include <inttypes.h>
> @@ -2986,7 +2987,9 @@ static int record_callchain_opt(const struct option *opt,
>  				const char *arg __maybe_unused,
>  				int unset)
>  {
> -	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
> +	return record_opts__parse_callchain(opt->value, &callchain_param,
> +					    EM_HOST != EM_S390 ? "fp" : "dwarf",
> +					    unset);
>  }
>  
>  
> @@ -4265,7 +4268,8 @@ int cmd_record(int argc, const char **argv)
>  		record.opts.tail_synthesize = true;
>  
>  	if (rec->evlist->core.nr_entries == 0) {
> -		struct evlist *def_evlist = evlist__new_default();
> +		struct evlist *def_evlist = evlist__new_default(&rec->opts.target,
> +								callchain_param.enabled);
>  
>  		if (!def_evlist)
>  			goto out;
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index 2a949d956d0b..84211a78977e 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -56,6 +56,7 @@
>  #include "util/debug.h"
>  #include "util/ordered-events.h"
>  #include "util/pfm.h"
> +#include "dwarf-regs.h"
>  
>  #include <assert.h>
>  #include <elf.h>
> @@ -1409,7 +1410,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
>  static int
>  callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset)
>  {
> -	return parse_callchain_opt(opt, "fp", unset);
> +	return parse_callchain_opt(opt, EM_HOST != EM_S390 ? "fp" : "dwarf", unset);
>  }
>  
>  
> @@ -1695,7 +1696,7 @@ int cmd_top(int argc, const char **argv)
>  		goto out_delete_evlist;
>  
>  	if (!top.evlist->core.nr_entries) {
> -		struct evlist *def_evlist = evlist__new_default();
> +		struct evlist *def_evlist = evlist__new_default(target, callchain_param.enabled);
>  
>  		if (!def_evlist)
>  			goto out_delete_evlist;
> diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
> index cb9e6de2e033..facc65e29f20 100644
> --- a/tools/perf/tests/event_update.c
> +++ b/tools/perf/tests/event_update.c
> @@ -8,6 +8,7 @@
>  #include "header.h"
>  #include "machine.h"
>  #include "util/synthetic-events.h"
> +#include "target.h"
>  #include "tool.h"
>  #include "tests.h"
>  #include "debug.h"
> @@ -81,7 +82,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes
>  {
>  	struct evsel *evsel;
>  	struct event_name tmp;
> -	struct evlist *evlist = evlist__new_default();
> +	struct target target = {};
> +	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
>  
>  	TEST_ASSERT_VAL("failed to get evlist", evlist);
>  
> diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
> index c7b32a220ca1..dd547f2f77cc 100644
> --- a/tools/perf/tests/expand-cgroup.c
> +++ b/tools/perf/tests/expand-cgroup.c
> @@ -8,6 +8,7 @@
>  #include "parse-events.h"
>  #include "pmu-events/pmu-events.h"
>  #include "pfm.h"
> +#include "target.h"
>  #include <subcmd/parse-options.h>
>  #include <stdio.h>
>  #include <stdlib.h>
> @@ -99,7 +100,8 @@ out:	for (i = 0; i < nr_events; i++)
>  static int expand_default_events(void)
>  {
>  	int ret;
> -	struct evlist *evlist = evlist__new_default();
> +	struct target target = {};
> +	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
>  
>  	TEST_ASSERT_VAL("failed to get evlist", evlist);
>  
> diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
> index efbd9cd60c63..c6e31ab8a6b8 100644
> --- a/tools/perf/tests/perf-record.c
> +++ b/tools/perf/tests/perf-record.c
> @@ -84,8 +84,11 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>  	CPU_ZERO_S(cpu_mask_size, cpu_mask);
>  
>  	perf_sample__init(&sample, /*all=*/false);
> -	if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
> -		evlist = evlist__new_default();
> +	if (evlist == NULL) { /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
> +		struct target target = {};
> +
> +		evlist = evlist__new_default(&target, /*sample_callchains=*/false);
> +	}
>  
>  	if (evlist == NULL) {
>  		pr_debug("Not enough memory to create evlist\n");
> diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
> index ec01150d208d..a34a7ab19a80 100644
> --- a/tools/perf/tests/topology.c
> +++ b/tools/perf/tests/topology.c
> @@ -9,6 +9,7 @@
>  #include "evlist.h"
>  #include "debug.h"
>  #include "pmus.h"
> +#include "target.h"
>  #include <linux/err.h>
>  
>  #define TEMPL "/tmp/perf-test-XXXXXX"
> @@ -37,11 +38,12 @@ static int session_write_header(char *path)
>  		.path = path,
>  		.mode = PERF_DATA_MODE_WRITE,
>  	};
> +	struct target target = {};
>  
>  	session = perf_session__new(&data, NULL);
>  	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
>  
> -	session->evlist = evlist__new_default();
> +	session->evlist = evlist__new_default(&target, /*sample_callchains=*/false);
>  	TEST_ASSERT_VAL("can't get evlist", session->evlist);
>  	session->evlist->session = session;
>  
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 591bdf0b3e2a..c702741a9173 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -13,6 +13,7 @@
>  #include "util/mmap.h"
>  #include "thread_map.h"
>  #include "target.h"
> +#include "dwarf-regs.h"
>  #include "evlist.h"
>  #include "evsel.h"
>  #include "record.h"
> @@ -98,38 +99,47 @@ struct evlist *evlist__new(void)
>  	return evlist;
>  }
>  
> -struct evlist *evlist__new_default(void)
> +struct evlist *evlist__new_default(const struct target *target, bool sample_callchains)
>  {
>  	struct evlist *evlist = evlist__new();
>  	bool can_profile_kernel;
>  	struct perf_pmu *pmu = NULL;
> +	struct evsel *evsel;
> +	char buf[256];
> +	int err;
>  
>  	if (!evlist)
>  		return NULL;
>  
>  	can_profile_kernel = perf_event_paranoid_check(1);
>  
> -	while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> -		char buf[256];
> -		int err;
> -
> -		snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
> +	if (EM_HOST == EM_S390 && sample_callchains) {
> +		snprintf(buf, sizeof(buf), "software/%s/%s",
> +			 target__has_cpu(target) ? "cpu-clock" : "task-clock",
>  			 can_profile_kernel ? "P" : "Pu");
>  		err = parse_event(evlist, buf);
> -		if (err) {
> -			evlist__delete(evlist);
> -			return NULL;
> +		if (err)
> +			goto out_err;
> +	} else {
> +		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> +			snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
> +				can_profile_kernel ? "P" : "Pu");
> +			err = parse_event(evlist, buf);
> +			if (err)
> +				goto out_err;
>  		}
>  	}
>  
> +	/* If there is only 1 event a sample identifier isn't necessary. */
>  	if (evlist->core.nr_entries > 1) {
> -		struct evsel *evsel;
> -
>  		evlist__for_each_entry(evlist, evsel)
>  			evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
>  	}
>  
>  	return evlist;
> +out_err:
> +	evlist__delete(evlist);
> +	return NULL;
>  }
>  
>  struct evlist *evlist__new_dummy(void)
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index d17c3b57a409..e507f5f20ef6 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -104,7 +104,7 @@ struct evsel_str_handler {
>  };
>  
>  struct evlist *evlist__new(void);
> -struct evlist *evlist__new_default(void);
> +struct evlist *evlist__new_default(const struct target *target, bool sample_callchains);
>  struct evlist *evlist__new_dummy(void);
>  void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
>  		  struct perf_thread_map *threads);
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 54c8922a8e47..5a294595a677 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1021,6 +1021,11 @@ static void __evsel__config_callchain(struct evsel *evsel, const struct record_o
>  	bool function = evsel__is_function_event(evsel);
>  	struct perf_event_attr *attr = &evsel->core.attr;
>  
> +	if (EM_HOST == EM_S390 && param->record_mode == CALLCHAIN_FP) {
> +		pr_warning_once(
> +			"Framepointer unwinding lacks kernel support. Use '--call-graph dwarf'\n");
> +	}
> +
>  	evsel__set_sample_bit(evsel, CALLCHAIN);
>  
>  	attr->sample_max_stack = param->max_stack;

Great, here is the output on my LPAR. Thanks very much fpr addressing and fixing this!!!

root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 2s 
❯ ./perf record  --call-graph dwarf  -- perf test -w noploop
[ perf record: Woken up 133 times to write data ]
[ perf record: Captured and wrote 32.928 MB perf.data (4039 samples) ]

root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 2s 
❯ ./perf evlist
software/task-clock/P

root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc 
❯ ./perf record  -g -- perf test -w noploop
[ perf record: Woken up 133 times to write data ]
[ perf record: Captured and wrote 32.952 MB perf.data (4042 samples) ]

root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 2s 
❯ ./perf evlist
software/task-clock/P

root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc 
❯ ./perf record  -- perf test -w noploop
[ perf record: Woken up 2 times to write data ]
[ perf record: Captured and wrote 0.179 MB perf.data (3974 samples) ]

root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 2s 
❯ ./perf evlist
cpum_cf/cycles/P

root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc 
❯ ./perf record  
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.250 MB perf.data (24 samples) ]


root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 3s 
❯ ./perf evlist
cpum_cf/cycles/P
dummy:u

root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc 
❯ ./perf record  -g
^C[ perf record: Woken up 8 times to write data ]
[ perf record: Captured and wrote 5.186 MB perf.data (34255 samples) ]


root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 3s 
❯ ./perf evlist
software/cpu-clock/P
dummy:u


root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 3s 
 
Tested-by:  Thomas Richter <tmricht@linux.ibm.com>
-- 
Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany
--
IBM Deutschland Research & Development GmbH

Vorsitzender des Aufsichtsrats: Wolfgang Wendt

Geschäftsführung: David Faller

Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
Re: [PATCH v5 5/5] perf evlist: Improve default event for s390
Posted by Ian Rogers 2 weeks, 6 days ago
On Tue, Mar 17, 2026 at 12:52 AM Thomas Richter <tmricht@linux.ibm.com> wrote:
>
> On 3/17/26 06:53, Ian Rogers wrote:
> > Frame pointer callchains are not supported on s390 and dwarf
> > callchains are only supported on software events.
> >
> > Switch the default event from cycles to cpu-clock or task-clock on
> > s390 if callchains are enabled.
> >
> > If frame pointer callchains are requested on s390 show a
> > warning. Modify the '-g' option of `perf top` and `perf record` to
> > default to dwarf callchains on s390.
> >
> > Signed-off-by: Ian Rogers <irogers@google.com>
> > ---
> >  tools/perf/builtin-record.c      |  8 ++++++--
> >  tools/perf/builtin-top.c         |  5 +++--
> >  tools/perf/tests/event_update.c  |  4 +++-
> >  tools/perf/tests/expand-cgroup.c |  4 +++-
> >  tools/perf/tests/perf-record.c   |  7 +++++--
> >  tools/perf/tests/topology.c      |  4 +++-
> >  tools/perf/util/evlist.c         | 32 +++++++++++++++++++++-----------
> >  tools/perf/util/evlist.h         |  2 +-
> >  tools/perf/util/evsel.c          |  5 +++++
> >  9 files changed, 50 insertions(+), 21 deletions(-)
> >
> > diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> > index af1fe6b7c65c..ef97c7a54088 100644
> > --- a/tools/perf/builtin-record.c
> > +++ b/tools/perf/builtin-record.c
> > @@ -55,6 +55,7 @@
> >  #include "asm/bug.h"
> >  #include "perf.h"
> >  #include "cputopo.h"
> > +#include "dwarf-regs.h"
> >
> >  #include <errno.h>
> >  #include <inttypes.h>
> > @@ -2986,7 +2987,9 @@ static int record_callchain_opt(const struct option *opt,
> >                               const char *arg __maybe_unused,
> >                               int unset)
> >  {
> > -     return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
> > +     return record_opts__parse_callchain(opt->value, &callchain_param,
> > +                                         EM_HOST != EM_S390 ? "fp" : "dwarf",
> > +                                         unset);
> >  }
> >
> >
> > @@ -4265,7 +4268,8 @@ int cmd_record(int argc, const char **argv)
> >               record.opts.tail_synthesize = true;
> >
> >       if (rec->evlist->core.nr_entries == 0) {
> > -             struct evlist *def_evlist = evlist__new_default();
> > +             struct evlist *def_evlist = evlist__new_default(&rec->opts.target,
> > +                                                             callchain_param.enabled);
> >
> >               if (!def_evlist)
> >                       goto out;
> > diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> > index 2a949d956d0b..84211a78977e 100644
> > --- a/tools/perf/builtin-top.c
> > +++ b/tools/perf/builtin-top.c
> > @@ -56,6 +56,7 @@
> >  #include "util/debug.h"
> >  #include "util/ordered-events.h"
> >  #include "util/pfm.h"
> > +#include "dwarf-regs.h"
> >
> >  #include <assert.h>
> >  #include <elf.h>
> > @@ -1409,7 +1410,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
> >  static int
> >  callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset)
> >  {
> > -     return parse_callchain_opt(opt, "fp", unset);
> > +     return parse_callchain_opt(opt, EM_HOST != EM_S390 ? "fp" : "dwarf", unset);
> >  }
> >
> >
> > @@ -1695,7 +1696,7 @@ int cmd_top(int argc, const char **argv)
> >               goto out_delete_evlist;
> >
> >       if (!top.evlist->core.nr_entries) {
> > -             struct evlist *def_evlist = evlist__new_default();
> > +             struct evlist *def_evlist = evlist__new_default(target, callchain_param.enabled);
> >
> >               if (!def_evlist)
> >                       goto out_delete_evlist;
> > diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
> > index cb9e6de2e033..facc65e29f20 100644
> > --- a/tools/perf/tests/event_update.c
> > +++ b/tools/perf/tests/event_update.c
> > @@ -8,6 +8,7 @@
> >  #include "header.h"
> >  #include "machine.h"
> >  #include "util/synthetic-events.h"
> > +#include "target.h"
> >  #include "tool.h"
> >  #include "tests.h"
> >  #include "debug.h"
> > @@ -81,7 +82,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes
> >  {
> >       struct evsel *evsel;
> >       struct event_name tmp;
> > -     struct evlist *evlist = evlist__new_default();
> > +     struct target target = {};
> > +     struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
> >
> >       TEST_ASSERT_VAL("failed to get evlist", evlist);
> >
> > diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
> > index c7b32a220ca1..dd547f2f77cc 100644
> > --- a/tools/perf/tests/expand-cgroup.c
> > +++ b/tools/perf/tests/expand-cgroup.c
> > @@ -8,6 +8,7 @@
> >  #include "parse-events.h"
> >  #include "pmu-events/pmu-events.h"
> >  #include "pfm.h"
> > +#include "target.h"
> >  #include <subcmd/parse-options.h>
> >  #include <stdio.h>
> >  #include <stdlib.h>
> > @@ -99,7 +100,8 @@ out:       for (i = 0; i < nr_events; i++)
> >  static int expand_default_events(void)
> >  {
> >       int ret;
> > -     struct evlist *evlist = evlist__new_default();
> > +     struct target target = {};
> > +     struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
> >
> >       TEST_ASSERT_VAL("failed to get evlist", evlist);
> >
> > diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
> > index efbd9cd60c63..c6e31ab8a6b8 100644
> > --- a/tools/perf/tests/perf-record.c
> > +++ b/tools/perf/tests/perf-record.c
> > @@ -84,8 +84,11 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
> >       CPU_ZERO_S(cpu_mask_size, cpu_mask);
> >
> >       perf_sample__init(&sample, /*all=*/false);
> > -     if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
> > -             evlist = evlist__new_default();
> > +     if (evlist == NULL) { /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
> > +             struct target target = {};
> > +
> > +             evlist = evlist__new_default(&target, /*sample_callchains=*/false);
> > +     }
> >
> >       if (evlist == NULL) {
> >               pr_debug("Not enough memory to create evlist\n");
> > diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
> > index ec01150d208d..a34a7ab19a80 100644
> > --- a/tools/perf/tests/topology.c
> > +++ b/tools/perf/tests/topology.c
> > @@ -9,6 +9,7 @@
> >  #include "evlist.h"
> >  #include "debug.h"
> >  #include "pmus.h"
> > +#include "target.h"
> >  #include <linux/err.h>
> >
> >  #define TEMPL "/tmp/perf-test-XXXXXX"
> > @@ -37,11 +38,12 @@ static int session_write_header(char *path)
> >               .path = path,
> >               .mode = PERF_DATA_MODE_WRITE,
> >       };
> > +     struct target target = {};
> >
> >       session = perf_session__new(&data, NULL);
> >       TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
> >
> > -     session->evlist = evlist__new_default();
> > +     session->evlist = evlist__new_default(&target, /*sample_callchains=*/false);
> >       TEST_ASSERT_VAL("can't get evlist", session->evlist);
> >       session->evlist->session = session;
> >
> > diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> > index 591bdf0b3e2a..c702741a9173 100644
> > --- a/tools/perf/util/evlist.c
> > +++ b/tools/perf/util/evlist.c
> > @@ -13,6 +13,7 @@
> >  #include "util/mmap.h"
> >  #include "thread_map.h"
> >  #include "target.h"
> > +#include "dwarf-regs.h"
> >  #include "evlist.h"
> >  #include "evsel.h"
> >  #include "record.h"
> > @@ -98,38 +99,47 @@ struct evlist *evlist__new(void)
> >       return evlist;
> >  }
> >
> > -struct evlist *evlist__new_default(void)
> > +struct evlist *evlist__new_default(const struct target *target, bool sample_callchains)
> >  {
> >       struct evlist *evlist = evlist__new();
> >       bool can_profile_kernel;
> >       struct perf_pmu *pmu = NULL;
> > +     struct evsel *evsel;
> > +     char buf[256];
> > +     int err;
> >
> >       if (!evlist)
> >               return NULL;
> >
> >       can_profile_kernel = perf_event_paranoid_check(1);
> >
> > -     while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> > -             char buf[256];
> > -             int err;
> > -
> > -             snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
> > +     if (EM_HOST == EM_S390 && sample_callchains) {
> > +             snprintf(buf, sizeof(buf), "software/%s/%s",
> > +                      target__has_cpu(target) ? "cpu-clock" : "task-clock",
> >                        can_profile_kernel ? "P" : "Pu");
> >               err = parse_event(evlist, buf);
> > -             if (err) {
> > -                     evlist__delete(evlist);
> > -                     return NULL;
> > +             if (err)
> > +                     goto out_err;
> > +     } else {
> > +             while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> > +                     snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
> > +                             can_profile_kernel ? "P" : "Pu");
> > +                     err = parse_event(evlist, buf);
> > +                     if (err)
> > +                             goto out_err;
> >               }
> >       }
> >
> > +     /* If there is only 1 event a sample identifier isn't necessary. */
> >       if (evlist->core.nr_entries > 1) {
> > -             struct evsel *evsel;
> > -
> >               evlist__for_each_entry(evlist, evsel)
> >                       evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
> >       }
> >
> >       return evlist;
> > +out_err:
> > +     evlist__delete(evlist);
> > +     return NULL;
> >  }
> >
> >  struct evlist *evlist__new_dummy(void)
> > diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> > index d17c3b57a409..e507f5f20ef6 100644
> > --- a/tools/perf/util/evlist.h
> > +++ b/tools/perf/util/evlist.h
> > @@ -104,7 +104,7 @@ struct evsel_str_handler {
> >  };
> >
> >  struct evlist *evlist__new(void);
> > -struct evlist *evlist__new_default(void);
> > +struct evlist *evlist__new_default(const struct target *target, bool sample_callchains);
> >  struct evlist *evlist__new_dummy(void);
> >  void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
> >                 struct perf_thread_map *threads);
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index 54c8922a8e47..5a294595a677 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -1021,6 +1021,11 @@ static void __evsel__config_callchain(struct evsel *evsel, const struct record_o
> >       bool function = evsel__is_function_event(evsel);
> >       struct perf_event_attr *attr = &evsel->core.attr;
> >
> > +     if (EM_HOST == EM_S390 && param->record_mode == CALLCHAIN_FP) {
> > +             pr_warning_once(
> > +                     "Framepointer unwinding lacks kernel support. Use '--call-graph dwarf'\n");
> > +     }
> > +
> >       evsel__set_sample_bit(evsel, CALLCHAIN);
> >
> >       attr->sample_max_stack = param->max_stack;
>
> Great, here is the output on my LPAR. Thanks very much fpr addressing and fixing this!!!
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 2s
> ❯ ./perf record  --call-graph dwarf  -- perf test -w noploop
> [ perf record: Woken up 133 times to write data ]
> [ perf record: Captured and wrote 32.928 MB perf.data (4039 samples) ]
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 2s
> ❯ ./perf evlist
> software/task-clock/P
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc
> ❯ ./perf record  -g -- perf test -w noploop
> [ perf record: Woken up 133 times to write data ]
> [ perf record: Captured and wrote 32.952 MB perf.data (4042 samples) ]
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 2s
> ❯ ./perf evlist
> software/task-clock/P
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc
> ❯ ./perf record  -- perf test -w noploop
> [ perf record: Woken up 2 times to write data ]
> [ perf record: Captured and wrote 0.179 MB perf.data (3974 samples) ]
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 2s
> ❯ ./perf evlist
> cpum_cf/cycles/P
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc
> ❯ ./perf record
> ^C[ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 0.250 MB perf.data (24 samples) ]
>
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 3s
> ❯ ./perf evlist
> cpum_cf/cycles/P
> dummy:u
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc
> ❯ ./perf record  -g
> ^C[ perf record: Woken up 8 times to write data ]
> [ perf record: Captured and wrote 5.186 MB perf.data (34255 samples) ]
>
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 3s
> ❯ ./perf evlist
> software/cpu-clock/P
> dummy:u
>
>
> root in 🌐 b83lp69 in mirror-linux-next/tools/perf on  master [⇡] via C v15.2.1-gcc took 3s
>
> Tested-by:  Thomas Richter <tmricht@linux.ibm.com>

Thanks Thomas! I'll add the tag into the next version. Why a new
version? I'm trying to appease sashiko that is rightly flagging more
issues:
https://sashiko.dev/#/patchset/20260317055334.760347-1-irogers%40google.com

Thanks,
Ian

> --
> Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany
> --
> IBM Deutschland Research & Development GmbH
>
> Vorsitzender des Aufsichtsrats: Wolfgang Wendt
>
> Geschäftsführung: David Faller
>
> Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
[PATCH v6 0/5] perf evsel fallback changes, better s390 defaults
Posted by Ian Rogers 2 weeks, 6 days ago
Discussion with Thomas Richter in:
https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
showed that the evsel__fallback wasn't working for s390. These patches
avoid the problematic frame pointer callchain on s390 and fix
evsel__fallback from a range of problems when falling back to a
software event. I simulated failures when developing the patches but
they are untested other than that.

v6: Sashiko noted that target wasn't fully set up when creating the
    default evlist in `perf top`, so move it earlier. Fix const char*
    casting issues in __parse_callchain_report_opt. Make '-g' not
    override the .perfconfig setting again.
https://sashiko.dev/#/patchset/20260317055334.760347-1-irogers%40google.com

v5: Fix the value for the top option to match that of record. Tidy the
    callchain parsing option callbacks. Based on AI review feedback:
https://sashiko.dev/#/patchset/20260317030601.567422-1-irogers%40google.com
https://lore.kernel.org/lkml/20260317055334.760347-1-irogers@google.com/

v4: Changing the callchain parameter at configuration time means other
    options aren't set the same as they would for `--call-graph
    dwarf`, for example the stack size. Switch to setting the
    callchain option on s390 to parameter parse time. For '-g' use
    '--call-graph dwarf' for s390. Other --call-graph options are
    parsed as normal, but a warning is generated when setting
    `--call-graph fp` for s390. Also fix that sample IDs aren't wanted
    when there is only 1 event in the evlist.
https://lore.kernel.org/lkml/20260317030601.567422-1-irogers@google.com/

v3: Incorporate feedback about event and callchain behavior for s390:
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
https://lore.kernel.org/lkml/20260313202811.2599195-1-irogers@google.com/

v2: try exclude_callchain_user for s390 rather than fully disabling
    the callchain. Fix a missed clearing of is_pmu_core if the
    software event fallback.
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/

v1: https://lore.kernel.org/lkml/20260312031928.1494864-1-irogers@google.com/

Ian Rogers (5):
  perf evsel: Improve falling back from cycles
  perf target: Constify simple check functions
  perf evsel: Constify option arguments to config functions
  perf callchain: Refactor callchain option parsing
  perf evlist: Improve default event for s390

 tools/perf/builtin-record.c      | 66 +++++++----------------------
 tools/perf/builtin-top.c         | 67 ++++++++++++++++-------------
 tools/perf/builtin-trace.c       |  9 +++-
 tools/perf/tests/event_update.c  |  4 +-
 tools/perf/tests/expand-cgroup.c |  4 +-
 tools/perf/tests/perf-record.c   |  7 ++-
 tools/perf/tests/topology.c      |  4 +-
 tools/perf/util/callchain.c      | 73 ++++++++++++++++++++++++++------
 tools/perf/util/callchain.h      | 12 ++----
 tools/perf/util/evlist.c         | 32 +++++++++-----
 tools/perf/util/evlist.h         |  2 +-
 tools/perf/util/evsel.c          | 70 +++++++++++++++++++-----------
 tools/perf/util/evsel.h          | 10 +++--
 tools/perf/util/target.h         | 12 +++---
 14 files changed, 217 insertions(+), 155 deletions(-)

-- 
2.53.0.851.ga537e3e6e9-goog
Re: [PATCH v6 0/5] perf evsel fallback changes, better s390 defaults
Posted by Thomas Richter 2 weeks, 5 days ago
On 3/17/26 18:56, Ian Rogers wrote:
> Discussion with Thomas Richter in:
> https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
> showed that the evsel__fallback wasn't working for s390. These patches
> avoid the problematic frame pointer callchain on s390 and fix
> evsel__fallback from a range of problems when falling back to a
> software event. I simulated failures when developing the patches but
> they are untested other than that.
> 
> v6: Sashiko noted that target wasn't fully set up when creating the
>     default evlist in `perf top`, so move it earlier. Fix const char*
>     casting issues in __parse_callchain_report_opt. Make '-g' not
>     override the .perfconfig setting again.
> https://sashiko.dev/#/patchset/20260317055334.760347-1-irogers%40google.com
> 
> v5: Fix the value for the top option to match that of record. Tidy the
>     callchain parsing option callbacks. Based on AI review feedback:
> https://sashiko.dev/#/patchset/20260317030601.567422-1-irogers%40google.com
> https://lore.kernel.org/lkml/20260317055334.760347-1-irogers@google.com/
> 
> v4: Changing the callchain parameter at configuration time means other
>     options aren't set the same as they would for `--call-graph
>     dwarf`, for example the stack size. Switch to setting the
>     callchain option on s390 to parameter parse time. For '-g' use
>     '--call-graph dwarf' for s390. Other --call-graph options are
>     parsed as normal, but a warning is generated when setting
>     `--call-graph fp` for s390. Also fix that sample IDs aren't wanted
>     when there is only 1 event in the evlist.
> https://lore.kernel.org/lkml/20260317030601.567422-1-irogers@google.com/
> 
> v3: Incorporate feedback about event and callchain behavior for s390:
> https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
> https://lore.kernel.org/lkml/20260313202811.2599195-1-irogers@google.com/
> 
> v2: try exclude_callchain_user for s390 rather than fully disabling
>     the callchain. Fix a missed clearing of is_pmu_core if the
>     software event fallback.
> https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
> 
> v1: https://lore.kernel.org/lkml/20260312031928.1494864-1-irogers@google.com/
> 
> Ian Rogers (5):
>   perf evsel: Improve falling back from cycles
>   perf target: Constify simple check functions
>   perf evsel: Constify option arguments to config functions
>   perf callchain: Refactor callchain option parsing
>   perf evlist: Improve default event for s390
> 
>  tools/perf/builtin-record.c      | 66 +++++++----------------------
>  tools/perf/builtin-top.c         | 67 ++++++++++++++++-------------
>  tools/perf/builtin-trace.c       |  9 +++-
>  tools/perf/tests/event_update.c  |  4 +-
>  tools/perf/tests/expand-cgroup.c |  4 +-
>  tools/perf/tests/perf-record.c   |  7 ++-
>  tools/perf/tests/topology.c      |  4 +-
>  tools/perf/util/callchain.c      | 73 ++++++++++++++++++++++++++------
>  tools/perf/util/callchain.h      | 12 ++----
>  tools/perf/util/evlist.c         | 32 +++++++++-----
>  tools/perf/util/evlist.h         |  2 +-
>  tools/perf/util/evsel.c          | 70 +++++++++++++++++++-----------
>  tools/perf/util/evsel.h          | 10 +++--
>  tools/perf/util/target.h         | 12 +++---
>  14 files changed, 217 insertions(+), 155 deletions(-)
> 

Ian, thanks a lot. I tested it using the same sequences as for v5.

Tested-by: Thomas Richter <tmricht@linux.ibm.com>

-- 
Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany
--
IBM Deutschland Research & Development GmbH

Vorsitzender des Aufsichtsrats: Wolfgang Wendt

Geschäftsführung: David Faller

Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
Re: [PATCH v6 0/5] perf evsel fallback changes, better s390 defaults
Posted by Ian Rogers 2 weeks, 5 days ago
On Wed, Mar 18, 2026 at 1:20 AM Thomas Richter <tmricht@linux.ibm.com> wrote:
>
> On 3/17/26 18:56, Ian Rogers wrote:
> > Discussion with Thomas Richter in:
> > https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
> > showed that the evsel__fallback wasn't working for s390. These patches
> > avoid the problematic frame pointer callchain on s390 and fix
> > evsel__fallback from a range of problems when falling back to a
> > software event. I simulated failures when developing the patches but
> > they are untested other than that.
> >
> > v6: Sashiko noted that target wasn't fully set up when creating the
> >     default evlist in `perf top`, so move it earlier. Fix const char*
> >     casting issues in __parse_callchain_report_opt. Make '-g' not
> >     override the .perfconfig setting again.
> > https://sashiko.dev/#/patchset/20260317055334.760347-1-irogers%40google.com
> >
> > v5: Fix the value for the top option to match that of record. Tidy the
> >     callchain parsing option callbacks. Based on AI review feedback:
> > https://sashiko.dev/#/patchset/20260317030601.567422-1-irogers%40google.com
> > https://lore.kernel.org/lkml/20260317055334.760347-1-irogers@google.com/
> >
> > v4: Changing the callchain parameter at configuration time means other
> >     options aren't set the same as they would for `--call-graph
> >     dwarf`, for example the stack size. Switch to setting the
> >     callchain option on s390 to parameter parse time. For '-g' use
> >     '--call-graph dwarf' for s390. Other --call-graph options are
> >     parsed as normal, but a warning is generated when setting
> >     `--call-graph fp` for s390. Also fix that sample IDs aren't wanted
> >     when there is only 1 event in the evlist.
> > https://lore.kernel.org/lkml/20260317030601.567422-1-irogers@google.com/
> >
> > v3: Incorporate feedback about event and callchain behavior for s390:
> > https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
> > https://lore.kernel.org/lkml/20260313202811.2599195-1-irogers@google.com/
> >
> > v2: try exclude_callchain_user for s390 rather than fully disabling
> >     the callchain. Fix a missed clearing of is_pmu_core if the
> >     software event fallback.
> > https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
> >
> > v1: https://lore.kernel.org/lkml/20260312031928.1494864-1-irogers@google.com/
> >
> > Ian Rogers (5):
> >   perf evsel: Improve falling back from cycles
> >   perf target: Constify simple check functions
> >   perf evsel: Constify option arguments to config functions
> >   perf callchain: Refactor callchain option parsing
> >   perf evlist: Improve default event for s390
> >
> >  tools/perf/builtin-record.c      | 66 +++++++----------------------
> >  tools/perf/builtin-top.c         | 67 ++++++++++++++++-------------
> >  tools/perf/builtin-trace.c       |  9 +++-
> >  tools/perf/tests/event_update.c  |  4 +-
> >  tools/perf/tests/expand-cgroup.c |  4 +-
> >  tools/perf/tests/perf-record.c   |  7 ++-
> >  tools/perf/tests/topology.c      |  4 +-
> >  tools/perf/util/callchain.c      | 73 ++++++++++++++++++++++++++------
> >  tools/perf/util/callchain.h      | 12 ++----
> >  tools/perf/util/evlist.c         | 32 +++++++++-----
> >  tools/perf/util/evlist.h         |  2 +-
> >  tools/perf/util/evsel.c          | 70 +++++++++++++++++++-----------
> >  tools/perf/util/evsel.h          | 10 +++--
> >  tools/perf/util/target.h         | 12 +++---
> >  14 files changed, 217 insertions(+), 155 deletions(-)
> >
>
> Ian, thanks a lot. I tested it using the same sequences as for v5.
>
> Tested-by: Thomas Richter <tmricht@linux.ibm.com>

Thanks Thomas! There's a Sashiko review that moving the target
initialization for `perf top` will cause issues, I'll do a v7 with
your tags. That change shouldn't impact any testing.
https://sashiko.dev/#/patchset/20260317175642.161647-1-irogers%40google.com

Thanks,
Ian

> --
> Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany
> --
> IBM Deutschland Research & Development GmbH
>
> Vorsitzender des Aufsichtsrats: Wolfgang Wendt
>
> Geschäftsführung: David Faller
>
> Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
[PATCH v7 0/5] perf evsel fallback changes, better s390 defaults
Posted by Ian Rogers 2 weeks, 5 days ago
Discussion with Thomas Richter in:
https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
showed that the evsel__fallback wasn't working for s390. These patches
avoid the problematic frame pointer callchain on s390 and fix
evsel__fallback from a range of problems when falling back to a
software event. I simulated failures when developing the patches but
they are untested other than that.

v7: In perf top, move the target uid handling back to after the evlist
    is setup. A regression caught by Sashiko in:
https://sashiko.dev/#/patchset/20260317175642.161647-1-irogers%40google.com

v6: Sashiko noted that target wasn't fully set up when creating the
    default evlist in `perf top`, so move it earlier. Fix const char*
    casting issues in __parse_callchain_report_opt. Make '-g' not
    override the .perfconfig setting again.
https://sashiko.dev/#/patchset/20260317055334.760347-1-irogers%40google.com
https://lore.kernel.org/lkml/20260317175642.161647-1-irogers@google.com/

v5: Fix the value for the top option to match that of record. Tidy the
    callchain parsing option callbacks. Based on AI review feedback:
https://sashiko.dev/#/patchset/20260317030601.567422-1-irogers%40google.com
https://lore.kernel.org/lkml/20260317055334.760347-1-irogers@google.com/

v4: Changing the callchain parameter at configuration time means other
    options aren't set the same as they would for `--call-graph
    dwarf`, for example the stack size. Switch to setting the
    callchain option on s390 to parameter parse time. For '-g' use
    '--call-graph dwarf' for s390. Other --call-graph options are
    parsed as normal, but a warning is generated when setting
    `--call-graph fp` for s390. Also fix that sample IDs aren't wanted
    when there is only 1 event in the evlist.
https://lore.kernel.org/lkml/20260317030601.567422-1-irogers@google.com/

v3: Incorporate feedback about event and callchain behavior for s390:
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
https://lore.kernel.org/lkml/20260313202811.2599195-1-irogers@google.com/

v2: try exclude_callchain_user for s390 rather than fully disabling
    the callchain. Fix a missed clearing of is_pmu_core if the
    software event fallback.
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/

v1: https://lore.kernel.org/lkml/20260312031928.1494864-1-irogers@google.com/

Ian Rogers (5):
  perf evsel: Improve falling back from cycles
  perf target: Constify simple check functions
  perf evsel: Constify option arguments to config functions
  perf callchain: Refactor callchain option parsing
  perf evlist: Improve default event for s390

 tools/perf/builtin-record.c      | 66 +++++++----------------------
 tools/perf/builtin-top.c         | 41 ++++++++++--------
 tools/perf/builtin-trace.c       |  9 +++-
 tools/perf/tests/event_update.c  |  4 +-
 tools/perf/tests/expand-cgroup.c |  4 +-
 tools/perf/tests/perf-record.c   |  7 ++-
 tools/perf/tests/topology.c      |  4 +-
 tools/perf/util/callchain.c      | 73 ++++++++++++++++++++++++++------
 tools/perf/util/callchain.h      | 12 ++----
 tools/perf/util/evlist.c         | 32 +++++++++-----
 tools/perf/util/evlist.h         |  2 +-
 tools/perf/util/evsel.c          | 70 +++++++++++++++++++-----------
 tools/perf/util/evsel.h          | 10 +++--
 tools/perf/util/target.h         | 12 +++---
 14 files changed, 204 insertions(+), 142 deletions(-)

-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v8 0/5] perf evsel fallback changes, better s390 defaults
Posted by Ian Rogers 2 weeks, 4 days ago
Discussion with Thomas Richter in:
https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
showed that the evsel__fallback wasn't working for s390. These patches
avoid the problematic frame pointer callchain on s390 and fix
evsel__fallback from a range of problems when falling back to a
software event. I simulated failures when developing the patches but
they are untested other than that.

v8: Address sashiko review that target wasn't fully initialized for
    `perf record` with '-u'. Ensure the callchain is enabled with '-g'
    and a .perfconfig setting. This don't impact testing so keeping
    Thomas' tested-by tags.
https://sashiko.dev/#/patchset/20260318175808.582009-1-irogers%40google.com

v7: In perf top, move the target uid handling back to after the evlist
    is setup. A regression caught by Sashiko in:
https://sashiko.dev/#/patchset/20260317175642.161647-1-irogers%40google.com
https://lore.kernel.org/lkml/20260318175808.582009-1-irogers@google.com/

v6: Sashiko noted that target wasn't fully set up when creating the
    default evlist in `perf top`, so move it earlier. Fix const char*
    casting issues in __parse_callchain_report_opt. Make '-g' not
    override the .perfconfig setting again.
https://sashiko.dev/#/patchset/20260317055334.760347-1-irogers%40google.com
https://lore.kernel.org/lkml/20260317175642.161647-1-irogers@google.com/

v5: Fix the value for the top option to match that of record. Tidy the
    callchain parsing option callbacks. Based on AI review feedback:
https://sashiko.dev/#/patchset/20260317030601.567422-1-irogers%40google.com
https://lore.kernel.org/lkml/20260317055334.760347-1-irogers@google.com/

v4: Changing the callchain parameter at configuration time means other
    options aren't set the same as they would for `--call-graph
    dwarf`, for example the stack size. Switch to setting the
    callchain option on s390 to parameter parse time. For '-g' use
    '--call-graph dwarf' for s390. Other --call-graph options are
    parsed as normal, but a warning is generated when setting
    `--call-graph fp` for s390. Also fix that sample IDs aren't wanted
    when there is only 1 event in the evlist.
https://lore.kernel.org/lkml/20260317030601.567422-1-irogers@google.com/

v3: Incorporate feedback about event and callchain behavior for s390:
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
https://lore.kernel.org/lkml/20260313202811.2599195-1-irogers@google.com/

v2: try exclude_callchain_user for s390 rather than fully disabling
    the callchain. Fix a missed clearing of is_pmu_core if the
    software event fallback.
https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/

v1: https://lore.kernel.org/lkml/20260312031928.1494864-1-irogers@google.com/

Ian Rogers (5):
  perf evsel: Improve falling back from cycles
  perf target: Constify simple check functions
  perf evsel: Constify option arguments to config functions
  perf callchain: Refactor callchain option parsing
  perf evlist: Improve default event for s390

 tools/perf/builtin-record.c      | 81 ++++++++++----------------------
 tools/perf/builtin-top.c         | 46 +++++++++++-------
 tools/perf/builtin-trace.c       |  9 +++-
 tools/perf/tests/event_update.c  |  4 +-
 tools/perf/tests/expand-cgroup.c |  4 +-
 tools/perf/tests/perf-record.c   |  7 ++-
 tools/perf/tests/topology.c      |  4 +-
 tools/perf/util/callchain.c      | 73 +++++++++++++++++++++++-----
 tools/perf/util/callchain.h      | 12 ++---
 tools/perf/util/evlist.c         | 32 ++++++++-----
 tools/perf/util/evlist.h         |  2 +-
 tools/perf/util/evsel.c          | 70 +++++++++++++++++----------
 tools/perf/util/evsel.h          | 10 ++--
 tools/perf/util/target.h         | 12 ++---
 14 files changed, 219 insertions(+), 147 deletions(-)

-- 
2.53.0.851.ga537e3e6e9-goog
Re: [PATCH v8 0/5] perf evsel fallback changes, better s390 defaults
Posted by Ian Rogers 2 weeks, 4 days ago
On Wed, Mar 18, 2026 at 4:46 PM Ian Rogers <irogers@google.com> wrote:
>
> Discussion with Thomas Richter in:
> https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
> showed that the evsel__fallback wasn't working for s390. These patches
> avoid the problematic frame pointer callchain on s390 and fix
> evsel__fallback from a range of problems when falling back to a
> software event. I simulated failures when developing the patches but
> they are untested other than that.

I think at this point I call it a day. Sashiko still has feedback that
could justify a v9:
https://sashiko.dev/#/patchset/20260318234600.730340-1-irogers%40google.com
Specifically:
1) software PMUs may fail if sysfs isn't mounted. This isn't a real
problem as "software" is a "well known" PMU that we create even if
sysfs isn't mounted.
2) the handling of callchain in .perfconfig files isn't right, but the
patches aren't making it any worse. I worry there could be several
more patches if I start fixing things wrong with .perfconfig.
3) the possibility to add another NULL check for safety exists, but
the code would already crash at the same point.
Apparently, the tendency of prompts to generate further refinements
instead of providing all the problems at once is a known limitation of
LLMs and the current prompting methods.

Thanks,
Ian

> v8: Address sashiko review that target wasn't fully initialized for
>     `perf record` with '-u'. Ensure the callchain is enabled with '-g'
>     and a .perfconfig setting. This don't impact testing so keeping
>     Thomas' tested-by tags.
> https://sashiko.dev/#/patchset/20260318175808.582009-1-irogers%40google.com
>
> v7: In perf top, move the target uid handling back to after the evlist
>     is setup. A regression caught by Sashiko in:
> https://sashiko.dev/#/patchset/20260317175642.161647-1-irogers%40google.com
> https://lore.kernel.org/lkml/20260318175808.582009-1-irogers@google.com/
>
> v6: Sashiko noted that target wasn't fully set up when creating the
>     default evlist in `perf top`, so move it earlier. Fix const char*
>     casting issues in __parse_callchain_report_opt. Make '-g' not
>     override the .perfconfig setting again.
> https://sashiko.dev/#/patchset/20260317055334.760347-1-irogers%40google.com
> https://lore.kernel.org/lkml/20260317175642.161647-1-irogers@google.com/
>
> v5: Fix the value for the top option to match that of record. Tidy the
>     callchain parsing option callbacks. Based on AI review feedback:
> https://sashiko.dev/#/patchset/20260317030601.567422-1-irogers%40google.com
> https://lore.kernel.org/lkml/20260317055334.760347-1-irogers@google.com/
>
> v4: Changing the callchain parameter at configuration time means other
>     options aren't set the same as they would for `--call-graph
>     dwarf`, for example the stack size. Switch to setting the
>     callchain option on s390 to parameter parse time. For '-g' use
>     '--call-graph dwarf' for s390. Other --call-graph options are
>     parsed as normal, but a warning is generated when setting
>     `--call-graph fp` for s390. Also fix that sample IDs aren't wanted
>     when there is only 1 event in the evlist.
> https://lore.kernel.org/lkml/20260317030601.567422-1-irogers@google.com/
>
> v3: Incorporate feedback about event and callchain behavior for s390:
> https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
> https://lore.kernel.org/lkml/20260313202811.2599195-1-irogers@google.com/
>
> v2: try exclude_callchain_user for s390 rather than fully disabling
>     the callchain. Fix a missed clearing of is_pmu_core if the
>     software event fallback.
> https://lore.kernel.org/lkml/20260312061628.1593105-1-irogers@google.com/
>
> v1: https://lore.kernel.org/lkml/20260312031928.1494864-1-irogers@google.com/
>
> Ian Rogers (5):
>   perf evsel: Improve falling back from cycles
>   perf target: Constify simple check functions
>   perf evsel: Constify option arguments to config functions
>   perf callchain: Refactor callchain option parsing
>   perf evlist: Improve default event for s390
>
>  tools/perf/builtin-record.c      | 81 ++++++++++----------------------
>  tools/perf/builtin-top.c         | 46 +++++++++++-------
>  tools/perf/builtin-trace.c       |  9 +++-
>  tools/perf/tests/event_update.c  |  4 +-
>  tools/perf/tests/expand-cgroup.c |  4 +-
>  tools/perf/tests/perf-record.c   |  7 ++-
>  tools/perf/tests/topology.c      |  4 +-
>  tools/perf/util/callchain.c      | 73 +++++++++++++++++++++++-----
>  tools/perf/util/callchain.h      | 12 ++---
>  tools/perf/util/evlist.c         | 32 ++++++++-----
>  tools/perf/util/evlist.h         |  2 +-
>  tools/perf/util/evsel.c          | 70 +++++++++++++++++----------
>  tools/perf/util/evsel.h          | 10 ++--
>  tools/perf/util/target.h         | 12 ++---
>  14 files changed, 219 insertions(+), 147 deletions(-)
>
> --
> 2.53.0.851.ga537e3e6e9-goog
>
Re: [PATCH v8 0/5] perf evsel fallback changes, better s390 defaults
Posted by Thomas Richter 2 weeks, 4 days ago
On 3/19/26 06:39, Ian Rogers wrote:
> On Wed, Mar 18, 2026 at 4:46 PM Ian Rogers <irogers@google.com> wrote:
>>
>> Discussion with Thomas Richter in:
>> https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
>> showed that the evsel__fallback wasn't working for s390. These patches
>> avoid the problematic frame pointer callchain on s390 and fix
>> evsel__fallback from a range of problems when falling back to a
>> software event. I simulated failures when developing the patches but
>> they are untested other than that.
> 
> I think at this point I call it a day. Sashiko still has feedback that
> could justify a v9:
> https://sashiko.dev/#/patchset/20260318234600.730340-1-irogers%40google.com
> Specifically:
> 1) software PMUs may fail if sysfs isn't mounted. This isn't a real
> problem as "software" is a "well known" PMU that we create even if
> sysfs isn't mounted.

I totally agree with you. Patch set 8 should be it.
Honestly, if /sys (of sysfs) is not mounted, I guess perf tool is the
least of the issues that arise. I believe this is now getting 
'over-engineered'.

> 2) the handling of callchain in .perfconfig files isn't right, but the
> patches aren't making it any worse. I worry there could be several
> more patches if I start fixing things wrong with .perfconfig.

Correct, you can not fix everything in one patch set :-)

> 3) the possibility to add another NULL check for safety exists, but
> the code would already crash at the same point.
> Apparently, the tendency of prompts to generate further refinements
> instead of providing all the problems at once is a known limitation of
> LLMs and the current prompting methods.
> 
> Thanks,
> Ian
> 
my 2 cents.... 
-- 
Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany
--
IBM Deutschland Research & Development GmbH

Vorsitzender des Aufsichtsrats: Wolfgang Wendt

Geschäftsführung: David Faller

Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
Re: [PATCH v8 0/5] perf evsel fallback changes, better s390 defaults
Posted by Namhyung Kim 2 weeks, 3 days ago
On Wed, 18 Mar 2026 16:45:55 -0700, Ian Rogers wrote:

> Discussion with Thomas Richter in:
> https://lore.kernel.org/lkml/20260306071002.2526085-1-tmricht@linux.ibm.com/
> showed that the evsel__fallback wasn't working for s390. These patches
> avoid the problematic frame pointer callchain on s390 and fix
> evsel__fallback from a range of problems when falling back to a
> software event. I simulated failures when developing the patches but
> they are untested other than that.
> 
> [...]

Applied to perf-tools-next, thanks!

Best regards,
Namhyung
[PATCH v8 1/5] perf evsel: Improve falling back from cycles
Posted by Ian Rogers 2 weeks, 4 days ago
Switch to using evsel__match rather than comparing perf_event_attr
values, this is robust on hybrid architectures.
Ensure evsel->pmu matches the evsel->core.attr.
Remove exclude bits that get set in other fallback attempts when
switching the event.
Log the event name with modifiers when switching the event on fallback.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/util/evsel.c | 45 ++++++++++++++++++++++++++++-------------
 tools/perf/util/evsel.h |  2 ++
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f59228c1a39e..bd14d9bbc91f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3785,25 +3785,42 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 {
 	int paranoid;
 
-	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
-	    evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
-	    evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+	if ((err == ENODEV || err == ENOENT || err == ENXIO) &&
+	    evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		/*
-		 * If it's cycles then fall back to hrtimer based cpu-clock sw
-		 * counter, which is always available even if no PMU support.
-		 *
-		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
-		 * b0a873e).
+		 * If it's the legacy hardware cycles event fails then fall back
+		 * to hrtimer based cpu-clock sw counter, which is always
+		 * available even if no PMU support. PPC returned ENXIO rather
+		 * than ENODEV or ENOENT until 2.6.37.
 		 */
-		evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
+		evsel->pmu = perf_pmus__find_by_type(PERF_TYPE_SOFTWARE);
+		assert(evsel->pmu); /* software is a "well-known" and can't fail PMU type. */
+
+		/* Configure the event. */
+		evsel->core.attr.type = PERF_TYPE_SOFTWARE;
 		evsel->core.attr.config = target__has_cpu(target)
 			? PERF_COUNT_SW_CPU_CLOCK
 			: PERF_COUNT_SW_TASK_CLOCK;
-		scnprintf(msg, msgsize,
-			"The cycles event is not supported, trying to fall back to %s",
-			target__has_cpu(target) ? "cpu-clock" : "task-clock");
+		evsel->core.is_pmu_core = false;
+
+		/* Remove excludes for new event. */
+		if (evsel->fallenback_eacces) {
+			evsel->core.attr.exclude_kernel = 0;
+			evsel->core.attr.exclude_hv     = 0;
+			evsel->fallenback_eacces = false;
+		}
+		if (evsel->fallenback_eopnotsupp) {
+			evsel->core.attr.exclude_guest = 0;
+			evsel->fallenback_eopnotsupp = false;
+		}
 
+		/* Name is recomputed by evsel__name. */
 		zfree(&evsel->name);
+
+		/* Log message. */
+		scnprintf(msg, msgsize,
+			  "The cycles event is not supported, trying to fall back to %s",
+			  evsel__name(evsel));
 		return true;
 	} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
 		   (paranoid = perf_event_paranoid()) > 1) {
@@ -3830,7 +3847,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 			  " samples", paranoid);
 		evsel->core.attr.exclude_kernel = 1;
 		evsel->core.attr.exclude_hv     = 1;
-
+		evsel->fallenback_eacces = true;
 		return true;
 	} else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest &&
 		   !evsel->exclude_GH) {
@@ -3851,7 +3868,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 		/* Apple M1 requires exclude_guest */
 		scnprintf(msg, msgsize, "Trying to fall back to excluding guest samples");
 		evsel->core.attr.exclude_guest = 1;
-
+		evsel->fallenback_eopnotsupp = true;
 		return true;
 	}
 no_fallback:
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a3d754c029a0..97f57fab28ce 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -124,6 +124,8 @@ struct evsel {
 	bool			default_metricgroup; /* A member of the Default metricgroup */
 	bool			default_show_events; /* If a default group member, show the event */
 	bool			needs_uniquify;
+	bool			fallenback_eacces;
+	bool			fallenback_eopnotsupp;
 	struct hashmap		*per_pkg_mask;
 	int			err;
 	int			script_output_type;
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v8 2/5] perf target: Constify simple check functions
Posted by Ian Rogers 2 weeks, 4 days ago
Allow the target to be const in callers.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/util/target.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 84ebb9c940c6..bc2bff9c6842 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -49,22 +49,22 @@ uid_t parse_uid(const char *str);
 
 int target__strerror(struct target *target, int errnum, char *buf, size_t buflen);
 
-static inline bool target__has_task(struct target *target)
+static inline bool target__has_task(const struct target *target)
 {
 	return target->tid || target->pid;
 }
 
-static inline bool target__has_cpu(struct target *target)
+static inline bool target__has_cpu(const struct target *target)
 {
 	return target->system_wide || target->cpu_list;
 }
 
-static inline bool target__none(struct target *target)
+static inline bool target__none(const struct target *target)
 {
 	return !target__has_task(target) && !target__has_cpu(target);
 }
 
-static inline bool target__enable_on_exec(struct target *target)
+static inline bool target__enable_on_exec(const struct target *target)
 {
 	/*
 	 * Normally enable_on_exec should be set if:
@@ -75,12 +75,12 @@ static inline bool target__enable_on_exec(struct target *target)
 	return target__none(target) && !target->initial_delay;
 }
 
-static inline bool target__has_per_thread(struct target *target)
+static inline bool target__has_per_thread(const struct target *target)
 {
 	return target->system_wide && target->per_thread;
 }
 
-static inline bool target__uses_dummy_map(struct target *target)
+static inline bool target__uses_dummy_map(const struct target *target)
 {
 	bool use_dummy = false;
 
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v8 3/5] perf evsel: Constify option arguments to config functions
Posted by Ian Rogers 2 weeks, 4 days ago
The options are used to configure the evsel but are not themselves
configured. Make the arguments const to better capture this.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/util/evsel.c | 20 ++++++++++----------
 tools/perf/util/evsel.h |  8 ++++----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bd14d9bbc91f..54c8922a8e47 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1015,8 +1015,8 @@ uint16_t evsel__e_machine(struct evsel *evsel, uint32_t *e_flags)
 	return perf_session__e_machine(session, e_flags);
 }
 
-static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-				      struct callchain_param *param)
+static void __evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+				      const struct callchain_param *param)
 {
 	bool function = evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
@@ -1080,14 +1080,14 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
 		attr->defer_callchain = 1;
 }
 
-void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-			     struct callchain_param *param)
+void evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+			     const struct callchain_param *param)
 {
 	if (param->enabled)
 		return __evsel__config_callchain(evsel, opts, param);
 }
 
-static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
+static void evsel__reset_callgraph(struct evsel *evsel, const struct callchain_param *param)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
 
@@ -1106,7 +1106,7 @@ static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *
 
 static void evsel__apply_ratio_to_prev(struct evsel *evsel,
 				       struct perf_event_attr *attr,
-				       struct record_opts *opts,
+				       const struct record_opts *opts,
 				       const char *buf)
 {
 	struct perf_event_attr *prev_attr = NULL;
@@ -1170,7 +1170,7 @@ static void evsel__apply_ratio_to_prev(struct evsel *evsel,
 }
 
 static void evsel__apply_config_terms(struct evsel *evsel,
-				      struct record_opts *opts, bool track)
+				      const struct record_opts *opts, bool track)
 {
 	struct evsel_config_term *term;
 	struct list_head *config_terms = &evsel->config_terms;
@@ -1445,7 +1445,7 @@ void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused,
 {
 }
 
-static void evsel__set_default_freq_period(struct record_opts *opts,
+static void evsel__set_default_freq_period(const struct record_opts *opts,
 					   struct perf_event_attr *attr)
 {
 	if (opts->freq) {
@@ -1490,8 +1490,8 @@ bool evsel__is_offcpu_event(struct evsel *evsel)
  *     enable/disable events specifically, as there's no
  *     initial traced exec call.
  */
-void evsel__config(struct evsel *evsel, struct record_opts *opts,
-		   struct callchain_param *callchain)
+void evsel__config(struct evsel *evsel, const struct record_opts *opts,
+		   const struct callchain_param *callchain)
 {
 	struct evsel *leader = evsel__leader(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 97f57fab28ce..339b5c08a33d 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -287,10 +287,10 @@ void evsel__set_priv_destructor(void (*destructor)(void *priv));
 
 struct callchain_param;
 
-void evsel__config(struct evsel *evsel, struct record_opts *opts,
-		   struct callchain_param *callchain);
-void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-			     struct callchain_param *callchain);
+void evsel__config(struct evsel *evsel, const struct record_opts *opts,
+		   const struct callchain_param *callchain);
+void evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+			     const  struct callchain_param *callchain);
 
 int __evsel__sample_size(u64 sample_type);
 void evsel__calc_id_pos(struct evsel *evsel);
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v8 4/5] perf callchain: Refactor callchain option parsing
Posted by Ian Rogers 2 weeks, 4 days ago
record_opts__parse_callchain is shared by builtin-record and
builtin-trace, it is declared in callchain.h. Move the declaration to
callchain.c for consistency with the header. In other cases make the
option callback a small static stub that then calls into callchain.c.

Make the no argument '-g' callchain option just a short-cut for
'--call-graph fp' so that there is consistency in how the arguments
are handled. This requires the const char* string to be strdup-ed in
__parse_callchain_report_opt. For consistency also make
parse_callchain_record use strdup and remove some unnecessary
casts. Also, be more explicit about the '-g' behavior if there is a
.perfconfig file setting.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/builtin-record.c | 65 ++++++++-------------------------
 tools/perf/builtin-top.c    | 25 +++++++++----
 tools/perf/builtin-trace.c  |  9 ++++-
 tools/perf/util/callchain.c | 73 ++++++++++++++++++++++++++++++-------
 tools/perf/util/callchain.h | 12 ++----
 5 files changed, 104 insertions(+), 80 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 40917a0be238..59b8125d1b13 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2975,65 +2975,30 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	return status;
 }
 
-static void callchain_debug(struct callchain_param *callchain)
-{
-	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
-
-	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
-
-	if (callchain->record_mode == CALLCHAIN_DWARF)
-		pr_debug("callchain: stack dump size %d\n",
-			 callchain->dump_size);
-}
-
-int record_opts__parse_callchain(struct record_opts *record,
-				 struct callchain_param *callchain,
-				 const char *arg, bool unset)
-{
-	int ret;
-	callchain->enabled = !unset;
-
-	/* --no-call-graph */
-	if (unset) {
-		callchain->record_mode = CALLCHAIN_NONE;
-		pr_debug("callchain: disabled\n");
-		return 0;
-	}
-
-	ret = parse_callchain_record_opt(arg, callchain);
-	if (!ret) {
-		/* Enable data address sampling for DWARF unwind. */
-		if (callchain->record_mode == CALLCHAIN_DWARF &&
-		    !record->record_data_mmap_set)
-			record->record_data_mmap = true;
-		callchain_debug(callchain);
-	}
-
-	return ret;
-}
-
-int record_parse_callchain_opt(const struct option *opt,
+static int record_parse_callchain_opt(const struct option *opt,
 			       const char *arg,
 			       int unset)
 {
 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
 }
 
-int record_callchain_opt(const struct option *opt,
-			 const char *arg __maybe_unused,
-			 int unset __maybe_unused)
+static int record_callchain_opt(const struct option *opt,
+				const char *arg __maybe_unused,
+				int unset)
 {
-	struct callchain_param *callchain = opt->value;
-
-	callchain->enabled = true;
-
-	if (callchain->record_mode == CALLCHAIN_NONE)
-		callchain->record_mode = CALLCHAIN_FP;
+	/*
+	 * The -g option only sets the callchain if not already configured by
+	 * .perfconfig. It does, however, enable it.
+	 */
+	if (callchain_param.record_mode != CALLCHAIN_NONE) {
+		callchain_param.enabled = true;
+		return 0;
+	}
 
-	callchain_debug(callchain);
-	return 0;
+	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
 }
 
+
 static int perf_record_config(const char *var, const char *value, void *cb)
 {
 	struct record *rec = cb;
@@ -3525,7 +3490,7 @@ static struct option __record_options[] = {
 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
 		     record__mmap_flush_parse),
-	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
+	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
 			   NULL, "enables call-graph recording" ,
 			   &record_callchain_opt),
 	OPT_CALLBACK(0, "call-graph", &record.opts,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 710604c4f6f6..b6726f4dffb3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1386,13 +1386,6 @@ static int __cmd_top(struct perf_top *top)
 	return ret;
 }
 
-static int
-callchain_opt(const struct option *opt, const char *arg, int unset)
-{
-	symbol_conf.use_callchain = true;
-	return record_callchain_opt(opt, arg, unset);
-}
-
 static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
@@ -1413,6 +1406,24 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 	return parse_callchain_top_opt(arg);
 }
 
+static int
+callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset)
+{
+	struct callchain_param *callchain = opt->value;
+
+	/*
+	 * The -g option only sets the callchain if not already configured by
+	 * .perfconfig. It does, however, enable it.
+	 */
+	if (callchain->record_mode != CALLCHAIN_NONE) {
+		callchain->enabled = true;
+		return 0;
+	}
+
+	return parse_callchain_opt(opt, "fp", unset);
+}
+
+
 static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
 {
 	if (!strcmp(var, "top.call-graph")) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1c38f3d16a31..f487fbaa0ad6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -5300,6 +5300,13 @@ static int trace__parse_summary_mode(const struct option *opt, const char *str,
 	return 0;
 }
 
+static int trace_parse_callchain_opt(const struct option *opt,
+				     const char *arg,
+				     int unset)
+{
+	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
+}
+
 static int trace__config(const char *var, const char *value, void *arg)
 {
 	struct trace *trace = arg;
@@ -5447,7 +5454,7 @@ int cmd_trace(int argc, const char **argv)
 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
 	OPT_CALLBACK(0, "call-graph", &trace.opts,
 		     "record_mode[,record_size]", record_callchain_help,
-		     &record_parse_callchain_opt),
+		     &trace_parse_callchain_opt),
 	OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
 		    "Use libtraceevent to print the tracepoint arguments."),
 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8ff0898799ee..f879b84f8ff9 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -30,6 +30,7 @@
 #include "map.h"
 #include "callchain.h"
 #include "branch.h"
+#include "record.h"
 #include "symbol.h"
 #include "thread.h"
 #include "util.h"
@@ -170,7 +171,7 @@ static int get_stack_size(const char *str, unsigned long *_size)
 static int
 __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 {
-	char *tok;
+	char *tok, *arg_copy;
 	char *endptr, *saveptr = NULL;
 	bool minpcnt_set = false;
 	bool record_opt_set = false;
@@ -182,12 +183,17 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 	if (!arg)
 		return 0;
 
-	while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) {
+	arg_copy = strdup(arg);
+	if (!arg_copy)
+		return -ENOMEM;
+
+	tok = strtok_r(arg_copy, ",", &saveptr);
+	while (tok) {
 		if (!strncmp(tok, "none", strlen(tok))) {
 			callchain_param.mode = CHAIN_NONE;
 			callchain_param.enabled = false;
 			symbol_conf.use_callchain = false;
-			return 0;
+			goto out;
 		}
 
 		if (!parse_callchain_mode(tok) ||
@@ -214,30 +220,35 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 			unsigned long size = 0;
 
 			if (get_stack_size(tok, &size) < 0)
-				return -1;
+				goto err_out;
 			callchain_param.dump_size = size;
 			try_stack_size = false;
 		} else if (!minpcnt_set) {
 			/* try to get the min percent */
 			callchain_param.min_percent = strtod(tok, &endptr);
 			if (tok == endptr)
-				return -1;
+				goto err_out;
 			minpcnt_set = true;
 		} else {
 			/* try print limit at last */
 			callchain_param.print_limit = strtoul(tok, &endptr, 0);
 			if (tok == endptr)
-				return -1;
+				goto err_out;
 		}
 next:
-		arg = NULL;
+		tok = strtok_r(NULL, ",", &saveptr);
 	}
 
 	if (callchain_register_param(&callchain_param) < 0) {
 		pr_err("Can't register callchain params\n");
-		return -1;
+		goto err_out;
 	}
+out:
+	free(arg_copy);
 	return 0;
+err_out:
+	free(arg_copy);
+	return -1;
 }
 
 int parse_callchain_report_opt(const char *arg)
@@ -257,14 +268,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
 	int ret = -1;
 
 	/* We need buffer that we know we can write to. */
-	buf = malloc(strlen(arg) + 1);
+	buf = strdup(arg);
 	if (!buf)
 		return -ENOMEM;
 
-	strcpy(buf, arg);
-
-	tok = strtok_r((char *)buf, ",", &saveptr);
-	name = tok ? : (char *)buf;
+	tok = strtok_r(buf, ",", &saveptr);
+	name = tok ? : buf;
 
 	do {
 		/* Framepointer style */
@@ -328,6 +337,44 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
 	return ret;
 }
 
+static void callchain_debug(const struct callchain_param *callchain)
+{
+	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
+
+	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
+
+	if (callchain->record_mode == CALLCHAIN_DWARF)
+		pr_debug("callchain: stack dump size %d\n",
+			 callchain->dump_size);
+}
+
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset)
+{
+	int ret;
+
+	callchain->enabled = !unset;
+
+	/* --no-call-graph */
+	if (unset) {
+		callchain->record_mode = CALLCHAIN_NONE;
+		pr_debug("callchain: disabled\n");
+		return 0;
+	}
+
+	ret = parse_callchain_record_opt(arg, callchain);
+	if (!ret) {
+		/* Enable data address sampling for DWARF unwind. */
+		if (callchain->record_mode == CALLCHAIN_DWARF &&
+		    !record->record_data_mmap_set)
+			record->record_data_mmap = true;
+		callchain_debug(callchain);
+	}
+
+	return ret;
+}
+
 int perf_callchain_config(const char *var, const char *value)
 {
 	char *endptr;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index df54ddb8c0cb..06d463ccc7a0 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -9,11 +9,13 @@
 
 struct addr_location;
 struct evsel;
+struct hist_entry;
+struct hists;
 struct ip_callchain;
 struct map;
 struct perf_sample;
+struct record_opts;
 struct thread;
-struct hists;
 
 #define HELP_PAD "\t\t\t\t"
 
@@ -237,14 +239,6 @@ struct callchain_cursor *get_tls_callchain_cursor(void);
 int callchain_cursor__copy(struct callchain_cursor *dst,
 			   struct callchain_cursor *src);
 
-struct option;
-struct hist_entry;
-
-int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
-int record_callchain_opt(const struct option *opt, const char *arg, int unset);
-
-struct record_opts;
-
 int record_opts__parse_callchain(struct record_opts *record,
 				 struct callchain_param *callchain,
 				 const char *arg, bool unset);
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v8 5/5] perf evlist: Improve default event for s390
Posted by Ian Rogers 2 weeks, 4 days ago
Frame pointer callchains are not supported on s390 and dwarf
callchains are only supported on software events.

Switch the default event from the hardware 'cycles' event to the
software 'cpu-clock' or 'task-clock' on s390 if callchains are
enabled. Move some of the target initialization earlier in builtin-top
and builtin-record, so it is ready for use by evlist__new_default.

If frame pointer callchains are requested on s390 show a
warning. Modify the '-g' option of `perf top` and `perf record` to
default to dwarf callchains on s390.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/builtin-record.c      | 18 +++++++++++-------
 tools/perf/builtin-top.c         | 23 ++++++++++++-----------
 tools/perf/tests/event_update.c  |  4 +++-
 tools/perf/tests/expand-cgroup.c |  4 +++-
 tools/perf/tests/perf-record.c   |  7 +++++--
 tools/perf/tests/topology.c      |  4 +++-
 tools/perf/util/evlist.c         | 32 +++++++++++++++++++++-----------
 tools/perf/util/evlist.h         |  2 +-
 tools/perf/util/evsel.c          |  5 +++++
 9 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 59b8125d1b13..3276ffdc3141 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -55,6 +55,7 @@
 #include "asm/bug.h"
 #include "perf.h"
 #include "cputopo.h"
+#include "dwarf-regs.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -2995,7 +2996,9 @@ static int record_callchain_opt(const struct option *opt,
 		return 0;
 	}
 
-	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
+	return record_opts__parse_callchain(opt->value, &callchain_param,
+					    EM_HOST != EM_S390 ? "fp" : "dwarf",
+					    unset);
 }
 
 
@@ -4095,8 +4098,11 @@ int cmd_record(int argc, const char **argv)
 
 	perf_debuginfod_setup(&record.debuginfod);
 
-	/* Make system wide (-a) the default target. */
-	if (!argc && target__none(&rec->opts.target))
+	/*
+	 * Use system wide (-a) for the default target (ie. when no
+	 * workload). User ID filtering also implies system-wide.
+	 */
+	if ((!argc && target__none(&rec->opts.target)) || rec->uid_str)
 		rec->opts.target.system_wide = true;
 
 	if (nr_cgroups && !rec->opts.target.system_wide) {
@@ -4274,7 +4280,8 @@ int cmd_record(int argc, const char **argv)
 		record.opts.tail_synthesize = true;
 
 	if (rec->evlist->core.nr_entries == 0) {
-		struct evlist *def_evlist = evlist__new_default();
+		struct evlist *def_evlist = evlist__new_default(&rec->opts.target,
+								callchain_param.enabled);
 
 		if (!def_evlist)
 			goto out;
@@ -4303,9 +4310,6 @@ int cmd_record(int argc, const char **argv)
 		err = parse_uid_filter(rec->evlist, uid);
 		if (err)
 			goto out;
-
-		/* User ID filtering implies system wide. */
-		rec->opts.target.system_wide = true;
 	}
 
 	/* Enable ignoring missing threads when -p option is defined. */
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index b6726f4dffb3..37950efb28ac 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -56,6 +56,7 @@
 #include "util/debug.h"
 #include "util/ordered-events.h"
 #include "util/pfm.h"
+#include "dwarf-regs.h"
 
 #include <assert.h>
 #include <elf.h>
@@ -1420,7 +1421,7 @@ callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unse
 		return 0;
 	}
 
-	return parse_callchain_opt(opt, "fp", unset);
+	return parse_callchain_opt(opt, EM_HOST != EM_S390 ? "fp" : "dwarf", unset);
 }
 
 
@@ -1705,8 +1706,17 @@ int cmd_top(int argc, const char **argv)
 	if (annotate_check_args() < 0)
 		goto out_delete_evlist;
 
+	status = target__validate(target);
+	if (status) {
+		target__strerror(target, status, errbuf, BUFSIZ);
+		ui__warning("%s\n", errbuf);
+	}
+
+	if (target__none(target))
+		target->system_wide = true;
+
 	if (!top.evlist->core.nr_entries) {
-		struct evlist *def_evlist = evlist__new_default();
+		struct evlist *def_evlist = evlist__new_default(target, callchain_param.enabled);
 
 		if (!def_evlist)
 			goto out_delete_evlist;
@@ -1799,12 +1809,6 @@ int cmd_top(int argc, const char **argv)
 		goto out_delete_evlist;
 	}
 
-	status = target__validate(target);
-	if (status) {
-		target__strerror(target, status, errbuf, BUFSIZ);
-		ui__warning("%s\n", errbuf);
-	}
-
 	if (top.uid_str) {
 		uid_t uid = parse_uid(top.uid_str);
 
@@ -1818,9 +1822,6 @@ int cmd_top(int argc, const char **argv)
 			goto out_delete_evlist;
 	}
 
-	if (target__none(target))
-		target->system_wide = true;
-
 	if (evlist__create_maps(top.evlist, target) < 0) {
 		ui__error("Couldn't create thread/CPU maps: %s\n",
 			  errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index cb9e6de2e033..facc65e29f20 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -8,6 +8,7 @@
 #include "header.h"
 #include "machine.h"
 #include "util/synthetic-events.h"
+#include "target.h"
 #include "tool.h"
 #include "tests.h"
 #include "debug.h"
@@ -81,7 +82,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes
 {
 	struct evsel *evsel;
 	struct event_name tmp;
-	struct evlist *evlist = evlist__new_default();
+	struct target target = {};
+	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
index c7b32a220ca1..dd547f2f77cc 100644
--- a/tools/perf/tests/expand-cgroup.c
+++ b/tools/perf/tests/expand-cgroup.c
@@ -8,6 +8,7 @@
 #include "parse-events.h"
 #include "pmu-events/pmu-events.h"
 #include "pfm.h"
+#include "target.h"
 #include <subcmd/parse-options.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -99,7 +100,8 @@ out:	for (i = 0; i < nr_events; i++)
 static int expand_default_events(void)
 {
 	int ret;
-	struct evlist *evlist = evlist__new_default();
+	struct target target = {};
+	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index efbd9cd60c63..c6e31ab8a6b8 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -84,8 +84,11 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
 	CPU_ZERO_S(cpu_mask_size, cpu_mask);
 
 	perf_sample__init(&sample, /*all=*/false);
-	if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
-		evlist = evlist__new_default();
+	if (evlist == NULL) { /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
+		struct target target = {};
+
+		evlist = evlist__new_default(&target, /*sample_callchains=*/false);
+	}
 
 	if (evlist == NULL) {
 		pr_debug("Not enough memory to create evlist\n");
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index ec01150d208d..a34a7ab19a80 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -9,6 +9,7 @@
 #include "evlist.h"
 #include "debug.h"
 #include "pmus.h"
+#include "target.h"
 #include <linux/err.h>
 
 #define TEMPL "/tmp/perf-test-XXXXXX"
@@ -37,11 +38,12 @@ static int session_write_header(char *path)
 		.path = path,
 		.mode = PERF_DATA_MODE_WRITE,
 	};
+	struct target target = {};
 
 	session = perf_session__new(&data, NULL);
 	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
-	session->evlist = evlist__new_default();
+	session->evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 	TEST_ASSERT_VAL("can't get evlist", session->evlist);
 	session->evlist->session = session;
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 591bdf0b3e2a..c702741a9173 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -13,6 +13,7 @@
 #include "util/mmap.h"
 #include "thread_map.h"
 #include "target.h"
+#include "dwarf-regs.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "record.h"
@@ -98,38 +99,47 @@ struct evlist *evlist__new(void)
 	return evlist;
 }
 
-struct evlist *evlist__new_default(void)
+struct evlist *evlist__new_default(const struct target *target, bool sample_callchains)
 {
 	struct evlist *evlist = evlist__new();
 	bool can_profile_kernel;
 	struct perf_pmu *pmu = NULL;
+	struct evsel *evsel;
+	char buf[256];
+	int err;
 
 	if (!evlist)
 		return NULL;
 
 	can_profile_kernel = perf_event_paranoid_check(1);
 
-	while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
-		char buf[256];
-		int err;
-
-		snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
+	if (EM_HOST == EM_S390 && sample_callchains) {
+		snprintf(buf, sizeof(buf), "software/%s/%s",
+			 target__has_cpu(target) ? "cpu-clock" : "task-clock",
 			 can_profile_kernel ? "P" : "Pu");
 		err = parse_event(evlist, buf);
-		if (err) {
-			evlist__delete(evlist);
-			return NULL;
+		if (err)
+			goto out_err;
+	} else {
+		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+			snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
+				can_profile_kernel ? "P" : "Pu");
+			err = parse_event(evlist, buf);
+			if (err)
+				goto out_err;
 		}
 	}
 
+	/* If there is only 1 event a sample identifier isn't necessary. */
 	if (evlist->core.nr_entries > 1) {
-		struct evsel *evsel;
-
 		evlist__for_each_entry(evlist, evsel)
 			evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
 	}
 
 	return evlist;
+out_err:
+	evlist__delete(evlist);
+	return NULL;
 }
 
 struct evlist *evlist__new_dummy(void)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index d17c3b57a409..e507f5f20ef6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -104,7 +104,7 @@ struct evsel_str_handler {
 };
 
 struct evlist *evlist__new(void);
-struct evlist *evlist__new_default(void);
+struct evlist *evlist__new_default(const struct target *target, bool sample_callchains);
 struct evlist *evlist__new_dummy(void);
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
 		  struct perf_thread_map *threads);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 54c8922a8e47..5a294595a677 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1021,6 +1021,11 @@ static void __evsel__config_callchain(struct evsel *evsel, const struct record_o
 	bool function = evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
 
+	if (EM_HOST == EM_S390 && param->record_mode == CALLCHAIN_FP) {
+		pr_warning_once(
+			"Framepointer unwinding lacks kernel support. Use '--call-graph dwarf'\n");
+	}
+
 	evsel__set_sample_bit(evsel, CALLCHAIN);
 
 	attr->sample_max_stack = param->max_stack;
-- 
2.53.0.851.ga537e3e6e9-goog
Re: [PATCH v8 5/5] perf evlist: Improve default event for s390
Posted by Thomas Richter 2 weeks, 4 days ago
On 3/19/26 00:46, Ian Rogers wrote:
> Frame pointer callchains are not supported on s390 and dwarf
> callchains are only supported on software events.
> 
> Switch the default event from the hardware 'cycles' event to the
> software 'cpu-clock' or 'task-clock' on s390 if callchains are
> enabled. Move some of the target initialization earlier in builtin-top
> and builtin-record, so it is ready for use by evlist__new_default.
> 
> If frame pointer callchains are requested on s390 show a
> warning. Modify the '-g' option of `perf top` and `perf record` to
> default to dwarf callchains on s390.
> 
> Signed-off-by: Ian Rogers <irogers@google.com>
> Tested-by: Thomas Richter <tmricht@linux.ibm.com>
> ---
>  tools/perf/builtin-record.c      | 18 +++++++++++-------
>  tools/perf/builtin-top.c         | 23 ++++++++++++-----------
>  tools/perf/tests/event_update.c  |  4 +++-
>  tools/perf/tests/expand-cgroup.c |  4 +++-
>  tools/perf/tests/perf-record.c   |  7 +++++--
>  tools/perf/tests/topology.c      |  4 +++-
>  tools/perf/util/evlist.c         | 32 +++++++++++++++++++++-----------
>  tools/perf/util/evlist.h         |  2 +-
>  tools/perf/util/evsel.c          |  5 +++++
>  9 files changed, 64 insertions(+), 35 deletions(-)
> 
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 59b8125d1b13..3276ffdc3141 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -55,6 +55,7 @@
>  #include "asm/bug.h"
>  #include "perf.h"
>  #include "cputopo.h"
> +#include "dwarf-regs.h"
>  
>  #include <errno.h>
>  #include <inttypes.h>
> @@ -2995,7 +2996,9 @@ static int record_callchain_opt(const struct option *opt,
>  		return 0;
>  	}
>  
> -	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
> +	return record_opts__parse_callchain(opt->value, &callchain_param,
> +					    EM_HOST != EM_S390 ? "fp" : "dwarf",
> +					    unset);
>  }
>  
>  
> @@ -4095,8 +4098,11 @@ int cmd_record(int argc, const char **argv)
>  
>  	perf_debuginfod_setup(&record.debuginfod);
>  
> -	/* Make system wide (-a) the default target. */
> -	if (!argc && target__none(&rec->opts.target))
> +	/*
> +	 * Use system wide (-a) for the default target (ie. when no
> +	 * workload). User ID filtering also implies system-wide.
> +	 */
> +	if ((!argc && target__none(&rec->opts.target)) || rec->uid_str)
>  		rec->opts.target.system_wide = true;
>  
>  	if (nr_cgroups && !rec->opts.target.system_wide) {
> @@ -4274,7 +4280,8 @@ int cmd_record(int argc, const char **argv)
>  		record.opts.tail_synthesize = true;
>  
>  	if (rec->evlist->core.nr_entries == 0) {
> -		struct evlist *def_evlist = evlist__new_default();
> +		struct evlist *def_evlist = evlist__new_default(&rec->opts.target,
> +								callchain_param.enabled);
>  
>  		if (!def_evlist)
>  			goto out;
> @@ -4303,9 +4310,6 @@ int cmd_record(int argc, const char **argv)
>  		err = parse_uid_filter(rec->evlist, uid);
>  		if (err)
>  			goto out;
> -
> -		/* User ID filtering implies system wide. */
> -		rec->opts.target.system_wide = true;
>  	}
>  
>  	/* Enable ignoring missing threads when -p option is defined. */
> diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
> index b6726f4dffb3..37950efb28ac 100644
> --- a/tools/perf/builtin-top.c
> +++ b/tools/perf/builtin-top.c
> @@ -56,6 +56,7 @@
>  #include "util/debug.h"
>  #include "util/ordered-events.h"
>  #include "util/pfm.h"
> +#include "dwarf-regs.h"
>  
>  #include <assert.h>
>  #include <elf.h>
> @@ -1420,7 +1421,7 @@ callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unse
>  		return 0;
>  	}
>  
> -	return parse_callchain_opt(opt, "fp", unset);
> +	return parse_callchain_opt(opt, EM_HOST != EM_S390 ? "fp" : "dwarf", unset);
>  }
>  
>  
> @@ -1705,8 +1706,17 @@ int cmd_top(int argc, const char **argv)
>  	if (annotate_check_args() < 0)
>  		goto out_delete_evlist;
>  
> +	status = target__validate(target);
> +	if (status) {
> +		target__strerror(target, status, errbuf, BUFSIZ);
> +		ui__warning("%s\n", errbuf);
> +	}
> +
> +	if (target__none(target))
> +		target->system_wide = true;
> +
>  	if (!top.evlist->core.nr_entries) {
> -		struct evlist *def_evlist = evlist__new_default();
> +		struct evlist *def_evlist = evlist__new_default(target, callchain_param.enabled);
>  
>  		if (!def_evlist)
>  			goto out_delete_evlist;
> @@ -1799,12 +1809,6 @@ int cmd_top(int argc, const char **argv)
>  		goto out_delete_evlist;
>  	}
>  
> -	status = target__validate(target);
> -	if (status) {
> -		target__strerror(target, status, errbuf, BUFSIZ);
> -		ui__warning("%s\n", errbuf);
> -	}
> -
>  	if (top.uid_str) {
>  		uid_t uid = parse_uid(top.uid_str);
>  
> @@ -1818,9 +1822,6 @@ int cmd_top(int argc, const char **argv)
>  			goto out_delete_evlist;
>  	}
>  
> -	if (target__none(target))
> -		target->system_wide = true;
> -
>  	if (evlist__create_maps(top.evlist, target) < 0) {
>  		ui__error("Couldn't create thread/CPU maps: %s\n",
>  			  errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
> diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
> index cb9e6de2e033..facc65e29f20 100644
> --- a/tools/perf/tests/event_update.c
> +++ b/tools/perf/tests/event_update.c
> @@ -8,6 +8,7 @@
>  #include "header.h"
>  #include "machine.h"
>  #include "util/synthetic-events.h"
> +#include "target.h"
>  #include "tool.h"
>  #include "tests.h"
>  #include "debug.h"
> @@ -81,7 +82,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes
>  {
>  	struct evsel *evsel;
>  	struct event_name tmp;
> -	struct evlist *evlist = evlist__new_default();
> +	struct target target = {};
> +	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
>  
>  	TEST_ASSERT_VAL("failed to get evlist", evlist);
>  
> diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
> index c7b32a220ca1..dd547f2f77cc 100644
> --- a/tools/perf/tests/expand-cgroup.c
> +++ b/tools/perf/tests/expand-cgroup.c
> @@ -8,6 +8,7 @@
>  #include "parse-events.h"
>  #include "pmu-events/pmu-events.h"
>  #include "pfm.h"
> +#include "target.h"
>  #include <subcmd/parse-options.h>
>  #include <stdio.h>
>  #include <stdlib.h>
> @@ -99,7 +100,8 @@ out:	for (i = 0; i < nr_events; i++)
>  static int expand_default_events(void)
>  {
>  	int ret;
> -	struct evlist *evlist = evlist__new_default();
> +	struct target target = {};
> +	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
>  
>  	TEST_ASSERT_VAL("failed to get evlist", evlist);
>  
> diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
> index efbd9cd60c63..c6e31ab8a6b8 100644
> --- a/tools/perf/tests/perf-record.c
> +++ b/tools/perf/tests/perf-record.c
> @@ -84,8 +84,11 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
>  	CPU_ZERO_S(cpu_mask_size, cpu_mask);
>  
>  	perf_sample__init(&sample, /*all=*/false);
> -	if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
> -		evlist = evlist__new_default();
> +	if (evlist == NULL) { /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
> +		struct target target = {};
> +
> +		evlist = evlist__new_default(&target, /*sample_callchains=*/false);
> +	}
>  
>  	if (evlist == NULL) {
>  		pr_debug("Not enough memory to create evlist\n");
> diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
> index ec01150d208d..a34a7ab19a80 100644
> --- a/tools/perf/tests/topology.c
> +++ b/tools/perf/tests/topology.c
> @@ -9,6 +9,7 @@
>  #include "evlist.h"
>  #include "debug.h"
>  #include "pmus.h"
> +#include "target.h"
>  #include <linux/err.h>
>  
>  #define TEMPL "/tmp/perf-test-XXXXXX"
> @@ -37,11 +38,12 @@ static int session_write_header(char *path)
>  		.path = path,
>  		.mode = PERF_DATA_MODE_WRITE,
>  	};
> +	struct target target = {};
>  
>  	session = perf_session__new(&data, NULL);
>  	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
>  
> -	session->evlist = evlist__new_default();
> +	session->evlist = evlist__new_default(&target, /*sample_callchains=*/false);
>  	TEST_ASSERT_VAL("can't get evlist", session->evlist);
>  	session->evlist->session = session;
>  
> diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
> index 591bdf0b3e2a..c702741a9173 100644
> --- a/tools/perf/util/evlist.c
> +++ b/tools/perf/util/evlist.c
> @@ -13,6 +13,7 @@
>  #include "util/mmap.h"
>  #include "thread_map.h"
>  #include "target.h"
> +#include "dwarf-regs.h"
>  #include "evlist.h"
>  #include "evsel.h"
>  #include "record.h"
> @@ -98,38 +99,47 @@ struct evlist *evlist__new(void)
>  	return evlist;
>  }
>  
> -struct evlist *evlist__new_default(void)
> +struct evlist *evlist__new_default(const struct target *target, bool sample_callchains)
>  {
>  	struct evlist *evlist = evlist__new();
>  	bool can_profile_kernel;
>  	struct perf_pmu *pmu = NULL;
> +	struct evsel *evsel;
> +	char buf[256];
> +	int err;
>  
>  	if (!evlist)
>  		return NULL;
>  
>  	can_profile_kernel = perf_event_paranoid_check(1);
>  
> -	while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> -		char buf[256];
> -		int err;
> -
> -		snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
> +	if (EM_HOST == EM_S390 && sample_callchains) {
> +		snprintf(buf, sizeof(buf), "software/%s/%s",
> +			 target__has_cpu(target) ? "cpu-clock" : "task-clock",
>  			 can_profile_kernel ? "P" : "Pu");
>  		err = parse_event(evlist, buf);
> -		if (err) {
> -			evlist__delete(evlist);
> -			return NULL;
> +		if (err)
> +			goto out_err;
> +	} else {
> +		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
> +			snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
> +				can_profile_kernel ? "P" : "Pu");
> +			err = parse_event(evlist, buf);
> +			if (err)
> +				goto out_err;
>  		}
>  	}
>  
> +	/* If there is only 1 event a sample identifier isn't necessary. */
>  	if (evlist->core.nr_entries > 1) {
> -		struct evsel *evsel;
> -
>  		evlist__for_each_entry(evlist, evsel)
>  			evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
>  	}
>  
>  	return evlist;
> +out_err:
> +	evlist__delete(evlist);
> +	return NULL;
>  }
>  
>  struct evlist *evlist__new_dummy(void)
> diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
> index d17c3b57a409..e507f5f20ef6 100644
> --- a/tools/perf/util/evlist.h
> +++ b/tools/perf/util/evlist.h
> @@ -104,7 +104,7 @@ struct evsel_str_handler {
>  };
>  
>  struct evlist *evlist__new(void);
> -struct evlist *evlist__new_default(void);
> +struct evlist *evlist__new_default(const struct target *target, bool sample_callchains);
>  struct evlist *evlist__new_dummy(void);
>  void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
>  		  struct perf_thread_map *threads);
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 54c8922a8e47..5a294595a677 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -1021,6 +1021,11 @@ static void __evsel__config_callchain(struct evsel *evsel, const struct record_o
>  	bool function = evsel__is_function_event(evsel);
>  	struct perf_event_attr *attr = &evsel->core.attr;
>  
> +	if (EM_HOST == EM_S390 && param->record_mode == CALLCHAIN_FP) {
> +		pr_warning_once(
> +			"Framepointer unwinding lacks kernel support. Use '--call-graph dwarf'\n");
> +	}
> +
>  	evsel__set_sample_bit(evsel, CALLCHAIN);
>  
>  	attr->sample_max_stack = param->max_stack;

Ian,

again Thanks very much.

Tested-by: Thomas Richter <tmricht@linux.ibm.com>

-- 
Thomas Richter, Dept 3303, IBM s390 Linux Development, Boeblingen, Germany
--
IBM Deutschland Research & Development GmbH

Vorsitzender des Aufsichtsrats: Wolfgang Wendt

Geschäftsführung: David Faller

Sitz der Gesellschaft: Böblingen / Registergericht: Amtsgericht Stuttgart, HRB 243294
[PATCH v7 1/5] perf evsel: Improve falling back from cycles
Posted by Ian Rogers 2 weeks, 5 days ago
Switch to using evsel__match rather than comparing perf_event_attr
values, this is robust on hybrid architectures.
Ensure evsel->pmu matches the evsel->core.attr.
Remove exclude bits that get set in other fallback attempts when
switching the event.
Log the event name with modifiers when switching the event on fallback.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/util/evsel.c | 45 ++++++++++++++++++++++++++++-------------
 tools/perf/util/evsel.h |  2 ++
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f59228c1a39e..bd14d9bbc91f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3785,25 +3785,42 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 {
 	int paranoid;
 
-	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
-	    evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
-	    evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+	if ((err == ENODEV || err == ENOENT || err == ENXIO) &&
+	    evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		/*
-		 * If it's cycles then fall back to hrtimer based cpu-clock sw
-		 * counter, which is always available even if no PMU support.
-		 *
-		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
-		 * b0a873e).
+		 * If it's the legacy hardware cycles event fails then fall back
+		 * to hrtimer based cpu-clock sw counter, which is always
+		 * available even if no PMU support. PPC returned ENXIO rather
+		 * than ENODEV or ENOENT until 2.6.37.
 		 */
-		evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
+		evsel->pmu = perf_pmus__find_by_type(PERF_TYPE_SOFTWARE);
+		assert(evsel->pmu); /* software is a "well-known" and can't fail PMU type. */
+
+		/* Configure the event. */
+		evsel->core.attr.type = PERF_TYPE_SOFTWARE;
 		evsel->core.attr.config = target__has_cpu(target)
 			? PERF_COUNT_SW_CPU_CLOCK
 			: PERF_COUNT_SW_TASK_CLOCK;
-		scnprintf(msg, msgsize,
-			"The cycles event is not supported, trying to fall back to %s",
-			target__has_cpu(target) ? "cpu-clock" : "task-clock");
+		evsel->core.is_pmu_core = false;
+
+		/* Remove excludes for new event. */
+		if (evsel->fallenback_eacces) {
+			evsel->core.attr.exclude_kernel = 0;
+			evsel->core.attr.exclude_hv     = 0;
+			evsel->fallenback_eacces = false;
+		}
+		if (evsel->fallenback_eopnotsupp) {
+			evsel->core.attr.exclude_guest = 0;
+			evsel->fallenback_eopnotsupp = false;
+		}
 
+		/* Name is recomputed by evsel__name. */
 		zfree(&evsel->name);
+
+		/* Log message. */
+		scnprintf(msg, msgsize,
+			  "The cycles event is not supported, trying to fall back to %s",
+			  evsel__name(evsel));
 		return true;
 	} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
 		   (paranoid = perf_event_paranoid()) > 1) {
@@ -3830,7 +3847,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 			  " samples", paranoid);
 		evsel->core.attr.exclude_kernel = 1;
 		evsel->core.attr.exclude_hv     = 1;
-
+		evsel->fallenback_eacces = true;
 		return true;
 	} else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest &&
 		   !evsel->exclude_GH) {
@@ -3851,7 +3868,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 		/* Apple M1 requires exclude_guest */
 		scnprintf(msg, msgsize, "Trying to fall back to excluding guest samples");
 		evsel->core.attr.exclude_guest = 1;
-
+		evsel->fallenback_eopnotsupp = true;
 		return true;
 	}
 no_fallback:
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a3d754c029a0..97f57fab28ce 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -124,6 +124,8 @@ struct evsel {
 	bool			default_metricgroup; /* A member of the Default metricgroup */
 	bool			default_show_events; /* If a default group member, show the event */
 	bool			needs_uniquify;
+	bool			fallenback_eacces;
+	bool			fallenback_eopnotsupp;
 	struct hashmap		*per_pkg_mask;
 	int			err;
 	int			script_output_type;
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v7 2/5] perf target: Constify simple check functions
Posted by Ian Rogers 2 weeks, 5 days ago
Allow the target to be const in callers.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/util/target.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 84ebb9c940c6..bc2bff9c6842 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -49,22 +49,22 @@ uid_t parse_uid(const char *str);
 
 int target__strerror(struct target *target, int errnum, char *buf, size_t buflen);
 
-static inline bool target__has_task(struct target *target)
+static inline bool target__has_task(const struct target *target)
 {
 	return target->tid || target->pid;
 }
 
-static inline bool target__has_cpu(struct target *target)
+static inline bool target__has_cpu(const struct target *target)
 {
 	return target->system_wide || target->cpu_list;
 }
 
-static inline bool target__none(struct target *target)
+static inline bool target__none(const struct target *target)
 {
 	return !target__has_task(target) && !target__has_cpu(target);
 }
 
-static inline bool target__enable_on_exec(struct target *target)
+static inline bool target__enable_on_exec(const struct target *target)
 {
 	/*
 	 * Normally enable_on_exec should be set if:
@@ -75,12 +75,12 @@ static inline bool target__enable_on_exec(struct target *target)
 	return target__none(target) && !target->initial_delay;
 }
 
-static inline bool target__has_per_thread(struct target *target)
+static inline bool target__has_per_thread(const struct target *target)
 {
 	return target->system_wide && target->per_thread;
 }
 
-static inline bool target__uses_dummy_map(struct target *target)
+static inline bool target__uses_dummy_map(const struct target *target)
 {
 	bool use_dummy = false;
 
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v7 3/5] perf evsel: Constify option arguments to config functions
Posted by Ian Rogers 2 weeks, 5 days ago
The options are used to configure the evsel but are not themselves
configured. Make the arguments const to better capture this.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/util/evsel.c | 20 ++++++++++----------
 tools/perf/util/evsel.h |  8 ++++----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bd14d9bbc91f..54c8922a8e47 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1015,8 +1015,8 @@ uint16_t evsel__e_machine(struct evsel *evsel, uint32_t *e_flags)
 	return perf_session__e_machine(session, e_flags);
 }
 
-static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-				      struct callchain_param *param)
+static void __evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+				      const struct callchain_param *param)
 {
 	bool function = evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
@@ -1080,14 +1080,14 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
 		attr->defer_callchain = 1;
 }
 
-void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-			     struct callchain_param *param)
+void evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+			     const struct callchain_param *param)
 {
 	if (param->enabled)
 		return __evsel__config_callchain(evsel, opts, param);
 }
 
-static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
+static void evsel__reset_callgraph(struct evsel *evsel, const struct callchain_param *param)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
 
@@ -1106,7 +1106,7 @@ static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *
 
 static void evsel__apply_ratio_to_prev(struct evsel *evsel,
 				       struct perf_event_attr *attr,
-				       struct record_opts *opts,
+				       const struct record_opts *opts,
 				       const char *buf)
 {
 	struct perf_event_attr *prev_attr = NULL;
@@ -1170,7 +1170,7 @@ static void evsel__apply_ratio_to_prev(struct evsel *evsel,
 }
 
 static void evsel__apply_config_terms(struct evsel *evsel,
-				      struct record_opts *opts, bool track)
+				      const struct record_opts *opts, bool track)
 {
 	struct evsel_config_term *term;
 	struct list_head *config_terms = &evsel->config_terms;
@@ -1445,7 +1445,7 @@ void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused,
 {
 }
 
-static void evsel__set_default_freq_period(struct record_opts *opts,
+static void evsel__set_default_freq_period(const struct record_opts *opts,
 					   struct perf_event_attr *attr)
 {
 	if (opts->freq) {
@@ -1490,8 +1490,8 @@ bool evsel__is_offcpu_event(struct evsel *evsel)
  *     enable/disable events specifically, as there's no
  *     initial traced exec call.
  */
-void evsel__config(struct evsel *evsel, struct record_opts *opts,
-		   struct callchain_param *callchain)
+void evsel__config(struct evsel *evsel, const struct record_opts *opts,
+		   const struct callchain_param *callchain)
 {
 	struct evsel *leader = evsel__leader(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 97f57fab28ce..339b5c08a33d 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -287,10 +287,10 @@ void evsel__set_priv_destructor(void (*destructor)(void *priv));
 
 struct callchain_param;
 
-void evsel__config(struct evsel *evsel, struct record_opts *opts,
-		   struct callchain_param *callchain);
-void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-			     struct callchain_param *callchain);
+void evsel__config(struct evsel *evsel, const struct record_opts *opts,
+		   const struct callchain_param *callchain);
+void evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+			     const  struct callchain_param *callchain);
 
 int __evsel__sample_size(u64 sample_type);
 void evsel__calc_id_pos(struct evsel *evsel);
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v7 4/5] perf callchain: Refactor callchain option parsing
Posted by Ian Rogers 2 weeks, 5 days ago
record_opts__parse_callchain is shared by builtin-record and
builtin-trace, it is declared in callchain.h. Move the declaration to
callchain.c for consistency with the header. In other cases make the
option callback a small static stub that then calls into callchain.c.

Make the no argument '-g' callchain option just a short-cut for
'--call-graph fp' so that there is consistency in how the arguments
are handled. This requires the const char* string to be strdup-ed in
__parse_callchain_report_opt. For consistency also make
parse_callchain_record use strdup and remove some unnecessary
casts. Also, be more explicit with comments about the '-g' behavior if
there is a .perfconfig file setting.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/builtin-record.c | 60 +++++-------------------------
 tools/perf/builtin-top.c    | 20 ++++++----
 tools/perf/builtin-trace.c  |  9 ++++-
 tools/perf/util/callchain.c | 73 ++++++++++++++++++++++++++++++-------
 tools/perf/util/callchain.h | 12 ++----
 5 files changed, 94 insertions(+), 80 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 40917a0be238..26223f9505c2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2975,65 +2975,25 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	return status;
 }
 
-static void callchain_debug(struct callchain_param *callchain)
-{
-	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
-
-	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
-
-	if (callchain->record_mode == CALLCHAIN_DWARF)
-		pr_debug("callchain: stack dump size %d\n",
-			 callchain->dump_size);
-}
-
-int record_opts__parse_callchain(struct record_opts *record,
-				 struct callchain_param *callchain,
-				 const char *arg, bool unset)
-{
-	int ret;
-	callchain->enabled = !unset;
-
-	/* --no-call-graph */
-	if (unset) {
-		callchain->record_mode = CALLCHAIN_NONE;
-		pr_debug("callchain: disabled\n");
-		return 0;
-	}
-
-	ret = parse_callchain_record_opt(arg, callchain);
-	if (!ret) {
-		/* Enable data address sampling for DWARF unwind. */
-		if (callchain->record_mode == CALLCHAIN_DWARF &&
-		    !record->record_data_mmap_set)
-			record->record_data_mmap = true;
-		callchain_debug(callchain);
-	}
-
-	return ret;
-}
-
-int record_parse_callchain_opt(const struct option *opt,
+static int record_parse_callchain_opt(const struct option *opt,
 			       const char *arg,
 			       int unset)
 {
 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
 }
 
-int record_callchain_opt(const struct option *opt,
-			 const char *arg __maybe_unused,
-			 int unset __maybe_unused)
+static int record_callchain_opt(const struct option *opt,
+				const char *arg __maybe_unused,
+				int unset)
 {
-	struct callchain_param *callchain = opt->value;
-
-	callchain->enabled = true;
-
-	if (callchain->record_mode == CALLCHAIN_NONE)
-		callchain->record_mode = CALLCHAIN_FP;
+	/* The -g option only sets the callchain if not already configure by .perfconfig. */
+	if (callchain_param.record_mode != CALLCHAIN_NONE)
+		return 0;
 
-	callchain_debug(callchain);
-	return 0;
+	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
 }
 
+
 static int perf_record_config(const char *var, const char *value, void *cb)
 {
 	struct record *rec = cb;
@@ -3525,7 +3485,7 @@ static struct option __record_options[] = {
 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
 		     record__mmap_flush_parse),
-	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
+	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
 			   NULL, "enables call-graph recording" ,
 			   &record_callchain_opt),
 	OPT_CALLBACK(0, "call-graph", &record.opts,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 710604c4f6f6..5d2587228975 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1386,13 +1386,6 @@ static int __cmd_top(struct perf_top *top)
 	return ret;
 }
 
-static int
-callchain_opt(const struct option *opt, const char *arg, int unset)
-{
-	symbol_conf.use_callchain = true;
-	return record_callchain_opt(opt, arg, unset);
-}
-
 static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
@@ -1413,6 +1406,19 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 	return parse_callchain_top_opt(arg);
 }
 
+static int
+callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset)
+{
+	struct callchain_param *callchain = opt->value;
+
+	/* The -g option only sets the callchain if not already configure by .perfconfig. */
+	if (callchain->record_mode != CALLCHAIN_NONE)
+		return 0;
+
+	return parse_callchain_opt(opt, "fp", unset);
+}
+
+
 static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
 {
 	if (!strcmp(var, "top.call-graph")) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1c38f3d16a31..f487fbaa0ad6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -5300,6 +5300,13 @@ static int trace__parse_summary_mode(const struct option *opt, const char *str,
 	return 0;
 }
 
+static int trace_parse_callchain_opt(const struct option *opt,
+				     const char *arg,
+				     int unset)
+{
+	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
+}
+
 static int trace__config(const char *var, const char *value, void *arg)
 {
 	struct trace *trace = arg;
@@ -5447,7 +5454,7 @@ int cmd_trace(int argc, const char **argv)
 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
 	OPT_CALLBACK(0, "call-graph", &trace.opts,
 		     "record_mode[,record_size]", record_callchain_help,
-		     &record_parse_callchain_opt),
+		     &trace_parse_callchain_opt),
 	OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
 		    "Use libtraceevent to print the tracepoint arguments."),
 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8ff0898799ee..f879b84f8ff9 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -30,6 +30,7 @@
 #include "map.h"
 #include "callchain.h"
 #include "branch.h"
+#include "record.h"
 #include "symbol.h"
 #include "thread.h"
 #include "util.h"
@@ -170,7 +171,7 @@ static int get_stack_size(const char *str, unsigned long *_size)
 static int
 __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 {
-	char *tok;
+	char *tok, *arg_copy;
 	char *endptr, *saveptr = NULL;
 	bool minpcnt_set = false;
 	bool record_opt_set = false;
@@ -182,12 +183,17 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 	if (!arg)
 		return 0;
 
-	while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) {
+	arg_copy = strdup(arg);
+	if (!arg_copy)
+		return -ENOMEM;
+
+	tok = strtok_r(arg_copy, ",", &saveptr);
+	while (tok) {
 		if (!strncmp(tok, "none", strlen(tok))) {
 			callchain_param.mode = CHAIN_NONE;
 			callchain_param.enabled = false;
 			symbol_conf.use_callchain = false;
-			return 0;
+			goto out;
 		}
 
 		if (!parse_callchain_mode(tok) ||
@@ -214,30 +220,35 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 			unsigned long size = 0;
 
 			if (get_stack_size(tok, &size) < 0)
-				return -1;
+				goto err_out;
 			callchain_param.dump_size = size;
 			try_stack_size = false;
 		} else if (!minpcnt_set) {
 			/* try to get the min percent */
 			callchain_param.min_percent = strtod(tok, &endptr);
 			if (tok == endptr)
-				return -1;
+				goto err_out;
 			minpcnt_set = true;
 		} else {
 			/* try print limit at last */
 			callchain_param.print_limit = strtoul(tok, &endptr, 0);
 			if (tok == endptr)
-				return -1;
+				goto err_out;
 		}
 next:
-		arg = NULL;
+		tok = strtok_r(NULL, ",", &saveptr);
 	}
 
 	if (callchain_register_param(&callchain_param) < 0) {
 		pr_err("Can't register callchain params\n");
-		return -1;
+		goto err_out;
 	}
+out:
+	free(arg_copy);
 	return 0;
+err_out:
+	free(arg_copy);
+	return -1;
 }
 
 int parse_callchain_report_opt(const char *arg)
@@ -257,14 +268,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
 	int ret = -1;
 
 	/* We need buffer that we know we can write to. */
-	buf = malloc(strlen(arg) + 1);
+	buf = strdup(arg);
 	if (!buf)
 		return -ENOMEM;
 
-	strcpy(buf, arg);
-
-	tok = strtok_r((char *)buf, ",", &saveptr);
-	name = tok ? : (char *)buf;
+	tok = strtok_r(buf, ",", &saveptr);
+	name = tok ? : buf;
 
 	do {
 		/* Framepointer style */
@@ -328,6 +337,44 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
 	return ret;
 }
 
+static void callchain_debug(const struct callchain_param *callchain)
+{
+	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
+
+	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
+
+	if (callchain->record_mode == CALLCHAIN_DWARF)
+		pr_debug("callchain: stack dump size %d\n",
+			 callchain->dump_size);
+}
+
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset)
+{
+	int ret;
+
+	callchain->enabled = !unset;
+
+	/* --no-call-graph */
+	if (unset) {
+		callchain->record_mode = CALLCHAIN_NONE;
+		pr_debug("callchain: disabled\n");
+		return 0;
+	}
+
+	ret = parse_callchain_record_opt(arg, callchain);
+	if (!ret) {
+		/* Enable data address sampling for DWARF unwind. */
+		if (callchain->record_mode == CALLCHAIN_DWARF &&
+		    !record->record_data_mmap_set)
+			record->record_data_mmap = true;
+		callchain_debug(callchain);
+	}
+
+	return ret;
+}
+
 int perf_callchain_config(const char *var, const char *value)
 {
 	char *endptr;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index df54ddb8c0cb..06d463ccc7a0 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -9,11 +9,13 @@
 
 struct addr_location;
 struct evsel;
+struct hist_entry;
+struct hists;
 struct ip_callchain;
 struct map;
 struct perf_sample;
+struct record_opts;
 struct thread;
-struct hists;
 
 #define HELP_PAD "\t\t\t\t"
 
@@ -237,14 +239,6 @@ struct callchain_cursor *get_tls_callchain_cursor(void);
 int callchain_cursor__copy(struct callchain_cursor *dst,
 			   struct callchain_cursor *src);
 
-struct option;
-struct hist_entry;
-
-int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
-int record_callchain_opt(const struct option *opt, const char *arg, int unset);
-
-struct record_opts;
-
 int record_opts__parse_callchain(struct record_opts *record,
 				 struct callchain_param *callchain,
 				 const char *arg, bool unset);
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v7 5/5] perf evlist: Improve default event for s390
Posted by Ian Rogers 2 weeks, 5 days ago
Frame pointer callchains are not supported on s390 and dwarf
callchains are only supported on software events.

Switch the default event from the hardware 'cycles' event to the
software 'cpu-clock' or 'task-clock' on s390 if callchains are
enabled. Move the target initialization earlier in builtin-top so it
is ready for use by evlist__new_default.

If frame pointer callchains are requested on s390 show a
warning. Modify the '-g' option of `perf top` and `perf record` to
default to dwarf callchains on s390.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
---
 tools/perf/builtin-record.c      |  8 ++++++--
 tools/perf/builtin-top.c         | 23 ++++++++++++-----------
 tools/perf/tests/event_update.c  |  4 +++-
 tools/perf/tests/expand-cgroup.c |  4 +++-
 tools/perf/tests/perf-record.c   |  7 +++++--
 tools/perf/tests/topology.c      |  4 +++-
 tools/perf/util/evlist.c         | 32 +++++++++++++++++++++-----------
 tools/perf/util/evlist.h         |  2 +-
 tools/perf/util/evsel.c          |  5 +++++
 9 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 26223f9505c2..a03acfe66c27 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -55,6 +55,7 @@
 #include "asm/bug.h"
 #include "perf.h"
 #include "cputopo.h"
+#include "dwarf-regs.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -2990,7 +2991,9 @@ static int record_callchain_opt(const struct option *opt,
 	if (callchain_param.record_mode != CALLCHAIN_NONE)
 		return 0;
 
-	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
+	return record_opts__parse_callchain(opt->value, &callchain_param,
+					    EM_HOST != EM_S390 ? "fp" : "dwarf",
+					    unset);
 }
 
 
@@ -4269,7 +4272,8 @@ int cmd_record(int argc, const char **argv)
 		record.opts.tail_synthesize = true;
 
 	if (rec->evlist->core.nr_entries == 0) {
-		struct evlist *def_evlist = evlist__new_default();
+		struct evlist *def_evlist = evlist__new_default(&rec->opts.target,
+								callchain_param.enabled);
 
 		if (!def_evlist)
 			goto out;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5d2587228975..3cc339ec04e3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -56,6 +56,7 @@
 #include "util/debug.h"
 #include "util/ordered-events.h"
 #include "util/pfm.h"
+#include "dwarf-regs.h"
 
 #include <assert.h>
 #include <elf.h>
@@ -1415,7 +1416,7 @@ callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unse
 	if (callchain->record_mode != CALLCHAIN_NONE)
 		return 0;
 
-	return parse_callchain_opt(opt, "fp", unset);
+	return parse_callchain_opt(opt, EM_HOST != EM_S390 ? "fp" : "dwarf", unset);
 }
 
 
@@ -1700,8 +1701,17 @@ int cmd_top(int argc, const char **argv)
 	if (annotate_check_args() < 0)
 		goto out_delete_evlist;
 
+	status = target__validate(target);
+	if (status) {
+		target__strerror(target, status, errbuf, BUFSIZ);
+		ui__warning("%s\n", errbuf);
+	}
+
+	if (target__none(target))
+		target->system_wide = true;
+
 	if (!top.evlist->core.nr_entries) {
-		struct evlist *def_evlist = evlist__new_default();
+		struct evlist *def_evlist = evlist__new_default(target, callchain_param.enabled);
 
 		if (!def_evlist)
 			goto out_delete_evlist;
@@ -1794,12 +1804,6 @@ int cmd_top(int argc, const char **argv)
 		goto out_delete_evlist;
 	}
 
-	status = target__validate(target);
-	if (status) {
-		target__strerror(target, status, errbuf, BUFSIZ);
-		ui__warning("%s\n", errbuf);
-	}
-
 	if (top.uid_str) {
 		uid_t uid = parse_uid(top.uid_str);
 
@@ -1813,9 +1817,6 @@ int cmd_top(int argc, const char **argv)
 			goto out_delete_evlist;
 	}
 
-	if (target__none(target))
-		target->system_wide = true;
-
 	if (evlist__create_maps(top.evlist, target) < 0) {
 		ui__error("Couldn't create thread/CPU maps: %s\n",
 			  errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index cb9e6de2e033..facc65e29f20 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -8,6 +8,7 @@
 #include "header.h"
 #include "machine.h"
 #include "util/synthetic-events.h"
+#include "target.h"
 #include "tool.h"
 #include "tests.h"
 #include "debug.h"
@@ -81,7 +82,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes
 {
 	struct evsel *evsel;
 	struct event_name tmp;
-	struct evlist *evlist = evlist__new_default();
+	struct target target = {};
+	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
index c7b32a220ca1..dd547f2f77cc 100644
--- a/tools/perf/tests/expand-cgroup.c
+++ b/tools/perf/tests/expand-cgroup.c
@@ -8,6 +8,7 @@
 #include "parse-events.h"
 #include "pmu-events/pmu-events.h"
 #include "pfm.h"
+#include "target.h"
 #include <subcmd/parse-options.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -99,7 +100,8 @@ out:	for (i = 0; i < nr_events; i++)
 static int expand_default_events(void)
 {
 	int ret;
-	struct evlist *evlist = evlist__new_default();
+	struct target target = {};
+	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index efbd9cd60c63..c6e31ab8a6b8 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -84,8 +84,11 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
 	CPU_ZERO_S(cpu_mask_size, cpu_mask);
 
 	perf_sample__init(&sample, /*all=*/false);
-	if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
-		evlist = evlist__new_default();
+	if (evlist == NULL) { /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
+		struct target target = {};
+
+		evlist = evlist__new_default(&target, /*sample_callchains=*/false);
+	}
 
 	if (evlist == NULL) {
 		pr_debug("Not enough memory to create evlist\n");
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index ec01150d208d..a34a7ab19a80 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -9,6 +9,7 @@
 #include "evlist.h"
 #include "debug.h"
 #include "pmus.h"
+#include "target.h"
 #include <linux/err.h>
 
 #define TEMPL "/tmp/perf-test-XXXXXX"
@@ -37,11 +38,12 @@ static int session_write_header(char *path)
 		.path = path,
 		.mode = PERF_DATA_MODE_WRITE,
 	};
+	struct target target = {};
 
 	session = perf_session__new(&data, NULL);
 	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
-	session->evlist = evlist__new_default();
+	session->evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 	TEST_ASSERT_VAL("can't get evlist", session->evlist);
 	session->evlist->session = session;
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 591bdf0b3e2a..c702741a9173 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -13,6 +13,7 @@
 #include "util/mmap.h"
 #include "thread_map.h"
 #include "target.h"
+#include "dwarf-regs.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "record.h"
@@ -98,38 +99,47 @@ struct evlist *evlist__new(void)
 	return evlist;
 }
 
-struct evlist *evlist__new_default(void)
+struct evlist *evlist__new_default(const struct target *target, bool sample_callchains)
 {
 	struct evlist *evlist = evlist__new();
 	bool can_profile_kernel;
 	struct perf_pmu *pmu = NULL;
+	struct evsel *evsel;
+	char buf[256];
+	int err;
 
 	if (!evlist)
 		return NULL;
 
 	can_profile_kernel = perf_event_paranoid_check(1);
 
-	while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
-		char buf[256];
-		int err;
-
-		snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
+	if (EM_HOST == EM_S390 && sample_callchains) {
+		snprintf(buf, sizeof(buf), "software/%s/%s",
+			 target__has_cpu(target) ? "cpu-clock" : "task-clock",
 			 can_profile_kernel ? "P" : "Pu");
 		err = parse_event(evlist, buf);
-		if (err) {
-			evlist__delete(evlist);
-			return NULL;
+		if (err)
+			goto out_err;
+	} else {
+		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+			snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
+				can_profile_kernel ? "P" : "Pu");
+			err = parse_event(evlist, buf);
+			if (err)
+				goto out_err;
 		}
 	}
 
+	/* If there is only 1 event a sample identifier isn't necessary. */
 	if (evlist->core.nr_entries > 1) {
-		struct evsel *evsel;
-
 		evlist__for_each_entry(evlist, evsel)
 			evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
 	}
 
 	return evlist;
+out_err:
+	evlist__delete(evlist);
+	return NULL;
 }
 
 struct evlist *evlist__new_dummy(void)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index d17c3b57a409..e507f5f20ef6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -104,7 +104,7 @@ struct evsel_str_handler {
 };
 
 struct evlist *evlist__new(void);
-struct evlist *evlist__new_default(void);
+struct evlist *evlist__new_default(const struct target *target, bool sample_callchains);
 struct evlist *evlist__new_dummy(void);
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
 		  struct perf_thread_map *threads);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 54c8922a8e47..5a294595a677 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1021,6 +1021,11 @@ static void __evsel__config_callchain(struct evsel *evsel, const struct record_o
 	bool function = evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
 
+	if (EM_HOST == EM_S390 && param->record_mode == CALLCHAIN_FP) {
+		pr_warning_once(
+			"Framepointer unwinding lacks kernel support. Use '--call-graph dwarf'\n");
+	}
+
 	evsel__set_sample_bit(evsel, CALLCHAIN);
 
 	attr->sample_max_stack = param->max_stack;
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v6 1/5] perf evsel: Improve falling back from cycles
Posted by Ian Rogers 2 weeks, 6 days ago
Switch to using evsel__match rather than comparing perf_event_attr
values, this is robust on hybrid architectures.
Ensure evsel->pmu matches the evsel->core.attr.
Remove exclude bits that get set in other fallback attempts when
switching the event.
Log the event name with modifiers when switching the event on fallback.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/evsel.c | 45 ++++++++++++++++++++++++++++-------------
 tools/perf/util/evsel.h |  2 ++
 2 files changed, 33 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f59228c1a39e..bd14d9bbc91f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3785,25 +3785,42 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 {
 	int paranoid;
 
-	if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
-	    evsel->core.attr.type   == PERF_TYPE_HARDWARE &&
-	    evsel->core.attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+	if ((err == ENODEV || err == ENOENT || err == ENXIO) &&
+	    evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		/*
-		 * If it's cycles then fall back to hrtimer based cpu-clock sw
-		 * counter, which is always available even if no PMU support.
-		 *
-		 * PPC returns ENXIO until 2.6.37 (behavior changed with commit
-		 * b0a873e).
+		 * If it's the legacy hardware cycles event fails then fall back
+		 * to hrtimer based cpu-clock sw counter, which is always
+		 * available even if no PMU support. PPC returned ENXIO rather
+		 * than ENODEV or ENOENT until 2.6.37.
 		 */
-		evsel->core.attr.type   = PERF_TYPE_SOFTWARE;
+		evsel->pmu = perf_pmus__find_by_type(PERF_TYPE_SOFTWARE);
+		assert(evsel->pmu); /* software is a "well-known" and can't fail PMU type. */
+
+		/* Configure the event. */
+		evsel->core.attr.type = PERF_TYPE_SOFTWARE;
 		evsel->core.attr.config = target__has_cpu(target)
 			? PERF_COUNT_SW_CPU_CLOCK
 			: PERF_COUNT_SW_TASK_CLOCK;
-		scnprintf(msg, msgsize,
-			"The cycles event is not supported, trying to fall back to %s",
-			target__has_cpu(target) ? "cpu-clock" : "task-clock");
+		evsel->core.is_pmu_core = false;
+
+		/* Remove excludes for new event. */
+		if (evsel->fallenback_eacces) {
+			evsel->core.attr.exclude_kernel = 0;
+			evsel->core.attr.exclude_hv     = 0;
+			evsel->fallenback_eacces = false;
+		}
+		if (evsel->fallenback_eopnotsupp) {
+			evsel->core.attr.exclude_guest = 0;
+			evsel->fallenback_eopnotsupp = false;
+		}
 
+		/* Name is recomputed by evsel__name. */
 		zfree(&evsel->name);
+
+		/* Log message. */
+		scnprintf(msg, msgsize,
+			  "The cycles event is not supported, trying to fall back to %s",
+			  evsel__name(evsel));
 		return true;
 	} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
 		   (paranoid = perf_event_paranoid()) > 1) {
@@ -3830,7 +3847,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 			  " samples", paranoid);
 		evsel->core.attr.exclude_kernel = 1;
 		evsel->core.attr.exclude_hv     = 1;
-
+		evsel->fallenback_eacces = true;
 		return true;
 	} else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest &&
 		   !evsel->exclude_GH) {
@@ -3851,7 +3868,7 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err,
 		/* Apple M1 requires exclude_guest */
 		scnprintf(msg, msgsize, "Trying to fall back to excluding guest samples");
 		evsel->core.attr.exclude_guest = 1;
-
+		evsel->fallenback_eopnotsupp = true;
 		return true;
 	}
 no_fallback:
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a3d754c029a0..97f57fab28ce 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -124,6 +124,8 @@ struct evsel {
 	bool			default_metricgroup; /* A member of the Default metricgroup */
 	bool			default_show_events; /* If a default group member, show the event */
 	bool			needs_uniquify;
+	bool			fallenback_eacces;
+	bool			fallenback_eopnotsupp;
 	struct hashmap		*per_pkg_mask;
 	int			err;
 	int			script_output_type;
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v6 2/5] perf target: Constify simple check functions
Posted by Ian Rogers 2 weeks, 6 days ago
Allow the target to be const in callers.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/target.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index 84ebb9c940c6..bc2bff9c6842 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -49,22 +49,22 @@ uid_t parse_uid(const char *str);
 
 int target__strerror(struct target *target, int errnum, char *buf, size_t buflen);
 
-static inline bool target__has_task(struct target *target)
+static inline bool target__has_task(const struct target *target)
 {
 	return target->tid || target->pid;
 }
 
-static inline bool target__has_cpu(struct target *target)
+static inline bool target__has_cpu(const struct target *target)
 {
 	return target->system_wide || target->cpu_list;
 }
 
-static inline bool target__none(struct target *target)
+static inline bool target__none(const struct target *target)
 {
 	return !target__has_task(target) && !target__has_cpu(target);
 }
 
-static inline bool target__enable_on_exec(struct target *target)
+static inline bool target__enable_on_exec(const struct target *target)
 {
 	/*
 	 * Normally enable_on_exec should be set if:
@@ -75,12 +75,12 @@ static inline bool target__enable_on_exec(struct target *target)
 	return target__none(target) && !target->initial_delay;
 }
 
-static inline bool target__has_per_thread(struct target *target)
+static inline bool target__has_per_thread(const struct target *target)
 {
 	return target->system_wide && target->per_thread;
 }
 
-static inline bool target__uses_dummy_map(struct target *target)
+static inline bool target__uses_dummy_map(const struct target *target)
 {
 	bool use_dummy = false;
 
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v6 3/5] perf evsel: Constify option arguments to config functions
Posted by Ian Rogers 2 weeks, 6 days ago
The options are used to configure the evsel but are not themselves
configured. Make the arguments const to better capture this.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/util/evsel.c | 20 ++++++++++----------
 tools/perf/util/evsel.h |  8 ++++----
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index bd14d9bbc91f..54c8922a8e47 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1015,8 +1015,8 @@ uint16_t evsel__e_machine(struct evsel *evsel, uint32_t *e_flags)
 	return perf_session__e_machine(session, e_flags);
 }
 
-static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-				      struct callchain_param *param)
+static void __evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+				      const struct callchain_param *param)
 {
 	bool function = evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
@@ -1080,14 +1080,14 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
 		attr->defer_callchain = 1;
 }
 
-void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-			     struct callchain_param *param)
+void evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+			     const struct callchain_param *param)
 {
 	if (param->enabled)
 		return __evsel__config_callchain(evsel, opts, param);
 }
 
-static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *param)
+static void evsel__reset_callgraph(struct evsel *evsel, const struct callchain_param *param)
 {
 	struct perf_event_attr *attr = &evsel->core.attr;
 
@@ -1106,7 +1106,7 @@ static void evsel__reset_callgraph(struct evsel *evsel, struct callchain_param *
 
 static void evsel__apply_ratio_to_prev(struct evsel *evsel,
 				       struct perf_event_attr *attr,
-				       struct record_opts *opts,
+				       const struct record_opts *opts,
 				       const char *buf)
 {
 	struct perf_event_attr *prev_attr = NULL;
@@ -1170,7 +1170,7 @@ static void evsel__apply_ratio_to_prev(struct evsel *evsel,
 }
 
 static void evsel__apply_config_terms(struct evsel *evsel,
-				      struct record_opts *opts, bool track)
+				      const struct record_opts *opts, bool track)
 {
 	struct evsel_config_term *term;
 	struct list_head *config_terms = &evsel->config_terms;
@@ -1445,7 +1445,7 @@ void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused,
 {
 }
 
-static void evsel__set_default_freq_period(struct record_opts *opts,
+static void evsel__set_default_freq_period(const struct record_opts *opts,
 					   struct perf_event_attr *attr)
 {
 	if (opts->freq) {
@@ -1490,8 +1490,8 @@ bool evsel__is_offcpu_event(struct evsel *evsel)
  *     enable/disable events specifically, as there's no
  *     initial traced exec call.
  */
-void evsel__config(struct evsel *evsel, struct record_opts *opts,
-		   struct callchain_param *callchain)
+void evsel__config(struct evsel *evsel, const struct record_opts *opts,
+		   const struct callchain_param *callchain)
 {
 	struct evsel *leader = evsel__leader(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 97f57fab28ce..339b5c08a33d 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -287,10 +287,10 @@ void evsel__set_priv_destructor(void (*destructor)(void *priv));
 
 struct callchain_param;
 
-void evsel__config(struct evsel *evsel, struct record_opts *opts,
-		   struct callchain_param *callchain);
-void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
-			     struct callchain_param *callchain);
+void evsel__config(struct evsel *evsel, const struct record_opts *opts,
+		   const struct callchain_param *callchain);
+void evsel__config_callchain(struct evsel *evsel, const struct record_opts *opts,
+			     const  struct callchain_param *callchain);
 
 int __evsel__sample_size(u64 sample_type);
 void evsel__calc_id_pos(struct evsel *evsel);
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v6 4/5] perf callchain: Refactor callchain option parsing
Posted by Ian Rogers 2 weeks, 6 days ago
record_opts__parse_callchain is shared by builtin-record and
builtin-trace, it is declared in callchain.h. Move the declaration to
callchain.c for consistency with the header. In other cases make the
option callback a small static stub that then calls into callchain.c.

Make the no argument '-g' callchain option just a short-cut for
'--call-graph fp' so that there is consistency in how the arguments
are handled. This requires the const char* string to be strdup-ed in
__parse_callchain_report_opt. For consistency also make
parse_callchain_record use strdup and remove some unnecessary
casts. Also, be more explicit with comments about the '-g' behavior if
there is a .perfconfig file setting.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-record.c | 60 +++++-------------------------
 tools/perf/builtin-top.c    | 20 ++++++----
 tools/perf/builtin-trace.c  |  9 ++++-
 tools/perf/util/callchain.c | 73 ++++++++++++++++++++++++++++++-------
 tools/perf/util/callchain.h | 12 ++----
 5 files changed, 94 insertions(+), 80 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 40917a0be238..26223f9505c2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2975,65 +2975,25 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	return status;
 }
 
-static void callchain_debug(struct callchain_param *callchain)
-{
-	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
-
-	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
-
-	if (callchain->record_mode == CALLCHAIN_DWARF)
-		pr_debug("callchain: stack dump size %d\n",
-			 callchain->dump_size);
-}
-
-int record_opts__parse_callchain(struct record_opts *record,
-				 struct callchain_param *callchain,
-				 const char *arg, bool unset)
-{
-	int ret;
-	callchain->enabled = !unset;
-
-	/* --no-call-graph */
-	if (unset) {
-		callchain->record_mode = CALLCHAIN_NONE;
-		pr_debug("callchain: disabled\n");
-		return 0;
-	}
-
-	ret = parse_callchain_record_opt(arg, callchain);
-	if (!ret) {
-		/* Enable data address sampling for DWARF unwind. */
-		if (callchain->record_mode == CALLCHAIN_DWARF &&
-		    !record->record_data_mmap_set)
-			record->record_data_mmap = true;
-		callchain_debug(callchain);
-	}
-
-	return ret;
-}
-
-int record_parse_callchain_opt(const struct option *opt,
+static int record_parse_callchain_opt(const struct option *opt,
 			       const char *arg,
 			       int unset)
 {
 	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
 }
 
-int record_callchain_opt(const struct option *opt,
-			 const char *arg __maybe_unused,
-			 int unset __maybe_unused)
+static int record_callchain_opt(const struct option *opt,
+				const char *arg __maybe_unused,
+				int unset)
 {
-	struct callchain_param *callchain = opt->value;
-
-	callchain->enabled = true;
-
-	if (callchain->record_mode == CALLCHAIN_NONE)
-		callchain->record_mode = CALLCHAIN_FP;
+	/* The -g option only sets the callchain if not already configure by .perfconfig. */
+	if (callchain_param.record_mode != CALLCHAIN_NONE)
+		return 0;
 
-	callchain_debug(callchain);
-	return 0;
+	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
 }
 
+
 static int perf_record_config(const char *var, const char *value, void *cb)
 {
 	struct record *rec = cb;
@@ -3525,7 +3485,7 @@ static struct option __record_options[] = {
 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
 		     record__mmap_flush_parse),
-	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
+	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
 			   NULL, "enables call-graph recording" ,
 			   &record_callchain_opt),
 	OPT_CALLBACK(0, "call-graph", &record.opts,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 710604c4f6f6..5d2587228975 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1386,13 +1386,6 @@ static int __cmd_top(struct perf_top *top)
 	return ret;
 }
 
-static int
-callchain_opt(const struct option *opt, const char *arg, int unset)
-{
-	symbol_conf.use_callchain = true;
-	return record_callchain_opt(opt, arg, unset);
-}
-
 static int
 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 {
@@ -1413,6 +1406,19 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
 	return parse_callchain_top_opt(arg);
 }
 
+static int
+callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unset)
+{
+	struct callchain_param *callchain = opt->value;
+
+	/* The -g option only sets the callchain if not already configure by .perfconfig. */
+	if (callchain->record_mode != CALLCHAIN_NONE)
+		return 0;
+
+	return parse_callchain_opt(opt, "fp", unset);
+}
+
+
 static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
 {
 	if (!strcmp(var, "top.call-graph")) {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 1c38f3d16a31..f487fbaa0ad6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -5300,6 +5300,13 @@ static int trace__parse_summary_mode(const struct option *opt, const char *str,
 	return 0;
 }
 
+static int trace_parse_callchain_opt(const struct option *opt,
+				     const char *arg,
+				     int unset)
+{
+	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
+}
+
 static int trace__config(const char *var, const char *value, void *arg)
 {
 	struct trace *trace = arg;
@@ -5447,7 +5454,7 @@ int cmd_trace(int argc, const char **argv)
 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
 	OPT_CALLBACK(0, "call-graph", &trace.opts,
 		     "record_mode[,record_size]", record_callchain_help,
-		     &record_parse_callchain_opt),
+		     &trace_parse_callchain_opt),
 	OPT_BOOLEAN(0, "libtraceevent_print", &trace.libtraceevent_print,
 		    "Use libtraceevent to print the tracepoint arguments."),
 	OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 8ff0898799ee..f879b84f8ff9 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -30,6 +30,7 @@
 #include "map.h"
 #include "callchain.h"
 #include "branch.h"
+#include "record.h"
 #include "symbol.h"
 #include "thread.h"
 #include "util.h"
@@ -170,7 +171,7 @@ static int get_stack_size(const char *str, unsigned long *_size)
 static int
 __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 {
-	char *tok;
+	char *tok, *arg_copy;
 	char *endptr, *saveptr = NULL;
 	bool minpcnt_set = false;
 	bool record_opt_set = false;
@@ -182,12 +183,17 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 	if (!arg)
 		return 0;
 
-	while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) {
+	arg_copy = strdup(arg);
+	if (!arg_copy)
+		return -ENOMEM;
+
+	tok = strtok_r(arg_copy, ",", &saveptr);
+	while (tok) {
 		if (!strncmp(tok, "none", strlen(tok))) {
 			callchain_param.mode = CHAIN_NONE;
 			callchain_param.enabled = false;
 			symbol_conf.use_callchain = false;
-			return 0;
+			goto out;
 		}
 
 		if (!parse_callchain_mode(tok) ||
@@ -214,30 +220,35 @@ __parse_callchain_report_opt(const char *arg, bool allow_record_opt)
 			unsigned long size = 0;
 
 			if (get_stack_size(tok, &size) < 0)
-				return -1;
+				goto err_out;
 			callchain_param.dump_size = size;
 			try_stack_size = false;
 		} else if (!minpcnt_set) {
 			/* try to get the min percent */
 			callchain_param.min_percent = strtod(tok, &endptr);
 			if (tok == endptr)
-				return -1;
+				goto err_out;
 			minpcnt_set = true;
 		} else {
 			/* try print limit at last */
 			callchain_param.print_limit = strtoul(tok, &endptr, 0);
 			if (tok == endptr)
-				return -1;
+				goto err_out;
 		}
 next:
-		arg = NULL;
+		tok = strtok_r(NULL, ",", &saveptr);
 	}
 
 	if (callchain_register_param(&callchain_param) < 0) {
 		pr_err("Can't register callchain params\n");
-		return -1;
+		goto err_out;
 	}
+out:
+	free(arg_copy);
 	return 0;
+err_out:
+	free(arg_copy);
+	return -1;
 }
 
 int parse_callchain_report_opt(const char *arg)
@@ -257,14 +268,12 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
 	int ret = -1;
 
 	/* We need buffer that we know we can write to. */
-	buf = malloc(strlen(arg) + 1);
+	buf = strdup(arg);
 	if (!buf)
 		return -ENOMEM;
 
-	strcpy(buf, arg);
-
-	tok = strtok_r((char *)buf, ",", &saveptr);
-	name = tok ? : (char *)buf;
+	tok = strtok_r(buf, ",", &saveptr);
+	name = tok ? : buf;
 
 	do {
 		/* Framepointer style */
@@ -328,6 +337,44 @@ int parse_callchain_record(const char *arg, struct callchain_param *param)
 	return ret;
 }
 
+static void callchain_debug(const struct callchain_param *callchain)
+{
+	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
+
+	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
+
+	if (callchain->record_mode == CALLCHAIN_DWARF)
+		pr_debug("callchain: stack dump size %d\n",
+			 callchain->dump_size);
+}
+
+int record_opts__parse_callchain(struct record_opts *record,
+				 struct callchain_param *callchain,
+				 const char *arg, bool unset)
+{
+	int ret;
+
+	callchain->enabled = !unset;
+
+	/* --no-call-graph */
+	if (unset) {
+		callchain->record_mode = CALLCHAIN_NONE;
+		pr_debug("callchain: disabled\n");
+		return 0;
+	}
+
+	ret = parse_callchain_record_opt(arg, callchain);
+	if (!ret) {
+		/* Enable data address sampling for DWARF unwind. */
+		if (callchain->record_mode == CALLCHAIN_DWARF &&
+		    !record->record_data_mmap_set)
+			record->record_data_mmap = true;
+		callchain_debug(callchain);
+	}
+
+	return ret;
+}
+
 int perf_callchain_config(const char *var, const char *value)
 {
 	char *endptr;
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index df54ddb8c0cb..06d463ccc7a0 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -9,11 +9,13 @@
 
 struct addr_location;
 struct evsel;
+struct hist_entry;
+struct hists;
 struct ip_callchain;
 struct map;
 struct perf_sample;
+struct record_opts;
 struct thread;
-struct hists;
 
 #define HELP_PAD "\t\t\t\t"
 
@@ -237,14 +239,6 @@ struct callchain_cursor *get_tls_callchain_cursor(void);
 int callchain_cursor__copy(struct callchain_cursor *dst,
 			   struct callchain_cursor *src);
 
-struct option;
-struct hist_entry;
-
-int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
-int record_callchain_opt(const struct option *opt, const char *arg, int unset);
-
-struct record_opts;
-
 int record_opts__parse_callchain(struct record_opts *record,
 				 struct callchain_param *callchain,
 				 const char *arg, bool unset);
-- 
2.53.0.851.ga537e3e6e9-goog
[PATCH v6 5/5] perf evlist: Improve default event for s390
Posted by Ian Rogers 2 weeks, 6 days ago
Frame pointer callchains are not supported on s390 and dwarf
callchains are only supported on software events.

Switch the default event from the hardware 'cycles' event to the
software 'cpu-clock' or 'task-clock' on s390 if callchains are
enabled. Move the target initialization earlier in builtin-top so it
is ready for use by evlist__new_default.

If frame pointer callchains are requested on s390 show a
warning. Modify the '-g' option of `perf top` and `perf record` to
default to dwarf callchains on s390.

Signed-off-by: Ian Rogers <irogers@google.com>
---
 tools/perf/builtin-record.c      |  8 ++++--
 tools/perf/builtin-top.c         | 49 ++++++++++++++++----------------
 tools/perf/tests/event_update.c  |  4 ++-
 tools/perf/tests/expand-cgroup.c |  4 ++-
 tools/perf/tests/perf-record.c   |  7 +++--
 tools/perf/tests/topology.c      |  4 ++-
 tools/perf/util/evlist.c         | 32 ++++++++++++++-------
 tools/perf/util/evlist.h         |  2 +-
 tools/perf/util/evsel.c          |  5 ++++
 9 files changed, 72 insertions(+), 43 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 26223f9505c2..a03acfe66c27 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -55,6 +55,7 @@
 #include "asm/bug.h"
 #include "perf.h"
 #include "cputopo.h"
+#include "dwarf-regs.h"
 
 #include <errno.h>
 #include <inttypes.h>
@@ -2990,7 +2991,9 @@ static int record_callchain_opt(const struct option *opt,
 	if (callchain_param.record_mode != CALLCHAIN_NONE)
 		return 0;
 
-	return record_opts__parse_callchain(opt->value, &callchain_param, "fp", unset);
+	return record_opts__parse_callchain(opt->value, &callchain_param,
+					    EM_HOST != EM_S390 ? "fp" : "dwarf",
+					    unset);
 }
 
 
@@ -4269,7 +4272,8 @@ int cmd_record(int argc, const char **argv)
 		record.opts.tail_synthesize = true;
 
 	if (rec->evlist->core.nr_entries == 0) {
-		struct evlist *def_evlist = evlist__new_default();
+		struct evlist *def_evlist = evlist__new_default(&rec->opts.target,
+								callchain_param.enabled);
 
 		if (!def_evlist)
 			goto out;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 5d2587228975..f13560d5b1a3 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -56,6 +56,7 @@
 #include "util/debug.h"
 #include "util/ordered-events.h"
 #include "util/pfm.h"
+#include "dwarf-regs.h"
 
 #include <assert.h>
 #include <elf.h>
@@ -1415,7 +1416,7 @@ callchain_opt(const struct option *opt, const char *arg __maybe_unused, int unse
 	if (callchain->record_mode != CALLCHAIN_NONE)
 		return 0;
 
-	return parse_callchain_opt(opt, "fp", unset);
+	return parse_callchain_opt(opt, EM_HOST != EM_S390 ? "fp" : "dwarf", unset);
 }
 
 
@@ -1700,8 +1701,30 @@ int cmd_top(int argc, const char **argv)
 	if (annotate_check_args() < 0)
 		goto out_delete_evlist;
 
+	status = target__validate(target);
+	if (status) {
+		target__strerror(target, status, errbuf, BUFSIZ);
+		ui__warning("%s\n", errbuf);
+	}
+
+	if (top.uid_str) {
+		uid_t uid = parse_uid(top.uid_str);
+
+		if (uid == UINT_MAX) {
+			ui__error("Invalid User: %s", top.uid_str);
+			status = -EINVAL;
+			goto out_delete_evlist;
+		}
+		status = parse_uid_filter(top.evlist, uid);
+		if (status)
+			goto out_delete_evlist;
+	}
+
+	if (target__none(target))
+		target->system_wide = true;
+
 	if (!top.evlist->core.nr_entries) {
-		struct evlist *def_evlist = evlist__new_default();
+		struct evlist *def_evlist = evlist__new_default(target, callchain_param.enabled);
 
 		if (!def_evlist)
 			goto out_delete_evlist;
@@ -1794,28 +1817,6 @@ int cmd_top(int argc, const char **argv)
 		goto out_delete_evlist;
 	}
 
-	status = target__validate(target);
-	if (status) {
-		target__strerror(target, status, errbuf, BUFSIZ);
-		ui__warning("%s\n", errbuf);
-	}
-
-	if (top.uid_str) {
-		uid_t uid = parse_uid(top.uid_str);
-
-		if (uid == UINT_MAX) {
-			ui__error("Invalid User: %s", top.uid_str);
-			status = -EINVAL;
-			goto out_delete_evlist;
-		}
-		status = parse_uid_filter(top.evlist, uid);
-		if (status)
-			goto out_delete_evlist;
-	}
-
-	if (target__none(target))
-		target->system_wide = true;
-
 	if (evlist__create_maps(top.evlist, target) < 0) {
 		ui__error("Couldn't create thread/CPU maps: %s\n",
 			  errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index cb9e6de2e033..facc65e29f20 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -8,6 +8,7 @@
 #include "header.h"
 #include "machine.h"
 #include "util/synthetic-events.h"
+#include "target.h"
 #include "tool.h"
 #include "tests.h"
 #include "debug.h"
@@ -81,7 +82,8 @@ static int test__event_update(struct test_suite *test __maybe_unused, int subtes
 {
 	struct evsel *evsel;
 	struct event_name tmp;
-	struct evlist *evlist = evlist__new_default();
+	struct target target = {};
+	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
diff --git a/tools/perf/tests/expand-cgroup.c b/tools/perf/tests/expand-cgroup.c
index c7b32a220ca1..dd547f2f77cc 100644
--- a/tools/perf/tests/expand-cgroup.c
+++ b/tools/perf/tests/expand-cgroup.c
@@ -8,6 +8,7 @@
 #include "parse-events.h"
 #include "pmu-events/pmu-events.h"
 #include "pfm.h"
+#include "target.h"
 #include <subcmd/parse-options.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -99,7 +100,8 @@ out:	for (i = 0; i < nr_events; i++)
 static int expand_default_events(void)
 {
 	int ret;
-	struct evlist *evlist = evlist__new_default();
+	struct target target = {};
+	struct evlist *evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 
 	TEST_ASSERT_VAL("failed to get evlist", evlist);
 
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index efbd9cd60c63..c6e31ab8a6b8 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -84,8 +84,11 @@ static int test__PERF_RECORD(struct test_suite *test __maybe_unused, int subtest
 	CPU_ZERO_S(cpu_mask_size, cpu_mask);
 
 	perf_sample__init(&sample, /*all=*/false);
-	if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
-		evlist = evlist__new_default();
+	if (evlist == NULL) { /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
+		struct target target = {};
+
+		evlist = evlist__new_default(&target, /*sample_callchains=*/false);
+	}
 
 	if (evlist == NULL) {
 		pr_debug("Not enough memory to create evlist\n");
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index ec01150d208d..a34a7ab19a80 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -9,6 +9,7 @@
 #include "evlist.h"
 #include "debug.h"
 #include "pmus.h"
+#include "target.h"
 #include <linux/err.h>
 
 #define TEMPL "/tmp/perf-test-XXXXXX"
@@ -37,11 +38,12 @@ static int session_write_header(char *path)
 		.path = path,
 		.mode = PERF_DATA_MODE_WRITE,
 	};
+	struct target target = {};
 
 	session = perf_session__new(&data, NULL);
 	TEST_ASSERT_VAL("can't get session", !IS_ERR(session));
 
-	session->evlist = evlist__new_default();
+	session->evlist = evlist__new_default(&target, /*sample_callchains=*/false);
 	TEST_ASSERT_VAL("can't get evlist", session->evlist);
 	session->evlist->session = session;
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 591bdf0b3e2a..c702741a9173 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -13,6 +13,7 @@
 #include "util/mmap.h"
 #include "thread_map.h"
 #include "target.h"
+#include "dwarf-regs.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "record.h"
@@ -98,38 +99,47 @@ struct evlist *evlist__new(void)
 	return evlist;
 }
 
-struct evlist *evlist__new_default(void)
+struct evlist *evlist__new_default(const struct target *target, bool sample_callchains)
 {
 	struct evlist *evlist = evlist__new();
 	bool can_profile_kernel;
 	struct perf_pmu *pmu = NULL;
+	struct evsel *evsel;
+	char buf[256];
+	int err;
 
 	if (!evlist)
 		return NULL;
 
 	can_profile_kernel = perf_event_paranoid_check(1);
 
-	while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
-		char buf[256];
-		int err;
-
-		snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
+	if (EM_HOST == EM_S390 && sample_callchains) {
+		snprintf(buf, sizeof(buf), "software/%s/%s",
+			 target__has_cpu(target) ? "cpu-clock" : "task-clock",
 			 can_profile_kernel ? "P" : "Pu");
 		err = parse_event(evlist, buf);
-		if (err) {
-			evlist__delete(evlist);
-			return NULL;
+		if (err)
+			goto out_err;
+	} else {
+		while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+			snprintf(buf, sizeof(buf), "%s/cycles/%s", pmu->name,
+				can_profile_kernel ? "P" : "Pu");
+			err = parse_event(evlist, buf);
+			if (err)
+				goto out_err;
 		}
 	}
 
+	/* If there is only 1 event a sample identifier isn't necessary. */
 	if (evlist->core.nr_entries > 1) {
-		struct evsel *evsel;
-
 		evlist__for_each_entry(evlist, evsel)
 			evsel__set_sample_id(evsel, /*can_sample_identifier=*/false);
 	}
 
 	return evlist;
+out_err:
+	evlist__delete(evlist);
+	return NULL;
 }
 
 struct evlist *evlist__new_dummy(void)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index d17c3b57a409..e507f5f20ef6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -104,7 +104,7 @@ struct evsel_str_handler {
 };
 
 struct evlist *evlist__new(void);
-struct evlist *evlist__new_default(void);
+struct evlist *evlist__new_default(const struct target *target, bool sample_callchains);
 struct evlist *evlist__new_dummy(void);
 void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
 		  struct perf_thread_map *threads);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 54c8922a8e47..5a294595a677 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1021,6 +1021,11 @@ static void __evsel__config_callchain(struct evsel *evsel, const struct record_o
 	bool function = evsel__is_function_event(evsel);
 	struct perf_event_attr *attr = &evsel->core.attr;
 
+	if (EM_HOST == EM_S390 && param->record_mode == CALLCHAIN_FP) {
+		pr_warning_once(
+			"Framepointer unwinding lacks kernel support. Use '--call-graph dwarf'\n");
+	}
+
 	evsel__set_sample_bit(evsel, CALLCHAIN);
 
 	attr->sample_max_stack = param->max_stack;
-- 
2.53.0.851.ga537e3e6e9-goog