[PATCH v16 0/4] perf tool: Support the deferred unwinding infrastructure

Steven Rostedt posted 4 patches 5 months ago
tools/lib/perf/include/perf/event.h       |  8 +++
tools/perf/Documentation/perf-script.txt  |  5 ++
tools/perf/builtin-script.c               | 92 +++++++++++++++++++++++++++++++
tools/perf/util/callchain.c               | 24 ++++++++
tools/perf/util/callchain.h               |  3 +
tools/perf/util/event.c                   |  1 +
tools/perf/util/evlist.c                  |  1 +
tools/perf/util/evlist.h                  |  1 +
tools/perf/util/evsel.c                   | 42 ++++++++++++++
tools/perf/util/evsel.h                   |  1 +
tools/perf/util/machine.c                 |  1 +
tools/perf/util/perf_event_attr_fprintf.c |  1 +
tools/perf/util/sample.h                  |  4 +-
tools/perf/util/session.c                 | 81 +++++++++++++++++++++++++++
tools/perf/util/tool.c                    |  2 +
tools/perf/util/tool.h                    |  4 +-
16 files changed, 269 insertions(+), 2 deletions(-)
[PATCH v16 0/4] perf tool: Support the deferred unwinding infrastructure
Posted by Steven Rostedt 5 months ago
This is the user space side of the perf changes to handle deferred unwinding.
It is based on the kernel side of the perf patch series here:

  https://lore.kernel.org/linux-trace-kernel/20250908171412.268168931@kernel.org/

Changes since v15: https://lore.kernel.org/linux-trace-kernel/20250825180638.877627656@kernel.org/

- Separate out the kernel changes from the user space changes of perf.

- Have the deferred callchain matching code skip a sample only when the TID
  does not match. Otherwise, process the sample, but do not merge the
  callchains if the cookies do not match.
  (Namhyung Kim)

Namhyung Kim (4):
      perf tools: Minimal CALLCHAIN_DEFERRED support
      perf record: Enable defer_callchain for user callchains
      perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED
      perf tools: Merge deferred user callchains

----
 tools/lib/perf/include/perf/event.h       |  8 +++
 tools/perf/Documentation/perf-script.txt  |  5 ++
 tools/perf/builtin-script.c               | 92 +++++++++++++++++++++++++++++++
 tools/perf/util/callchain.c               | 24 ++++++++
 tools/perf/util/callchain.h               |  3 +
 tools/perf/util/event.c                   |  1 +
 tools/perf/util/evlist.c                  |  1 +
 tools/perf/util/evlist.h                  |  1 +
 tools/perf/util/evsel.c                   | 42 ++++++++++++++
 tools/perf/util/evsel.h                   |  1 +
 tools/perf/util/machine.c                 |  1 +
 tools/perf/util/perf_event_attr_fprintf.c |  1 +
 tools/perf/util/sample.h                  |  4 +-
 tools/perf/util/session.c                 | 81 +++++++++++++++++++++++++++
 tools/perf/util/tool.c                    |  2 +
 tools/perf/util/tool.h                    |  4 +-
 16 files changed, 269 insertions(+), 2 deletions(-)
Re: [PATCH v16 0/4] perf tool: Support the deferred unwinding infrastructure
Posted by Peter Zijlstra 3 months, 2 weeks ago

Per the hackery I did:

  https://lkml.kernel.org/r/20251023150002.GR4067720@noisy.programming.kicks-ass.net

The userspace bits need something like the following on top:

---
 tools/perf/util/callchain.c               |    2 +-
 tools/perf/util/evsel.c                   |   10 +++++++---
 tools/perf/util/perf_event_attr_fprintf.c |    1 +
 3 files changed, 9 insertions(+), 4 deletions(-)

--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1832,7 +1832,7 @@ int sample__for_each_callchain_node(stru
 int sample__merge_deferred_callchain(struct perf_sample *sample_orig,
 				     struct perf_sample *sample_callchain)
 {
-	u64 nr_orig = sample_orig->callchain->nr - PERF_DEFERRED_ITEMS;
+	u64 nr_orig = sample_orig->callchain->nr - 1;
 	u64 nr_deferred = sample_callchain->callchain->nr;
 	struct ip_callchain *callchain;
 
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1520,6 +1520,7 @@ void evsel__config(struct evsel *evsel,
 	attr->mmap2    = track && !perf_missing_features.mmap2;
 	attr->comm     = track;
 	attr->build_id = track && opts->build_id;
+	attr->defer_output = track && !perf_missing_features.defer_callchain;
 
 	/*
 	 * ksymbol is tracked separately with text poke because it needs to be
@@ -2206,8 +2207,10 @@ static int __evsel__prepare_open(struct
 
 static void evsel__disable_missing_features(struct evsel *evsel)
 {
-	if (perf_missing_features.defer_callchain)
+	if (perf_missing_features.defer_callchain) {
 		evsel->core.attr.defer_callchain = 0;
+		evsel->core.attr.defer_output = 0;
+	}
 	if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit &&
 	    (evsel->core.attr.sample_type & PERF_SAMPLE_READ))
 		evsel->core.attr.inherit = 0;
@@ -2489,6 +2492,7 @@ static bool evsel__detect_missing_featur
 	perf_missing_features.defer_callchain = true;
 	pr_debug2("switching off deferred callchain support\n");
 	attr.defer_callchain = false;
+	attr.defer_output = false;
 	attr.sample_type = 0;
 
 	attr.inherit = true;
@@ -3255,8 +3259,8 @@ int evsel__parse_sample(struct evsel *ev
 			return -EFAULT;
 		sz = data->callchain->nr * sizeof(u64);
 		if (evsel->core.attr.defer_callchain &&
-		    data->callchain->nr >= PERF_DEFERRED_ITEMS &&
-		    data->callchain->ips[data->callchain->nr - PERF_DEFERRED_ITEMS] == PERF_CONTEXT_USER_DEFERRED) {
+		    data->callchain->nr >= 2 &&
+		    data->callchain->ips[data->callchain->nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
 			data->deferred_callchain = true;
 			data->deferred_cookie = data->callchain->ips[data->callchain->nr - 1];
 		}
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -344,6 +344,7 @@ int perf_event_attr__fprintf(FILE *fp, s
 	PRINT_ATTRf(remove_on_exec, p_unsigned);
 	PRINT_ATTRf(sigtrap, p_unsigned);
 	PRINT_ATTRf(defer_callchain, p_unsigned);
+	PRINT_ATTRf(defer_output, p_unsigned);
 
 	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
 	PRINT_ATTRf(bp_type, p_unsigned);