[PATCH v3 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support

Namhyung Kim posted 5 patches 2 months, 3 weeks ago
There is a newer version of this series
[PATCH v3 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support
Posted by Namhyung Kim 2 months, 3 weeks ago
Add a new event type for deferred callchains and a new callback for the
struct perf_tool.  For now it doesn't actually handle the deferred
callchains but it just marks the sample if it has the
PERF_CONTEXT_USER_DEFERRED in the callchain array.

At least, perf report can dump the raw data with this change.  Actually
this requires the next commit to enable attr.defer_callchain, but if you
already have a data file, it'll show the following result.

  $ perf report -D
  ...
  0x2158@perf.data [0x40]: event: 22
  .
  . ... raw event: size 64 bytes
  .  0000:  16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00  ......@.........
  .  0010:  03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00  ..........3.....
  .  0020:  0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00  ..3.....H.3.....
  .  0030:  08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00  .........z.5....

  121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312/2312: 0xb00000006
  ... FP chain: nr:3
  .....  0: 00007f18fe337fa7
  .....  1: 00007f18fe330e0f
  .....  2: 00007f18fe331448
  : unhandled!

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/lib/perf/include/perf/event.h       |  8 ++++++++
 tools/perf/util/event.c                   |  1 +
 tools/perf/util/evsel.c                   | 19 +++++++++++++++++++
 tools/perf/util/machine.c                 |  1 +
 tools/perf/util/perf_event_attr_fprintf.c |  2 ++
 tools/perf/util/sample.h                  |  2 ++
 tools/perf/util/session.c                 | 20 ++++++++++++++++++++
 tools/perf/util/tool.c                    |  1 +
 tools/perf/util/tool.h                    |  3 ++-
 9 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index aa1e91c97a226e1a..769bc48ca85c0eb8 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -151,6 +151,13 @@ struct perf_record_switch {
 	__u32			 next_prev_tid;
 };
 
+struct perf_record_callchain_deferred {
+	struct perf_event_header header;
+	__u64			 cookie;
+	__u64			 nr;
+	__u64			 ips[];
+};
+
 struct perf_record_header_attr {
 	struct perf_event_header header;
 	struct perf_event_attr	 attr;
@@ -523,6 +530,7 @@ union perf_event {
 	struct perf_record_read			read;
 	struct perf_record_throttle		throttle;
 	struct perf_record_sample		sample;
+	struct perf_record_callchain_deferred	callchain_deferred;
 	struct perf_record_bpf_event		bpf;
 	struct perf_record_ksymbol		ksymbol;
 	struct perf_record_text_poke_event	text_poke;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index fcf44149feb20c35..4c92cc1a952c1d9f 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -61,6 +61,7 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_CGROUP]			= "CGROUP",
 	[PERF_RECORD_TEXT_POKE]			= "TEXT_POKE",
 	[PERF_RECORD_AUX_OUTPUT_HW_ID]		= "AUX_OUTPUT_HW_ID",
+	[PERF_RECORD_CALLCHAIN_DEFERRED]	= "CALLCHAIN_DEFERRED",
 	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
 	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
 	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 989c56d4a23f74f4..244b3e44d090d413 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3089,6 +3089,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 	data->data_src = PERF_MEM_DATA_SRC_NONE;
 	data->vcpu = -1;
 
+	if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) {
+		const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+
+		data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr;
+		if (data->callchain->nr > max_callchain_nr)
+			return -EFAULT;
+
+		data->deferred_cookie = event->callchain_deferred.cookie;
+
+		if (evsel->core.attr.sample_id_all)
+			perf_evsel__parse_id_sample(evsel, event, data);
+		return 0;
+	}
+
 	if (event->header.type != PERF_RECORD_SAMPLE) {
 		if (!evsel->core.attr.sample_id_all)
 			return 0;
@@ -3219,6 +3233,11 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 		if (data->callchain->nr > max_callchain_nr)
 			return -EFAULT;
 		sz = data->callchain->nr * sizeof(u64);
+		if (evsel->core.attr.defer_callchain && data->callchain->nr >= 2 &&
+		    data->callchain->ips[data->callchain->nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
+			data->deferred_cookie = data->callchain->ips[data->callchain->nr - 1];
+			data->deferred_callchain = true;
+		}
 		OVERFLOW_CHECK(array, sz, max_size);
 		array = (void *)array + sz;
 	}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b5dd42588c916d91..841b711d970e9457 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread,
 				*cpumode = PERF_RECORD_MISC_KERNEL;
 				break;
 			case PERF_CONTEXT_USER:
+			case PERF_CONTEXT_USER_DEFERRED:
 				*cpumode = PERF_RECORD_MISC_USER;
 				break;
 			default:
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 66b666d9ce649dd7..741c3d657a8b6ae7 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -343,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 	PRINT_ATTRf(inherit_thread, p_unsigned);
 	PRINT_ATTRf(remove_on_exec, p_unsigned);
 	PRINT_ATTRf(sigtrap, p_unsigned);
+	PRINT_ATTRf(defer_callchain, p_unsigned);
+	PRINT_ATTRf(defer_output, p_unsigned);
 
 	PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
 	PRINT_ATTRf(bp_type, p_unsigned);
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index fae834144ef42105..a8307b20a9ea8066 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -107,6 +107,8 @@ struct perf_sample {
 	/** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
 	u16 weight3;
 	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
+	bool deferred_callchain;	/* Has deferred user callchains */
+	u64 deferred_cookie;
 	char insn[MAX_INSN];
 	void *raw_data;
 	struct ip_callchain *callchain;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4b0236b2df2913e1..361e15c1f26a96d0 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -720,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_CGROUP]		  = perf_event__cgroup_swap,
 	[PERF_RECORD_TEXT_POKE]		  = perf_event__text_poke_swap,
 	[PERF_RECORD_AUX_OUTPUT_HW_ID]	  = perf_event__all64_swap,
+	[PERF_RECORD_CALLCHAIN_DEFERRED]  = perf_event__all64_swap,
 	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
 	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
 	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -854,6 +855,9 @@ static void callchain__printf(struct evsel *evsel,
 	for (i = 0; i < callchain->nr; i++)
 		printf("..... %2d: %016" PRIx64 "\n",
 		       i, callchain->ips[i]);
+
+	if (sample->deferred_callchain)
+		printf("...... (deferred)\n");
 }
 
 static void branch_stack__printf(struct perf_sample *sample,
@@ -1123,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
 		sample_read__printf(sample, evsel->core.attr.read_format);
 }
 
+static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event,
+				    struct perf_sample *sample)
+{
+	if (!dump_trace)
+		return;
+
+	printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n",
+	       event->header.misc, sample->pid, sample->tid, sample->deferred_cookie);
+
+	if (evsel__has_callchain(evsel))
+		callchain__printf(evsel, sample);
+}
+
 static void dump_read(struct evsel *evsel, union perf_event *event)
 {
 	struct perf_record_read *read_event = &event->read;
@@ -1353,6 +1370,9 @@ static int machines__deliver_event(struct machines *machines,
 		return tool->text_poke(tool, event, sample, machine);
 	case PERF_RECORD_AUX_OUTPUT_HW_ID:
 		return tool->aux_output_hw_id(tool, event, sample, machine);
+	case PERF_RECORD_CALLCHAIN_DEFERRED:
+		dump_deferred_callchain(evsel, event, sample);
+		return tool->callchain_deferred(tool, event, sample, evsel, machine);
 	default:
 		++evlist->stats.nr_unknown_events;
 		return -1;
diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
index 22a8a4ffe05f778e..f732d33e7f895ed4 100644
--- a/tools/perf/util/tool.c
+++ b/tools/perf/util/tool.c
@@ -287,6 +287,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
 	tool->read = process_event_sample_stub;
 	tool->throttle = process_event_stub;
 	tool->unthrottle = process_event_stub;
+	tool->callchain_deferred = process_event_sample_stub;
 	tool->attr = process_event_synth_attr_stub;
 	tool->event_update = process_event_synth_event_update_stub;
 	tool->tracing_data = process_event_synth_tracing_data_stub;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 88337cee1e3e2be3..9b9f0a8cbf3de4b5 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -44,7 +44,8 @@ enum show_feature_header {
 
 struct perf_tool {
 	event_sample	sample,
-			read;
+			read,
+			callchain_deferred;
 	event_op	mmap,
 			mmap2,
 			comm,
-- 
2.52.0.rc1.455.g30608eb744-goog
Re: [PATCH v3 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support
Posted by Ian Rogers 2 months, 3 weeks ago
On Thu, Nov 13, 2025 at 11:00 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> Add a new event type for deferred callchains and a new callback for the
> struct perf_tool.  For now it doesn't actually handle the deferred
> callchains but it just marks the sample if it has the
> PERF_CONTEXT_USER_DEFERRED in the callchain array.
>
> At least, perf report can dump the raw data with this change.  Actually
> this requires the next commit to enable attr.defer_callchain, but if you
> already have a data file, it'll show the following result.
>
>   $ perf report -D
>   ...
>   0x2158@perf.data [0x40]: event: 22
>   .
>   . ... raw event: size 64 bytes
>   .  0000:  16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00  ......@.........
>   .  0010:  03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00  ..........3.....
>   .  0020:  0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00  ..3.....H.3.....
>   .  0030:  08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00  .........z.5....
>
>   121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312/2312: 0xb00000006
>   ... FP chain: nr:3
>   .....  0: 00007f18fe337fa7
>   .....  1: 00007f18fe330e0f
>   .....  2: 00007f18fe331448
>   : unhandled!
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  tools/lib/perf/include/perf/event.h       |  8 ++++++++
>  tools/perf/util/event.c                   |  1 +
>  tools/perf/util/evsel.c                   | 19 +++++++++++++++++++
>  tools/perf/util/machine.c                 |  1 +
>  tools/perf/util/perf_event_attr_fprintf.c |  2 ++
>  tools/perf/util/sample.h                  |  2 ++
>  tools/perf/util/session.c                 | 20 ++++++++++++++++++++
>  tools/perf/util/tool.c                    |  1 +
>  tools/perf/util/tool.h                    |  3 ++-
>  9 files changed, 56 insertions(+), 1 deletion(-)
>
> diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
> index aa1e91c97a226e1a..769bc48ca85c0eb8 100644
> --- a/tools/lib/perf/include/perf/event.h
> +++ b/tools/lib/perf/include/perf/event.h
> @@ -151,6 +151,13 @@ struct perf_record_switch {
>         __u32                    next_prev_tid;
>  };
>
> +struct perf_record_callchain_deferred {
> +       struct perf_event_header header;
> +       __u64                    cookie;

Could we add a comment that this value is used to match user and
kernel stack traces together? I don't believe that intent is
immediately obvious from the word "cookie".

> +       __u64                    nr;
> +       __u64                    ips[];
> +};
> +
>  struct perf_record_header_attr {
>         struct perf_event_header header;
>         struct perf_event_attr   attr;
> @@ -523,6 +530,7 @@ union perf_event {
>         struct perf_record_read                 read;
>         struct perf_record_throttle             throttle;
>         struct perf_record_sample               sample;
> +       struct perf_record_callchain_deferred   callchain_deferred;
>         struct perf_record_bpf_event            bpf;
>         struct perf_record_ksymbol              ksymbol;
>         struct perf_record_text_poke_event      text_poke;
> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index fcf44149feb20c35..4c92cc1a952c1d9f 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -61,6 +61,7 @@ static const char *perf_event__names[] = {
>         [PERF_RECORD_CGROUP]                    = "CGROUP",
>         [PERF_RECORD_TEXT_POKE]                 = "TEXT_POKE",
>         [PERF_RECORD_AUX_OUTPUT_HW_ID]          = "AUX_OUTPUT_HW_ID",
> +       [PERF_RECORD_CALLCHAIN_DEFERRED]        = "CALLCHAIN_DEFERRED",
>         [PERF_RECORD_HEADER_ATTR]               = "ATTR",
>         [PERF_RECORD_HEADER_EVENT_TYPE]         = "EVENT_TYPE",
>         [PERF_RECORD_HEADER_TRACING_DATA]       = "TRACING_DATA",
> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> index 989c56d4a23f74f4..244b3e44d090d413 100644
> --- a/tools/perf/util/evsel.c
> +++ b/tools/perf/util/evsel.c
> @@ -3089,6 +3089,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>         data->data_src = PERF_MEM_DATA_SRC_NONE;
>         data->vcpu = -1;
>
> +       if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) {
> +               const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
> +
> +               data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr;
> +               if (data->callchain->nr > max_callchain_nr)
> +                       return -EFAULT;
> +
> +               data->deferred_cookie = event->callchain_deferred.cookie;
> +
> +               if (evsel->core.attr.sample_id_all)
> +                       perf_evsel__parse_id_sample(evsel, event, data);
> +               return 0;
> +       }
> +
>         if (event->header.type != PERF_RECORD_SAMPLE) {
>                 if (!evsel->core.attr.sample_id_all)
>                         return 0;
> @@ -3219,6 +3233,11 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
>                 if (data->callchain->nr > max_callchain_nr)
>                         return -EFAULT;
>                 sz = data->callchain->nr * sizeof(u64);
> +               if (evsel->core.attr.defer_callchain && data->callchain->nr >= 2 &&
> +                   data->callchain->ips[data->callchain->nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
> +                       data->deferred_cookie = data->callchain->ips[data->callchain->nr - 1];
> +                       data->deferred_callchain = true;
> +               }

It'd be nice to have a comment saying what is going on here. I can see
that if there are at least 2 stack slots and the second-to-last is a
magic value then the last should be read as the "cookie". At a first
look this code is difficult to parse so a comment would add value.

Thanks,
Ian

>                 OVERFLOW_CHECK(array, sz, max_size);
>                 array = (void *)array + sz;
>         }
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index b5dd42588c916d91..841b711d970e9457 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread,
>                                 *cpumode = PERF_RECORD_MISC_KERNEL;
>                                 break;
>                         case PERF_CONTEXT_USER:
> +                       case PERF_CONTEXT_USER_DEFERRED:
>                                 *cpumode = PERF_RECORD_MISC_USER;
>                                 break;
>                         default:
> diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
> index 66b666d9ce649dd7..741c3d657a8b6ae7 100644
> --- a/tools/perf/util/perf_event_attr_fprintf.c
> +++ b/tools/perf/util/perf_event_attr_fprintf.c
> @@ -343,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
>         PRINT_ATTRf(inherit_thread, p_unsigned);
>         PRINT_ATTRf(remove_on_exec, p_unsigned);
>         PRINT_ATTRf(sigtrap, p_unsigned);
> +       PRINT_ATTRf(defer_callchain, p_unsigned);
> +       PRINT_ATTRf(defer_output, p_unsigned);
>
>         PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned, false);
>         PRINT_ATTRf(bp_type, p_unsigned);
> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
> index fae834144ef42105..a8307b20a9ea8066 100644
> --- a/tools/perf/util/sample.h
> +++ b/tools/perf/util/sample.h
> @@ -107,6 +107,8 @@ struct perf_sample {
>         /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. */
>         u16 weight3;
>         bool no_hw_idx;         /* No hw_idx collected in branch_stack */
> +       bool deferred_callchain;        /* Has deferred user callchains */
> +       u64 deferred_cookie;
>         char insn[MAX_INSN];
>         void *raw_data;
>         struct ip_callchain *callchain;
> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> index 4b0236b2df2913e1..361e15c1f26a96d0 100644
> --- a/tools/perf/util/session.c
> +++ b/tools/perf/util/session.c
> @@ -720,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
>         [PERF_RECORD_CGROUP]              = perf_event__cgroup_swap,
>         [PERF_RECORD_TEXT_POKE]           = perf_event__text_poke_swap,
>         [PERF_RECORD_AUX_OUTPUT_HW_ID]    = perf_event__all64_swap,
> +       [PERF_RECORD_CALLCHAIN_DEFERRED]  = perf_event__all64_swap,
>         [PERF_RECORD_HEADER_ATTR]         = perf_event__hdr_attr_swap,
>         [PERF_RECORD_HEADER_EVENT_TYPE]   = perf_event__event_type_swap,
>         [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
> @@ -854,6 +855,9 @@ static void callchain__printf(struct evsel *evsel,
>         for (i = 0; i < callchain->nr; i++)
>                 printf("..... %2d: %016" PRIx64 "\n",
>                        i, callchain->ips[i]);
> +
> +       if (sample->deferred_callchain)
> +               printf("...... (deferred)\n");
>  }
>
>  static void branch_stack__printf(struct perf_sample *sample,
> @@ -1123,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
>                 sample_read__printf(sample, evsel->core.attr.read_format);
>  }
>
> +static void dump_deferred_callchain(struct evsel *evsel, union perf_event *event,
> +                                   struct perf_sample *sample)
> +{
> +       if (!dump_trace)
> +               return;
> +
> +       printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n",
> +              event->header.misc, sample->pid, sample->tid, sample->deferred_cookie);
> +
> +       if (evsel__has_callchain(evsel))
> +               callchain__printf(evsel, sample);
> +}
> +
>  static void dump_read(struct evsel *evsel, union perf_event *event)
>  {
>         struct perf_record_read *read_event = &event->read;
> @@ -1353,6 +1370,9 @@ static int machines__deliver_event(struct machines *machines,
>                 return tool->text_poke(tool, event, sample, machine);
>         case PERF_RECORD_AUX_OUTPUT_HW_ID:
>                 return tool->aux_output_hw_id(tool, event, sample, machine);
> +       case PERF_RECORD_CALLCHAIN_DEFERRED:
> +               dump_deferred_callchain(evsel, event, sample);
> +               return tool->callchain_deferred(tool, event, sample, evsel, machine);
>         default:
>                 ++evlist->stats.nr_unknown_events;
>                 return -1;
> diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c
> index 22a8a4ffe05f778e..f732d33e7f895ed4 100644
> --- a/tools/perf/util/tool.c
> +++ b/tools/perf/util/tool.c
> @@ -287,6 +287,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events)
>         tool->read = process_event_sample_stub;
>         tool->throttle = process_event_stub;
>         tool->unthrottle = process_event_stub;
> +       tool->callchain_deferred = process_event_sample_stub;
>         tool->attr = process_event_synth_attr_stub;
>         tool->event_update = process_event_synth_event_update_stub;
>         tool->tracing_data = process_event_synth_tracing_data_stub;
> diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
> index 88337cee1e3e2be3..9b9f0a8cbf3de4b5 100644
> --- a/tools/perf/util/tool.h
> +++ b/tools/perf/util/tool.h
> @@ -44,7 +44,8 @@ enum show_feature_header {
>
>  struct perf_tool {
>         event_sample    sample,
> -                       read;
> +                       read,
> +                       callchain_deferred;
>         event_op        mmap,
>                         mmap2,
>                         comm,
> --
> 2.52.0.rc1.455.g30608eb744-goog
>
Re: [PATCH v3 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support
Posted by Namhyung Kim 2 months, 3 weeks ago
On Fri, Nov 14, 2025 at 09:52:41AM -0800, Ian Rogers wrote:
> On Thu, Nov 13, 2025 at 11:00 PM Namhyung Kim <namhyung@kernel.org> wrote:
> >
> > Add a new event type for deferred callchains and a new callback for the
> > struct perf_tool.  For now it doesn't actually handle the deferred
> > callchains but it just marks the sample if it has the
> > PERF_CONTEXT_USER_DEFERRED in the callchain array.
> >
> > At least, perf report can dump the raw data with this change.  Actually
> > this requires the next commit to enable attr.defer_callchain, but if you
> > already have a data file, it'll show the following result.
> >
> >   $ perf report -D
> >   ...
> >   0x2158@perf.data [0x40]: event: 22
> >   .
> >   . ... raw event: size 64 bytes
> >   .  0000:  16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00  ......@.........
> >   .  0010:  03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00  ..........3.....
> >   .  0020:  0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00  ..3.....H.3.....
> >   .  0030:  08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00  .........z.5....
> >
> >   121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312/2312: 0xb00000006
> >   ... FP chain: nr:3
> >   .....  0: 00007f18fe337fa7
> >   .....  1: 00007f18fe330e0f
> >   .....  2: 00007f18fe331448
> >   : unhandled!
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> >  tools/lib/perf/include/perf/event.h       |  8 ++++++++
> >  tools/perf/util/event.c                   |  1 +
> >  tools/perf/util/evsel.c                   | 19 +++++++++++++++++++
> >  tools/perf/util/machine.c                 |  1 +
> >  tools/perf/util/perf_event_attr_fprintf.c |  2 ++
> >  tools/perf/util/sample.h                  |  2 ++
> >  tools/perf/util/session.c                 | 20 ++++++++++++++++++++
> >  tools/perf/util/tool.c                    |  1 +
> >  tools/perf/util/tool.h                    |  3 ++-
> >  9 files changed, 56 insertions(+), 1 deletion(-)
> >
> > diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
> > index aa1e91c97a226e1a..769bc48ca85c0eb8 100644
> > --- a/tools/lib/perf/include/perf/event.h
> > +++ b/tools/lib/perf/include/perf/event.h
> > @@ -151,6 +151,13 @@ struct perf_record_switch {
> >         __u32                    next_prev_tid;
> >  };
> >
> > +struct perf_record_callchain_deferred {
> > +       struct perf_event_header header;
> > +       __u64                    cookie;
> 
> Could we add a comment that this value is used to match user and
> kernel stack traces together? I don't believe that intent is
> immediately obvious from the word "cookie".

Sounds good, will add.

> 
> > +       __u64                    nr;
> > +       __u64                    ips[];
> > +};
> > +
> >  struct perf_record_header_attr {
> >         struct perf_event_header header;
> >         struct perf_event_attr   attr;
> > @@ -523,6 +530,7 @@ union perf_event {
> >         struct perf_record_read                 read;
> >         struct perf_record_throttle             throttle;
> >         struct perf_record_sample               sample;
> > +       struct perf_record_callchain_deferred   callchain_deferred;
> >         struct perf_record_bpf_event            bpf;
> >         struct perf_record_ksymbol              ksymbol;
> >         struct perf_record_text_poke_event      text_poke;
> > diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> > index fcf44149feb20c35..4c92cc1a952c1d9f 100644
> > --- a/tools/perf/util/event.c
> > +++ b/tools/perf/util/event.c
> > @@ -61,6 +61,7 @@ static const char *perf_event__names[] = {
> >         [PERF_RECORD_CGROUP]                    = "CGROUP",
> >         [PERF_RECORD_TEXT_POKE]                 = "TEXT_POKE",
> >         [PERF_RECORD_AUX_OUTPUT_HW_ID]          = "AUX_OUTPUT_HW_ID",
> > +       [PERF_RECORD_CALLCHAIN_DEFERRED]        = "CALLCHAIN_DEFERRED",
> >         [PERF_RECORD_HEADER_ATTR]               = "ATTR",
> >         [PERF_RECORD_HEADER_EVENT_TYPE]         = "EVENT_TYPE",
> >         [PERF_RECORD_HEADER_TRACING_DATA]       = "TRACING_DATA",
> > diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
> > index 989c56d4a23f74f4..244b3e44d090d413 100644
> > --- a/tools/perf/util/evsel.c
> > +++ b/tools/perf/util/evsel.c
> > @@ -3089,6 +3089,20 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> >         data->data_src = PERF_MEM_DATA_SRC_NONE;
> >         data->vcpu = -1;
> >
> > +       if (event->header.type == PERF_RECORD_CALLCHAIN_DEFERRED) {
> > +               const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
> > +
> > +               data->callchain = (struct ip_callchain *)&event->callchain_deferred.nr;
> > +               if (data->callchain->nr > max_callchain_nr)
> > +                       return -EFAULT;
> > +
> > +               data->deferred_cookie = event->callchain_deferred.cookie;
> > +
> > +               if (evsel->core.attr.sample_id_all)
> > +                       perf_evsel__parse_id_sample(evsel, event, data);
> > +               return 0;
> > +       }
> > +
> >         if (event->header.type != PERF_RECORD_SAMPLE) {
> >                 if (!evsel->core.attr.sample_id_all)
> >                         return 0;
> > @@ -3219,6 +3233,11 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
> >                 if (data->callchain->nr > max_callchain_nr)
> >                         return -EFAULT;
> >                 sz = data->callchain->nr * sizeof(u64);
> > +               if (evsel->core.attr.defer_callchain && data->callchain->nr >= 2 &&
> > +                   data->callchain->ips[data->callchain->nr - 2] == PERF_CONTEXT_USER_DEFERRED) {
> > +                       data->deferred_cookie = data->callchain->ips[data->callchain->nr - 1];
> > +                       data->deferred_callchain = true;
> > +               }
> 
> It'd be nice to have a comment saying what is going on here. I can see
> that if there are at least 2 stack slots and the second-to-last is a
> magic value then the last should be read as the "cookie". At a first
> look this code is difficult to parse so a comment would add value.

Will add the comment.

Thanks,
Namhyung

 
> >                 OVERFLOW_CHECK(array, sz, max_size);
> >                 array = (void *)array + sz;
> >         }