From nobody Sat Feb 7 18:20:12 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 74AB81E1C02; Fri, 14 Nov 2025 07:00:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103621; cv=none; b=I6mS7Pq+nCjdHYOGKj7/kptWACr7IbVH/4skw7zkhhVYcTgzHHfs3O7+4XIAzblfBK7KX99H2BDW8UE/7wXJb353iwnlerVLMXiVxOC9PwTgKhps/Hiz7mV+v+eSqw5eXG9Kxunec88FdbXYVAzPBPg+i19lCXFt9YM+oDYM7i0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103621; c=relaxed/simple; bh=ojTHLSjZBXCP+lYeZxCWYORNJqxatfl7RNTwhIJgLEE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Lvo7XKQf2sGgxcxOPFOZcnUGjH479pg831FCqFdwfJga5rGVKTNu2rxJeGz+mEhyBEMfZtcM0nHjR+Jba/gO2PWfl53pX+dOdqyaJhVRJpjrQs3173gnWSNzh8x5aRNTJmXkZLl9izGKH0F3mpWekF/tMRj/8h9LASvvXMCdB34= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=RnSPqpm4; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="RnSPqpm4" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 85DBBC2BCAF; Fri, 14 Nov 2025 07:00:20 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1763103621; bh=ojTHLSjZBXCP+lYeZxCWYORNJqxatfl7RNTwhIJgLEE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=RnSPqpm4BG3OYy1ehexNiclK+1uprAddFXpTnJF/0SKsAH6mUEsXpLGwKZCYtCxan ND7bjt0cKwq/flFyRfTa0uzwy5md2BDfL5STGkGyR1D5aE2FdxnO01tb5LO7sNEXKg tEw+ZH5z8ALgaiu5QKTRkGdQis5EPqvjiY10RSzQRIMnrAP9lGbt7mGGpOIVmK4c7b Lf03F72cGmb8vwJGpwM7LPsaFWHHxNuCYSpog9jFtQn86RyW79/WOcMd8/8AVtnLpt 
fLapYdf7D5irjqEmKTLmbfrmE9cTlznLImhnDfBOMl1ukAxPlmXz864r/EeXR575OG RRXuHt3IQKC6A== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , James Clark Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Steven Rostedt , Josh Poimboeuf , Indu Bhagat , Jens Remus , Mathieu Desnoyers , linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org Subject: [PATCH v3 1/5] tools headers UAPI: Sync linux/perf_event.h for deferred callchains Date: Thu, 13 Nov 2025 23:00:14 -0800 Message-ID: <20251114070018.160330-2-namhyung@kernel.org> X-Mailer: git-send-email 2.52.0.rc1.455.g30608eb744-goog In-Reply-To: <20251114070018.160330-1-namhyung@kernel.org> References: <20251114070018.160330-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" It needs to sync with the kernel to support user space changes for the deferred callchains. 
Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/perf_event.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/lin= ux/perf_event.h index 78a362b8002776e5..d292f96bc06f86bc 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -463,7 +463,9 @@ struct perf_event_attr { inherit_thread : 1, /* children only inherit if cloned with CLONE_THR= EAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ - __reserved_1 : 26; + defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records= */ + defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records = */ + __reserved_1 : 24; =20 union { __u32 wakeup_events; /* wake up every n events */ @@ -1239,6 +1241,22 @@ enum perf_event_type { */ PERF_RECORD_AUX_OUTPUT_HW_ID =3D 21, =20 + /* + * This user callchain capture was deferred until shortly before + * returning to user space. Previous samples would have kernel + * callchains only and they need to be stitched with this to make full + * callchains. 
+ * + * struct { + * struct perf_event_header header; + * u64 cookie; + * u64 nr; + * u64 ips[nr]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CALLCHAIN_DEFERRED =3D 22, + PERF_RECORD_MAX, /* non-ABI */ }; =20 @@ -1269,6 +1287,7 @@ enum perf_callchain_context { PERF_CONTEXT_HV =3D (__u64)-32, PERF_CONTEXT_KERNEL =3D (__u64)-128, PERF_CONTEXT_USER =3D (__u64)-512, + PERF_CONTEXT_USER_DEFERRED =3D (__u64)-640, =20 PERF_CONTEXT_GUEST =3D (__u64)-2048, PERF_CONTEXT_GUEST_KERNEL =3D (__u64)-2176, --=20 2.52.0.rc1.455.g30608eb744-goog From nobody Sat Feb 7 18:20:12 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 56BDE2F1FD5; Fri, 14 Nov 2025 07:00:22 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103622; cv=none; b=H6iJcmTMuI2nCa1I/JcB0KEp0KyK+tSK471qm9Qt6Z2fYdtduHrR3atnV+2gXe3HEwYyFPRPqIg0Uv5nyBDt4Pp8ILCj84JcJDzmkdg3prFmDmzW0fMVNIZsG1iNH1E9Mi83Yr1y/uNImZziBkPlohzS3txidoGPNFne8LlA8zQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103622; c=relaxed/simple; bh=QK6Gzpv2I6Cz445xrO288BbQHNVA2m7Du2espLuWlFI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=J+Qi8eCZr3NP9e576iv4VvvWEU9r+cNu8AF4R27NFfHs4hFZ5eIyzsHWZvX2Iqns5HJgI176syjRBqg9ryDItdtm+q2GHVP+edT+GJJVI6hmPxPEiJJ0KpawkbvfTgzDYsYQwqMeLFsLn2zORWnh1UoPEjnStSIzjBNvZIPowno= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=cGiYK3wx; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="cGiYK3wx" Received: by 
smtp.kernel.org (Postfix) with ESMTPSA id 3F1BDC2BCB0; Fri, 14 Nov 2025 07:00:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1763103621; bh=QK6Gzpv2I6Cz445xrO288BbQHNVA2m7Du2espLuWlFI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=cGiYK3wxc1SvkBOUt2AXtr2JaJB8y10k8lpxCve/dVJ6aNgL9hrC0InT/jzQDALx3 Vw+u3mOQXnq6+1W1beTx3k39xq84s+zKTECFsnHxr+vpLXX5XqmoDuGFOowmqSdks2 1vsNGBtXUteekO7JwVcEBmFwpHd1W8LUJQ6OlRdAJyhxPYGMcBT8ij/J3h4fBDXtDa DALbCLHaXv9We0hpwWOeg8gBVSHlo1pfK4NK2x0YzUMqu9q277iEQOO9/J7y/Du+OS ULmS6nRhmFshMSmwUtOnptQ2P686xO2Fw5WqWWg5OOpH9xJmSsq4xrAA9vz1iNzKp3 9u9aG1lzWnlPA== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , James Clark Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Steven Rostedt , Josh Poimboeuf , Indu Bhagat , Jens Remus , Mathieu Desnoyers , linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org Subject: [PATCH v3 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support Date: Thu, 13 Nov 2025 23:00:15 -0800 Message-ID: <20251114070018.160330-3-namhyung@kernel.org> X-Mailer: git-send-email 2.52.0.rc1.455.g30608eb744-goog In-Reply-To: <20251114070018.160330-1-namhyung@kernel.org> References: <20251114070018.160330-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a new event type for deferred callchains and a new callback for the struct perf_tool. For now it doesn't actually handle the deferred callchains but it just marks the sample if it has the PERF_CONTEXT_USER_DEFERRED in the callchain array. At least, perf report can dump the raw data with this change. Actually this requires the next commit to enable attr.defer_callchain, but if you already have a data file, it'll show the following result. $ perf report -D ... 
0x2158@perf.data [0x40]: event: 22 . . ... raw event: size 64 bytes . 0000: 16 00 00 00 02 00 40 00 06 00 00 00 0b 00 00 00 ......@.......= .. . 0010: 03 00 00 00 00 00 00 00 a7 7f 33 fe 18 7f 00 00 ..........3...= .. . 0020: 0f 0e 33 fe 18 7f 00 00 48 14 33 fe 18 7f 00 00 ..3.....H.3...= .. . 0030: 08 09 00 00 08 09 00 00 e6 7a e7 35 1c 00 00 00 .........z.5..= .. 121163447014 0x2158 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 2312= /2312: 0xb00000006 ... FP chain: nr:3 ..... 0: 00007f18fe337fa7 ..... 1: 00007f18fe330e0f ..... 2: 00007f18fe331448 : unhandled! Signed-off-by: Namhyung Kim --- tools/lib/perf/include/perf/event.h | 8 ++++++++ tools/perf/util/event.c | 1 + tools/perf/util/evsel.c | 19 +++++++++++++++++++ tools/perf/util/machine.c | 1 + tools/perf/util/perf_event_attr_fprintf.c | 2 ++ tools/perf/util/sample.h | 2 ++ tools/perf/util/session.c | 20 ++++++++++++++++++++ tools/perf/util/tool.c | 1 + tools/perf/util/tool.h | 3 ++- 9 files changed, 56 insertions(+), 1 deletion(-) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/p= erf/event.h index aa1e91c97a226e1a..769bc48ca85c0eb8 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -151,6 +151,13 @@ struct perf_record_switch { __u32 next_prev_tid; }; =20 +struct perf_record_callchain_deferred { + struct perf_event_header header; + __u64 cookie; + __u64 nr; + __u64 ips[]; +}; + struct perf_record_header_attr { struct perf_event_header header; struct perf_event_attr attr; @@ -523,6 +530,7 @@ union perf_event { struct perf_record_read read; struct perf_record_throttle throttle; struct perf_record_sample sample; + struct perf_record_callchain_deferred callchain_deferred; struct perf_record_bpf_event bpf; struct perf_record_ksymbol ksymbol; struct perf_record_text_poke_event text_poke; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index fcf44149feb20c35..4c92cc1a952c1d9f 100644 --- a/tools/perf/util/event.c +++ 
b/tools/perf/util/event.c @@ -61,6 +61,7 @@ static const char *perf_event__names[] =3D { [PERF_RECORD_CGROUP] =3D "CGROUP", [PERF_RECORD_TEXT_POKE] =3D "TEXT_POKE", [PERF_RECORD_AUX_OUTPUT_HW_ID] =3D "AUX_OUTPUT_HW_ID", + [PERF_RECORD_CALLCHAIN_DEFERRED] =3D "CALLCHAIN_DEFERRED", [PERF_RECORD_HEADER_ATTR] =3D "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] =3D "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] =3D "TRACING_DATA", diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 989c56d4a23f74f4..244b3e44d090d413 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3089,6 +3089,20 @@ int evsel__parse_sample(struct evsel *evsel, union p= erf_event *event, data->data_src =3D PERF_MEM_DATA_SRC_NONE; data->vcpu =3D -1; =20 + if (event->header.type =3D=3D PERF_RECORD_CALLCHAIN_DEFERRED) { + const u64 max_callchain_nr =3D UINT64_MAX / sizeof(u64); + + data->callchain =3D (struct ip_callchain *)&event->callchain_deferred.nr; + if (data->callchain->nr > max_callchain_nr) + return -EFAULT; + + data->deferred_cookie =3D event->callchain_deferred.cookie; + + if (evsel->core.attr.sample_id_all) + perf_evsel__parse_id_sample(evsel, event, data); + return 0; + } + if (event->header.type !=3D PERF_RECORD_SAMPLE) { if (!evsel->core.attr.sample_id_all) return 0; @@ -3219,6 +3233,11 @@ int evsel__parse_sample(struct evsel *evsel, union p= erf_event *event, if (data->callchain->nr > max_callchain_nr) return -EFAULT; sz =3D data->callchain->nr * sizeof(u64); + if (evsel->core.attr.defer_callchain && data->callchain->nr >=3D 2 && + data->callchain->ips[data->callchain->nr - 2] =3D=3D PERF_CONTEXT_US= ER_DEFERRED) { + data->deferred_cookie =3D data->callchain->ips[data->callchain->nr - 1]; + data->deferred_callchain =3D true; + } OVERFLOW_CHECK(array, sz, max_size); array =3D (void *)array + sz; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index b5dd42588c916d91..841b711d970e9457 100644 --- a/tools/perf/util/machine.c +++ 
b/tools/perf/util/machine.c @@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread, *cpumode =3D PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: + case PERF_CONTEXT_USER_DEFERRED: *cpumode =3D PERF_RECORD_MISC_USER; break; default: diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/pe= rf_event_attr_fprintf.c index 66b666d9ce649dd7..741c3d657a8b6ae7 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -343,6 +343,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_even= t_attr *attr, PRINT_ATTRf(inherit_thread, p_unsigned); PRINT_ATTRf(remove_on_exec, p_unsigned); PRINT_ATTRf(sigtrap, p_unsigned); + PRINT_ATTRf(defer_callchain, p_unsigned); + PRINT_ATTRf(defer_output, p_unsigned); =20 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsig= ned, false); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index fae834144ef42105..a8307b20a9ea8066 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -107,6 +107,8 @@ struct perf_sample { /** @weight3: On x86 holds retire_lat, on powerpc holds p_stage_cyc. 
*/ u16 weight3; bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool deferred_callchain; /* Has deferred user callchains */ + u64 deferred_cookie; char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 4b0236b2df2913e1..361e15c1f26a96d0 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -720,6 +720,7 @@ static perf_event__swap_op perf_event__swap_ops[] =3D { [PERF_RECORD_CGROUP] =3D perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] =3D perf_event__text_poke_swap, [PERF_RECORD_AUX_OUTPUT_HW_ID] =3D perf_event__all64_swap, + [PERF_RECORD_CALLCHAIN_DEFERRED] =3D perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] =3D perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] =3D perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] =3D perf_event__tracing_data_swap, @@ -854,6 +855,9 @@ static void callchain__printf(struct evsel *evsel, for (i =3D 0; i < callchain->nr; i++) printf("..... %2d: %016" PRIx64 "\n", i, callchain->ips[i]); + + if (sample->deferred_callchain) + printf("...... 
(deferred)\n"); } =20 static void branch_stack__printf(struct perf_sample *sample, @@ -1123,6 +1127,19 @@ static void dump_sample(struct evsel *evsel, union p= erf_event *event, sample_read__printf(sample, evsel->core.attr.read_format); } =20 +static void dump_deferred_callchain(struct evsel *evsel, union perf_event = *event, + struct perf_sample *sample) +{ + if (!dump_trace) + return; + + printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n", + event->header.misc, sample->pid, sample->tid, sample->deferred_coo= kie); + + if (evsel__has_callchain(evsel)) + callchain__printf(evsel, sample); +} + static void dump_read(struct evsel *evsel, union perf_event *event) { struct perf_record_read *read_event =3D &event->read; @@ -1353,6 +1370,9 @@ static int machines__deliver_event(struct machines *m= achines, return tool->text_poke(tool, event, sample, machine); case PERF_RECORD_AUX_OUTPUT_HW_ID: return tool->aux_output_hw_id(tool, event, sample, machine); + case PERF_RECORD_CALLCHAIN_DEFERRED: + dump_deferred_callchain(evsel, event, sample); + return tool->callchain_deferred(tool, event, sample, evsel, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 22a8a4ffe05f778e..f732d33e7f895ed4 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -287,6 +287,7 @@ void perf_tool__init(struct perf_tool *tool, bool order= ed_events) tool->read =3D process_event_sample_stub; tool->throttle =3D process_event_stub; tool->unthrottle =3D process_event_stub; + tool->callchain_deferred =3D process_event_sample_stub; tool->attr =3D process_event_synth_attr_stub; tool->event_update =3D process_event_synth_event_update_stub; tool->tracing_data =3D process_event_synth_tracing_data_stub; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 88337cee1e3e2be3..9b9f0a8cbf3de4b5 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -44,7 +44,8 @@ enum show_feature_header { =20 struct 
perf_tool { event_sample sample, - read; + read, + callchain_deferred; event_op mmap, mmap2, comm, --=20 2.52.0.rc1.455.g30608eb744-goog From nobody Sat Feb 7 18:20:12 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1137C2F530E; Fri, 14 Nov 2025 07:00:22 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103623; cv=none; b=dUFXyE+akqshF9T0jG2u06we8pNrx65MeIUz8EepaXTEh0Ai44FZD+T4YtS6bdw7ySvq4HTJuumM1DKCnpDR+K/jZjWBn2BOFYjqDFB4zR/HcvSemuzMC8966J7Y5WVlm4aTQSQ65uqupuMYYnGJXP0cuYP28yD+MxS2k+Cxaas= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103623; c=relaxed/simple; bh=UOiCoeF48H4T7ORW4hNVtpfRxJwQznQgrihtEbItWqY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=jDULy7OuDw2XVMhE0w462O5o4yHTCh5h2jgVU4WwI6uIPN/6I1n1X+Mzg2ugQOuTri/2xGlUq9jIo6pHnKcJrWGM0quEfdPKF3yJyHTLZrZy4/anaUCAL64ZA1lG/h6hjp3rHjKXTXAK3NXyqE+hB5fqlS5WZO5o1Z8Im9C2vOM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=K86TRe9L; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="K86TRe9L" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 079A3C4AF15; Fri, 14 Nov 2025 07:00:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1763103622; bh=UOiCoeF48H4T7ORW4hNVtpfRxJwQznQgrihtEbItWqY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=K86TRe9L5Bkh3UFkXl8muPY3C8myFbxI/zim8SAM45+5EUnrt3Fk2jzboZRDgtNDj pKQccZBjldheJXelosyE53pKwf4aS/k7agjUPRDOkHZYsSvKf3s6U7P94Gr8zfyAcN 
113Ede9xallgO97sGd8NVRO9KnRagB2h3fPPhh3O5V6o1r7nuN5f1ITDa/OIsh6nja tV6hfzeYuApxZKkaTJbuwC17mY0D4cumAg/Pol3k9tBz+ZUCHyPD/SFIfAAPQeXW63 hyoj5ChU/5Nro5uMQcy8x9fjgEjshtPjhzxgTklfDkBRg0teS9OiNf/e7jGRTRY8xC sWXXd0n0z+eqw== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , James Clark Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Steven Rostedt , Josh Poimboeuf , Indu Bhagat , Jens Remus , Mathieu Desnoyers , linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org Subject: [PATCH v3 3/5] perf record: Enable defer_callchain for user callchains Date: Thu, 13 Nov 2025 23:00:16 -0800 Message-ID: <20251114070018.160330-4-namhyung@kernel.org> X-Mailer: git-send-email 2.52.0.rc1.455.g30608eb744-goog In-Reply-To: <20251114070018.160330-1-namhyung@kernel.org> References: <20251114070018.160330-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" And add the missing feature detection logic to clear the flag on old kernels. $ perf record -g -vv true ... 
------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|CALLCHAIN|PERIOD read_format ID|LOST disabled 1 inherit 1 mmap 1 comm 1 freq 1 enable_on_exec 1 task 1 sample_id_all 1 mmap2 1 comm_exec 1 ksymbol 1 bpf_event 1 defer_callchain 1 defer_output 1 ------------------------------------------------------------ sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8 sys_perf_event_open failed, error -22 switching off deferred callchain support Signed-off-by: Namhyung Kim --- tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + 2 files changed, 25 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 244b3e44d090d413..f5652d00b457d096 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1061,6 +1061,14 @@ static void __evsel__config_callchain(struct evsel *= evsel, struct record_opts *o } } =20 + if (param->record_mode =3D=3D CALLCHAIN_FP && !attr->exclude_callchain_us= er) { + /* + * Enable deferred callchains optimistically. It'll be switched + * off later if the kernel doesn't support it. 
+ */ + attr->defer_callchain =3D 1; + } + if (function) { pr_info("Disabling user space callchains for function trace event.\n"); attr->exclude_callchain_user =3D 1; @@ -1511,6 +1519,7 @@ void evsel__config(struct evsel *evsel, struct record= _opts *opts, attr->mmap2 =3D track && !perf_missing_features.mmap2; attr->comm =3D track; attr->build_id =3D track && opts->build_id; + attr->defer_output =3D track; =20 /* * ksymbol is tracked separately with text poke because it needs to be @@ -2199,6 +2208,10 @@ static int __evsel__prepare_open(struct evsel *evsel= , struct perf_cpu_map *cpus, =20 static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_callc= hain) + evsel->core.attr.defer_callchain =3D 0; + if (perf_missing_features.defer_callchain && evsel->core.attr.defer_outpu= t) + evsel->core.attr.defer_output =3D 0; if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit= && (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) evsel->core.attr.inherit =3D 0; @@ -2473,6 +2486,13 @@ static bool evsel__detect_missing_features(struct ev= sel *evsel, struct perf_cpu =20 /* Please add new feature detection here. 
*/ =20 + attr.defer_callchain =3D true; + if (has_attr_feature(&attr, /*flags=3D*/0)) + goto found; + perf_missing_features.defer_callchain =3D true; + pr_debug2("switching off deferred callchain support\n"); + attr.defer_callchain =3D false; + attr.inherit =3D true; attr.sample_type =3D PERF_SAMPLE_READ | PERF_SAMPLE_TID; if (has_attr_feature(&attr, /*flags=3D*/0)) @@ -2584,6 +2604,10 @@ static bool evsel__detect_missing_features(struct ev= sel *evsel, struct perf_cpu errno =3D old_errno; =20 check: + if ((evsel->core.attr.defer_callchain || evsel->core.attr.defer_output) && + perf_missing_features.defer_callchain) + return true; + if (evsel->core.attr.inherit && (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && perf_missing_features.inherit_sample_read) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 3ae4ac8f9a37e009..a08130ff2e47a887 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -221,6 +221,7 @@ struct perf_missing_features { bool branch_counters; bool aux_action; bool inherit_sample_read; + bool defer_callchain; }; =20 extern struct perf_missing_features perf_missing_features; --=20 2.52.0.rc1.455.g30608eb744-goog From nobody Sat Feb 7 18:20:12 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 00CAF2F60B5; Fri, 14 Nov 2025 07:00:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103624; cv=none; b=VBbOUy8s/3KiM8WUdddSH2YEjpOVPhmfYO2rz9fyEox26Q26zy4qlbP2bCx9eBQ7kjnm71HgJk/hg8Sjf3wN+zgtZQGwONo+lbULJH06k1LM74WXAIZcZcqNZiUzWtPYt5XGpIRvBRop9hekgspYYXSjKdFq0T9sLJ8pmh/R/zI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103624; c=relaxed/simple; 
bh=ef/M4ov/RXKCLkBK3/atl8Q8KFaGT2Cgw+FGaEd8zcQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=H8XzgNZ1tYKX04NuZXeuJ01JTFsT/9ETYMbUHSqhnJPbH3O16X5/aW/Q+pX3fQSyyOGSQpVzgkMI93dZfN8TSBZEvZwc29Db2BHX4SM3Y3hQ2tpxVK6WhWHFoqg2OXBEnA7uY7Dt98tOXHqYLUQd2lh3lq402C7sW10zcP5lCvQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=WZcNGo7D; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="WZcNGo7D" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C6D3DC2BC87; Fri, 14 Nov 2025 07:00:22 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1763103623; bh=ef/M4ov/RXKCLkBK3/atl8Q8KFaGT2Cgw+FGaEd8zcQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=WZcNGo7D+4Y2hXR9Bi/QGOsaqkv/r0paldHahv7+4HcEYHYu6PYk3rXQQzIR0TGCT z8JO+fQjhtxl6xy/LNrJ+GPkuel/1gMsIF0pmwldHvh7+XPpbUjVMr1d73RPNNV18/ X47wigJfhPhj6QH14kfKSFZcU7C1sZEM2B7Beeg010fpTxmmFY/3qZP2lNaaY4GXDn eOKt/t1KOYuLl1utS6XCmHp0YtxcV5hErye+yskw3NIaiK3/HMc8xykn5kXwhdya5T 935h8izZDDn8+BEBu2txRdzqQacNjCp8SkzchiJyQBDZ+vtTCU8pEK98ZodTw0EJUL 7sfJ/TOw7I4CA== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , James Clark Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Steven Rostedt , Josh Poimboeuf , Indu Bhagat , Jens Remus , Mathieu Desnoyers , linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org Subject: [PATCH v3 4/5] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED Date: Thu, 13 Nov 2025 23:00:17 -0800 Message-ID: <20251114070018.160330-5-namhyung@kernel.org> X-Mailer: git-send-email 2.52.0.rc1.455.g30608eb744-goog In-Reply-To: <20251114070018.160330-1-namhyung@kernel.org> References: <20251114070018.160330-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org 
List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Handle the deferred callchains in the script output. $ perf script ... pwd 2312 121.163435: 249113 cpu/cycles/P: ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms]) ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms]) ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms]) ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms]) ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms]) ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms]) ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kal= lsyms]) b00000006 [unknown] ([unknown]) pwd 2312 121.163447: DEFERRED CALLCHAIN 7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86= -64.so.2) 7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-= linux-x86-64.so.2) 7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-lin= ux-x86-64.so.2) Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 89 +++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index cf0040bbaba9cbc9..3b2896350bad2924 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2719,6 +2719,93 @@ static int process_sample_event(const struct perf_to= ol *tool, return ret; } =20 +static int process_deferred_sample_event(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine) +{ + struct perf_script *scr =3D container_of(tool, struct perf_script, tool); + struct perf_event_attr *attr =3D &evsel->core.attr; + struct evsel_script *es =3D evsel->priv; + unsigned int type =3D output_type(attr->type); + struct addr_location al; + FILE *fp =3D es->fp; + int ret =3D 0; + + if (output[type].fields =3D=3D 0) + return 0; + + /* Set thread to NULL to indicate addr_al 
and al are not initialized */ + addr_location__init(&al); + + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, + sample->time)) { + goto out_put; + } + + if (debug_mode) { + if (sample->time < last_timestamp) { + pr_err("Samples misordered, previous: %" PRIu64 + " this: %" PRIu64 "\n", last_timestamp, + sample->time); + nr_unordered++; + } + last_timestamp =3D sample->time; + goto out_put; + } + + if (filter_cpu(sample)) + goto out_put; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_err("problem processing %d event, skipping it.\n", + event->header.type); + ret =3D -1; + goto out_put; + } + + if (al.filtered) + goto out_put; + + if (!show_event(sample, evsel, al.thread, &al, NULL)) + goto out_put; + + if (evswitch__discard(&scr->evswitch, evsel)) + goto out_put; + + perf_sample__fprintf_start(scr, sample, al.thread, evsel, + PERF_RECORD_CALLCHAIN_DEFERRED, fp); + fprintf(fp, "DEFERRED CALLCHAIN"); + + if (PRINT_FIELD(IP)) { + struct callchain_cursor *cursor =3D NULL; + + if (symbol_conf.use_callchain && sample->callchain) { + cursor =3D get_tls_callchain_cursor(); + if (thread__resolve_callchain(al.thread, cursor, evsel, + sample, NULL, NULL, + scripting_max_stack)) { + pr_info("cannot resolve deferred callchains\n"); + cursor =3D NULL; + } + } + + fputc(cursor ? 
'\n' : ' ', fp); + sample__fprintf_sym(sample, &al, 0, output[type].print_ip_opts, + cursor, symbol_conf.bt_stop_list, fp); + } + + fprintf(fp, "\n"); + + if (verbose > 0) + fflush(fp); + +out_put: + addr_location__exit(&al); + return ret; +} + // Used when scr->per_event_dump is not set static struct evsel_script es_stdout; =20 @@ -4320,6 +4407,7 @@ int cmd_script(int argc, const char **argv) =20 perf_tool__init(&script.tool, !unsorted_dump); script.tool.sample =3D process_sample_event; + script.tool.callchain_deferred =3D process_deferred_sample_event; script.tool.mmap =3D perf_event__process_mmap; script.tool.mmap2 =3D perf_event__process_mmap2; script.tool.comm =3D perf_event__process_comm; @@ -4346,6 +4434,7 @@ int cmd_script(int argc, const char **argv) script.tool.throttle =3D process_throttle_event; script.tool.unthrottle =3D process_throttle_event; script.tool.ordering_requires_timestamps =3D true; + script.tool.merge_deferred_callchains =3D false; session =3D perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); --=20 2.52.0.rc1.455.g30608eb744-goog From nobody Sat Feb 7 18:20:12 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D674C2F6598; Fri, 14 Nov 2025 07:00:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103625; cv=none; b=CZOaDNAoD0gD7hVJ7bYKGsOmxAG1dIXi1Eifp6gi6sH5Tplipci7KcejAQDlYk+lkxakiBi3gR3SRLnukFgR3PyKDhGKxF3ryBx8MUrPk3Py129XfdOXaIbj3IWIL1pZCova76aeHSWedYE3xxffgzB6bQLPZ9P4JNQ86XKSNk8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1763103625; c=relaxed/simple; bh=6Ocyy7HnJakMrtl0AhViXS4uUU1oNUj31kAZJUoFfo4=; 
h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=XdAV634D6i41K5Haw2Ssfo0og28SaDCdSZxRnGuWRd6QG7h4+W7pFjTw9TZ8G3y8n/98Zn/hWiXG123mfAedXcne9zUckVP58AdsfDsamVo4A4ota6c74xS7I9ui2j/NshOkdqPDzJeKwLTKsZvT1o+zVsvW+s9OmptD87VOljI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=OfkXNr2E; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="OfkXNr2E" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A861DC2BC9E; Fri, 14 Nov 2025 07:00:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1763103624; bh=6Ocyy7HnJakMrtl0AhViXS4uUU1oNUj31kAZJUoFfo4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=OfkXNr2E1XUXu9gpdRBaRh20AqFYrdKc7C9mHW6qg0koE82zZC5k+dfPNKrOR6C8u qxKcuswvXhRRNNPf/9k03oD49SQe5aKlMcDd9HattXwfGxl0+szjF84rC7HrFPEzP5 iDLCB5hjh9mxXgU2BFFT3lkkGqBiWvpUZK/1aWoySSklkdDQYUhuGZBl+Ulgosl494 sJUU8u6/cVLJlaGjZyytQALburMY3RmhbAbO6F5EtiIDh6+4IzQTvi0tRJ1OZucFjm vWtsXmSG8ER51tUky1VvFRZavIojLkWCMI5B+2XuCOfW52hXEyWOuW1LXJuocWAtPJ cHgXURFfmYJSw== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , James Clark Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Steven Rostedt , Josh Poimboeuf , Indu Bhagat , Jens Remus , Mathieu Desnoyers , linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org Subject: [PATCH v3 5/5] perf tools: Merge deferred user callchains Date: Thu, 13 Nov 2025 23:00:18 -0800 Message-ID: <20251114070018.160330-6-namhyung@kernel.org> X-Mailer: git-send-email 2.52.0.rc1.455.g30608eb744-goog In-Reply-To: <20251114070018.160330-1-namhyung@kernel.org> References: <20251114070018.160330-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 
Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Save samples with deferred callchains in a separate list and deliver them after merging the user callchains. If users don't want to merge they can set tool->merge_deferred_callchains to false to prevent the behavior. With the previous result, perf script will now show the merged callchains. $ perf script ... pwd 2312 121.163435: 249113 cpu/cycles/P: ffffffff845b78d8 __build_id_parse.isra.0+0x218 ([kernel.kallsyms]) ffffffff83bb5bf6 perf_event_mmap+0x2e6 ([kernel.kallsyms]) ffffffff83c31959 mprotect_fixup+0x1e9 ([kernel.kallsyms]) ffffffff83c31dc5 do_mprotect_pkey+0x2b5 ([kernel.kallsyms]) ffffffff83c3206f __x64_sys_mprotect+0x1f ([kernel.kallsyms]) ffffffff845e6692 do_syscall_64+0x62 ([kernel.kallsyms]) ffffffff8360012f entry_SYSCALL_64_after_hwframe+0x76 ([kernel.kal= lsyms]) 7f18fe337fa7 mprotect+0x7 (/lib/x86_64-linux-gnu/ld-linux-x86= -64.so.2) 7f18fe330e0f _dl_sysdep_start+0x7f (/lib/x86_64-linux-gnu/ld-= linux-x86-64.so.2) 7f18fe331448 _dl_start_user+0x0 (/lib/x86_64-linux-gnu/ld-lin= ux-x86-64.so.2) ... The old output can be obtained using the --no-merge-callchains option. Also perf report can get the user callchain entry at the end. 
$ perf report --no-children --stdio -q -S __build_id_parse.isra.0 # symbol: __build_id_parse.isra.0 8.40% pwd [kernel.kallsyms] | ---__build_id_parse.isra.0 perf_event_mmap mprotect_fixup do_mprotect_pkey __x64_sys_mprotect do_syscall_64 entry_SYSCALL_64_after_hwframe mprotect _dl_sysdep_start _dl_start_user Signed-off-by: Namhyung Kim --- tools/perf/Documentation/perf-script.txt | 5 ++ tools/perf/builtin-inject.c | 1 + tools/perf/builtin-report.c | 1 + tools/perf/builtin-script.c | 5 +- tools/perf/util/callchain.c | 29 ++++++++++ tools/perf/util/callchain.h | 3 ++ tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 2 + tools/perf/util/session.c | 67 +++++++++++++++++++++++- tools/perf/util/tool.c | 1 + tools/perf/util/tool.h | 1 + 11 files changed, 114 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Document= ation/perf-script.txt index 28bec7e78bc858ba..03d1129606328d6d 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -527,6 +527,11 @@ include::itrace.txt[] The known limitations include exception handing such as setjmp/longjmp will have calls/returns not match. =20 +--merge-callchains:: + Enable merging deferred user callchains if available. This is the + default behavior. If you want to see separate CALLCHAIN_DEFERRED + records for some reason, use --no-merge-callchains explicitly. 
+ :GMEXAMPLECMD: script :GMEXAMPLESUBCMD: include::guest-files.txt[] diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 044074080aa53abd..30ae38212f57580a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2542,6 +2542,7 @@ int cmd_inject(int argc, const char **argv) inject.tool.auxtrace =3D perf_event__repipe_auxtrace; inject.tool.bpf_metadata =3D perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group =3D true; + inject.tool.merge_deferred_callchains =3D false; inject.session =3D __perf_session__new(&data, &inject.tool, /*trace_event_repipe=3D*/inject.output.is_pipe, /*host_env=3D*/NULL); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2bc269f5fcef8023..add6b1c2aaf04270 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1614,6 +1614,7 @@ int cmd_report(int argc, const char **argv) report.tool.event_update =3D perf_event__process_event_update; report.tool.feature =3D process_feature_event; report.tool.ordering_requires_timestamps =3D true; + report.tool.merge_deferred_callchains =3D !dump_trace; =20 session =3D perf_session__new(&data, &report.tool); if (IS_ERR(session)) { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3b2896350bad2924..2374c7a1684028cc 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -4025,6 +4025,7 @@ int cmd_script(int argc, const char **argv) bool header_only =3D false; bool script_started =3D false; bool unsorted_dump =3D false; + bool merge_deferred_callchains =3D true; char *rec_script_path =3D NULL; char *rep_script_path =3D NULL; struct perf_session *session; @@ -4178,6 +4179,8 @@ int cmd_script(int argc, const char **argv) "Guest code can be found in hypervisor process"), OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr, "Enable LBR callgraph stitching approach"), + OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains, + "Enable merge deferred 
user callchains"), OPTS_EVSWITCH(&script.evswitch), OPT_END() }; @@ -4434,7 +4437,7 @@ int cmd_script(int argc, const char **argv) script.tool.throttle =3D process_throttle_event; script.tool.unthrottle =3D process_throttle_event; script.tool.ordering_requires_timestamps =3D true; - script.tool.merge_deferred_callchains =3D false; + script.tool.merge_deferred_callchains =3D merge_deferred_callchains; session =3D perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index d7b7eef740b9d6ed..a0a0e6784420d478 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1828,3 +1828,32 @@ int sample__for_each_callchain_node(struct thread *t= hread, struct evsel *evsel, } return 0; } + +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain) +{ + u64 nr_orig =3D sample_orig->callchain->nr - 1; + u64 nr_deferred =3D sample_callchain->callchain->nr; + struct ip_callchain *callchain; + + if (sample_orig->callchain->nr < 2) { + sample_orig->deferred_callchain =3D false; + return -EINVAL; + } + + callchain =3D calloc(1 + nr_orig + nr_deferred, sizeof(u64)); + if (callchain =3D=3D NULL) { + sample_orig->deferred_callchain =3D false; + return -ENOMEM; + } + + callchain->nr =3D nr_orig + nr_deferred; + /* copy original including PERF_CONTEXT_USER_DEFERRED (but the cookie) */ + memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64)= ); + /* copy deferred user callchains */ + memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips, + nr_deferred * sizeof(u64)); + + sample_orig->callchain =3D callchain; + return 0; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 86ed9e4d04f9ee7b..89785125ed25783d 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -317,4 +317,7 @@ int sample__for_each_callchain_node(struct thread *thre= ad, 
struct evsel *evsel, struct perf_sample *sample, int max_stack, bool symbols, callchain_iter_fn cb, void *data); =20 +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e8217efdda5323c6..03674d2cbd015e4f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -85,6 +85,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_= map *cpus, evlist->ctl_fd.pos =3D -1; evlist->nr_br_cntr =3D -1; metricgroup__rblist_init(&evlist->metric_events); + INIT_LIST_HEAD(&evlist->deferred_samples); } =20 struct evlist *evlist__new(void) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 5e71e3dc60423079..911834ae7c2a6f76 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -92,6 +92,8 @@ struct evlist { * of struct metric_expr. */ struct rblist metric_events; + /* samples with deferred_callchain would wait here. 
*/ + struct list_head deferred_samples; }; =20 struct evsel_str_handler { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 361e15c1f26a96d0..2e777fd1bcf6707b 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1285,6 +1285,60 @@ static int evlist__deliver_sample(struct evlist *evl= ist, const struct perf_tool per_thread); } =20 +struct deferred_event { + struct list_head list; + union perf_event *event; +}; + +static int evlist__deliver_deferred_samples(struct evlist *evlist, + const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret =3D 0; + + if (!tool->merge_deferred_callchains) { + evsel =3D evlist__id2evsel(evlist, sample->id); + return tool->callchain_deferred(tool, event, sample, + evsel, machine); + } + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample orig_sample; + + ret =3D evlist__parse_sample(evlist, de->event, &orig_sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + if (sample->tid !=3D orig_sample.tid) + continue; + + if (event->callchain_deferred.cookie =3D=3D orig_sample.deferred_cookie) + sample__merge_deferred_callchain(&orig_sample, sample); + else + orig_sample.deferred_callchain =3D false; + + evsel =3D evlist__id2evsel(evlist, orig_sample.id); + ret =3D evlist__deliver_sample(evlist, tool, de->event, + &orig_sample, evsel, machine); + + if (orig_sample.deferred_callchain) + free(orig_sample.callchain); + + list_del(&de->list); + free(de); + + if (ret) + break; + } + return ret; +} + static int machines__deliver_event(struct machines *machines, struct evlist *evlist, union perf_event *event, @@ -1313,6 +1367,16 @@ static int machines__deliver_event(struct machines *= machines, return 0; } dump_sample(evsel, event, sample, perf_env__arch(machine->env)); + if (sample->deferred_callchain && 
tool->merge_deferred_callchains) { + struct deferred_event *de =3D malloc(sizeof(*de)); + + if (de =3D=3D NULL) + return -ENOMEM; + + de->event =3D event; + list_add_tail(&de->list, &evlist->deferred_samples); + return 0; + } return evlist__deliver_sample(evlist, tool, event, sample, evsel, machin= e); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -1372,7 +1436,8 @@ static int machines__deliver_event(struct machines *m= achines, return tool->aux_output_hw_id(tool, event, sample, machine); case PERF_RECORD_CALLCHAIN_DEFERRED: dump_deferred_callchain(evsel, event, sample); - return tool->callchain_deferred(tool, event, sample, evsel, machine); + return evlist__deliver_deferred_samples(evlist, tool, event, + sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index f732d33e7f895ed4..c5d3b464b2a433b3 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -266,6 +266,7 @@ void perf_tool__init(struct perf_tool *tool, bool order= ed_events) tool->cgroup_events =3D false; tool->no_warn =3D false; tool->show_feat_hdr =3D SHOW_FEAT_NO_HEADER; + tool->merge_deferred_callchains =3D true; =20 tool->sample =3D process_event_sample_stub; tool->mmap =3D process_event_stub; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 9b9f0a8cbf3de4b5..e96b69d25a5b737d 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -90,6 +90,7 @@ struct perf_tool { bool cgroup_events; bool no_warn; bool dont_split_sample_group; + bool merge_deferred_callchains; enum show_feature_header show_feat_hdr; }; =20 --=20 2.52.0.rc1.455.g30608eb744-goog