From nobody Fri Nov 29 15:44:11 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 162A8193079; Tue, 17 Sep 2024 22:28:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612111; cv=none; b=hyy8lT+IQRAYBoWPq4bV2uRej1r/02ZXATjfT0KI2j+9JBeRtqwK+3VLnUPnMV1okhITv1ej/DgEpXzQc3YuDaCuYym2af6wTaNAEVKjU4iR48B32jU5/bMbYEXuLaSV7KhdN76zjxBGgvWKKYWKTnZVzhtQ7hNSCZo1JoNTh3o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612111; c=relaxed/simple; bh=nRaCSyRVb+6blaQn01AbFqvMY9x1Rpzwg5L3eXpi3O4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=UDOo1dlgaP/amDjQp/Tx0CCByhaEXHzyu0nuw5oW7tQTb/D3mabccUCOuM+wDVQEtpffa+HgXoflLQfN57cZzt+giRPmObhf8wtBFH7leXtf0eGOnNL12GIgg9xjC0s2hjvfpV4H20ys9bAlycbJM10en9sqvqUHgleERTc0bMg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=tC4kqkQ2; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="tC4kqkQ2" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 09184C4CEC5; Tue, 17 Sep 2024 22:28:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1726612110; bh=nRaCSyRVb+6blaQn01AbFqvMY9x1Rpzwg5L3eXpi3O4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=tC4kqkQ2gSy22Ql+NqPlcxMx+VX1Cj4rX1qSAvBivGiaW3d2ldI46mIe09QeRKmp3 tBs2P3JfkLl4O7g2BwJ33fD/Pd2cOhst8qzuP6CPFSMGz1XBa8Q0A+68J4GRLN50J7 HvB/97ivAogsOIrOJJzYCnmT5aj6jz0WkaIH/IFqkeDzki4WTXWo24qRCho6Eh97Eo WABPzTcHoinyCGC31nSsnc4evEVNf7SmM/FN/FaYBOeJDYzofsRabNIlWm3M4kb0J5 
8sscos2tb2Fp+JjDC3U9jTUryVU22kUr3ahBZLWAzQerkKsghLVX1giqc+kYNHHYHV ZTa3eh8m3iUhg== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Josh Poimboeuf , Steven Rostedt , Mathieu Desnoyers , Indu Bhagat , linux-toolchains@vger.kernel.org Subject: [PATCH 1/5] perf tools: Sync UAPI perf_event.h header Date: Tue, 17 Sep 2024 15:28:16 -0700 Message-ID: <20240917222820.197594-2-namhyung@kernel.org> X-Mailer: git-send-email 2.46.0.792.g87dc391469-goog In-Reply-To: <20240917222820.197594-1-namhyung@kernel.org> References: <20240917222820.197594-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" To import defer_callchain changes. Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/perf_event.h | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/lin= ux/perf_event.h index 4842c36fdf801996..a7f875eb29dd049a 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -460,7 +460,8 @@ struct perf_event_attr { inherit_thread : 1, /* children only inherit if cloned with CLONE_THR= EAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ - __reserved_1 : 26; + defer_callchain: 1, /* generate PERF_RECORD_CALLCHAIN_DEFERRED record= s */ + __reserved_1 : 25; =20 union { __u32 wakeup_events; /* wakeup every n events */ @@ -1217,6 +1218,23 @@ enum perf_event_type { */ PERF_RECORD_AUX_OUTPUT_HW_ID =3D 21, =20 + /* + * This user callchain capture was deferred until shortly before + * returning to user space. 
Previous samples would have kernel + * callchains only and they need to be stitched with this to make full + * callchains. + * + * TODO: do PERF_SAMPLE_{REGS,STACK}_USER also need deferral? + * + * struct { + * struct perf_event_header header; + * u64 nr; + * u64 ips[nr]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CALLCHAIN_DEFERRED =3D 22, + PERF_RECORD_MAX, /* non-ABI */ }; =20 @@ -1247,6 +1265,7 @@ enum perf_callchain_context { PERF_CONTEXT_HV =3D (__u64)-32, PERF_CONTEXT_KERNEL =3D (__u64)-128, PERF_CONTEXT_USER =3D (__u64)-512, + PERF_CONTEXT_USER_DEFERRED =3D (__u64)-640, =20 PERF_CONTEXT_GUEST =3D (__u64)-2048, PERF_CONTEXT_GUEST_KERNEL =3D (__u64)-2176, --=20 2.46.0.792.g87dc391469-goog From nobody Fri Nov 29 15:44:11 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 457AE185B52; Tue, 17 Sep 2024 22:28:34 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612115; cv=none; b=NqTjKZCKnwFVuyQL8UNGFkD1VXmibKe/S9Dh37wvYalwxJt8p5x35a342d0uA4edakmHPZPRKpo687y/fKuJt1Qbz5S/43w/fMoHytTufACJQ7fUj98VOIOlp3J2kgMihi/GBZgElfnwQQESYd5leXV6Idr5rEwNAla5oV5du5Q= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612115; c=relaxed/simple; bh=K7l6H1M0VsgZ0Q9hGEHVvdzK+luyXkqGAYtG/d8TRtw=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=VwE7WSQHyQuuekzZL1gTBggg5KQWeme9Z36KROVLq4sOHp+VYCareXdBcqpgOzVOmEH5pHGe5aPL44NXq7Ldfn2817d6UsAzSZWlbGeh1hfyIX/8YOXQbtAe3Yofpybaoy6fK4OKiQtPqYyfrwKyAGqHUGxPTOmJdSm0sPsvofk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=JdrH6xjY; arc=none 
smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="JdrH6xjY" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6C5FEC4AF09; Tue, 17 Sep 2024 22:28:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1726612114; bh=K7l6H1M0VsgZ0Q9hGEHVvdzK+luyXkqGAYtG/d8TRtw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=JdrH6xjYP6ma2ao23XO+nRPVflgUiHDJRL5yOvCDRxqjIplzqB6NGb7Y29d8sdOLY NVYznZcEB0T/+BlmIDIBi2wpBD8XmJfzUalEjWdfPd3PxgFwn02MkWuP5ePPazWnzI jO6vD3hdTX2rLcW6WzZrTuMAbFNOc+O+NbKhMJD2aUIZDv3e7j8IkN7CFZ7ksR+hgM vpEI1oncnWC/D64aIME8hNBsASOYMFJOEVI86vAT9J8GA3Stj86F/9R1JoUZCPIIC7 t8Coutmi1Zrn3lpZAXU9hZXe1GR0SEoFmy02vtEFiFrWy/RJKRNTMgsKh3SDmNx0zZ D23uMs3ple29Q== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Josh Poimboeuf , Steven Rostedt , Mathieu Desnoyers , Indu Bhagat , linux-toolchains@vger.kernel.org Subject: [PATCH 2/5] perf tools: Minimal DEFERRED_CALLCHAIN support Date: Tue, 17 Sep 2024 15:28:17 -0700 Message-ID: <20240917222820.197594-3-namhyung@kernel.org> X-Mailer: git-send-email 2.46.0.792.g87dc391469-goog In-Reply-To: <20240917222820.197594-1-namhyung@kernel.org> References: <20240917222820.197594-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a new event type for deferred callchains and a new callback for the struct perf_tool. For now it doesn't actually handle the deferred callchains but it just marks the sample if it has the PERF_CONTEXT_USER_DEFERRED in the callchain array. At least, perf report can dump the raw data with this change. 
Actually this requires the next commit to enable attr.defer_callchain, but if you already have a data file, it'll show the following result. $ perf report -D ... 0x5fe0@perf.data [0x40]: event: 22 . . ... raw event: size 64 bytes . 0000: 16 00 00 00 02 00 40 00 02 00 00 00 00 00 00 00 ......@.......= .. . 0010: 00 fe ff ff ff ff ff ff 4b d3 3f 25 45 7f 00 00 ........K.?%E.= .. . 0020: 21 03 00 00 21 03 00 00 43 02 12 ab 05 00 00 00 !...!...C.....= .. . 0030: 00 00 00 00 00 00 00 00 09 00 00 00 00 00 00 00 ..............= .. 0 24344920643 0x5fe0 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 801= /801: 0 ... FP chain: nr:2 ..... 0: fffffffffffffe00 ..... 1: 00007f45253fd34b : unhandled! Signed-off-by: Namhyung Kim --- tools/lib/perf/include/perf/event.h | 7 +++++++ tools/perf/util/event.c | 1 + tools/perf/util/evsel.c | 15 +++++++++++++++ tools/perf/util/machine.c | 1 + tools/perf/util/perf_event_attr_fprintf.c | 1 + tools/perf/util/sample.h | 3 ++- tools/perf/util/session.c | 17 +++++++++++++++++ tools/perf/util/tool.c | 1 + tools/perf/util/tool.h | 3 ++- 9 files changed, 47 insertions(+), 2 deletions(-) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/p= erf/event.h index 37bb7771d9143466..f643a6a2b9fc2279 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -151,6 +151,12 @@ struct perf_record_switch { __u32 next_prev_tid; }; =20 +struct perf_record_callchain_deferred { + struct perf_event_header header; + __u64 nr; + __u64 ips[]; +}; + struct perf_record_header_attr { struct perf_event_header header; struct perf_event_attr attr; @@ -494,6 +500,7 @@ union perf_event { struct perf_record_read read; struct perf_record_throttle throttle; struct perf_record_sample sample; + struct perf_record_callchain_deferred callchain_deferred; struct perf_record_bpf_event bpf; struct perf_record_ksymbol ksymbol; struct perf_record_text_poke_event text_poke; diff --git a/tools/perf/util/event.c 
b/tools/perf/util/event.c index aac96d5d19170091..8cdec373db44deac 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -58,6 +58,7 @@ static const char *perf_event__names[] =3D { [PERF_RECORD_CGROUP] =3D "CGROUP", [PERF_RECORD_TEXT_POKE] =3D "TEXT_POKE", [PERF_RECORD_AUX_OUTPUT_HW_ID] =3D "AUX_OUTPUT_HW_ID", + [PERF_RECORD_CALLCHAIN_DEFERRED] =3D "CALLCHAIN_DEFERRED", [PERF_RECORD_HEADER_ATTR] =3D "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] =3D "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] =3D "TRACING_DATA", diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbf9c8cee3c5658f..701092d6b1b64124 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2676,6 +2676,18 @@ int evsel__parse_sample(struct evsel *evsel, union p= erf_event *event, data->data_src =3D PERF_MEM_DATA_SRC_NONE; data->vcpu =3D -1; =20 + if (event->header.type =3D=3D PERF_RECORD_CALLCHAIN_DEFERRED) { + const u64 max_callchain_nr =3D UINT64_MAX / sizeof(u64); + + data->callchain =3D (struct ip_callchain *)&event->callchain_deferred.nr; + if (data->callchain->nr > max_callchain_nr) + return -EFAULT; + + if (evsel->core.attr.sample_id_all) + perf_evsel__parse_id_sample(evsel, event, data); + return 0; + } + if (event->header.type !=3D PERF_RECORD_SAMPLE) { if (!evsel->core.attr.sample_id_all) return 0; @@ -2806,6 +2818,9 @@ int evsel__parse_sample(struct evsel *evsel, union pe= rf_event *event, if (data->callchain->nr > max_callchain_nr) return -EFAULT; sz =3D data->callchain->nr * sizeof(u64); + if (evsel->core.attr.defer_callchain && data->callchain->nr >=3D 1 && + data->callchain->ips[data->callchain->nr - 1] =3D=3D PERF_CONTEXT_US= ER_DEFERRED) + data->deferred_callchain =3D true; OVERFLOW_CHECK(array, sz, max_size); array =3D (void *)array + sz; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fad227b625d155c5..f367577c91ffa016 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2085,6 +2085,7 @@ 
static int add_callchain_ip(struct thread *thread, *cpumode =3D PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: + case PERF_CONTEXT_USER_DEFERRED: *cpumode =3D PERF_RECORD_MISC_USER; break; default: diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/pe= rf_event_attr_fprintf.c index 59fbbba796974058..113845b35110262a 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -321,6 +321,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_even= t_attr *attr, PRINT_ATTRf(inherit_thread, p_unsigned); PRINT_ATTRf(remove_on_exec, p_unsigned); PRINT_ATTRf(sigtrap, p_unsigned); + PRINT_ATTRf(defer_callchain, p_unsigned); =20 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsig= ned, false); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index 70b2c3135555ec26..010659dc80f88652 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -108,7 +108,8 @@ struct perf_sample { u16 p_stage_cyc; u16 retire_lat; }; - bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool deferred_callchain; /* Has deferred user callchains */ char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dbaf07bf6c5fb88c..1248a0317a2f164a 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -714,6 +714,7 @@ static perf_event__swap_op perf_event__swap_ops[] =3D { [PERF_RECORD_CGROUP] =3D perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] =3D perf_event__text_poke_swap, [PERF_RECORD_AUX_OUTPUT_HW_ID] =3D perf_event__all64_swap, + [PERF_RECORD_CALLCHAIN_DEFERRED] =3D perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] =3D perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] =3D perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] =3D 
perf_event__tracing_data_swap, @@ -1107,6 +1108,19 @@ static void dump_sample(struct evsel *evsel, union p= erf_event *event, sample_read__printf(sample, evsel->core.attr.read_format); } =20 +static void dump_deferred_callchain(struct evsel *evsel, union perf_event = *event, + struct perf_sample *sample) +{ + if (!dump_trace) + return; + + printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n", + event->header.misc, sample->pid, sample->tid, sample->ip); + + if (evsel__has_callchain(evsel)) + callchain__printf(evsel, sample); +} + static void dump_read(struct evsel *evsel, union perf_event *event) { struct perf_record_read *read_event =3D &event->read; @@ -1327,6 +1341,9 @@ static int machines__deliver_event(struct machines *m= achines, return tool->text_poke(tool, event, sample, machine); case PERF_RECORD_AUX_OUTPUT_HW_ID: return tool->aux_output_hw_id(tool, event, sample, machine); + case PERF_RECORD_CALLCHAIN_DEFERRED: + dump_deferred_callchain(evsel, event, sample); + return tool->callchain_deferred(tool, event, sample, evsel, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 3b7f390f26eb427d..e78f16de912ed9e2 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -259,6 +259,7 @@ void perf_tool__init(struct perf_tool *tool, bool order= ed_events) tool->read =3D process_event_sample_stub; tool->throttle =3D process_event_stub; tool->unthrottle =3D process_event_stub; + tool->callchain_deferred =3D process_event_sample_stub; tool->attr =3D process_event_synth_attr_stub; tool->event_update =3D process_event_synth_event_update_stub; tool->tracing_data =3D process_event_synth_tracing_data_stub; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index db1c7642b0d1564d..9987bbde6d5e0565 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -42,7 +42,8 @@ enum show_feature_header { =20 struct perf_tool { event_sample sample, - read; + read, + callchain_deferred; 
event_op mmap, mmap2, comm, --=20 2.46.0.792.g87dc391469-goog From nobody Fri Nov 29 15:44:11 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DDBA1194139; Tue, 17 Sep 2024 22:28:38 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612119; cv=none; b=C0X6dNiDiFOPONES4aEBEqOMVvYw7qf09r6jMUlQkQVDKxGthXTMcwULKjZf5WCvVCMGwPus42UFE1wWrskJ6aNm4KMwXxaekHu7rr3bh0d2NLQER5T3KTANtHPnIn/8NuAgdzezqiAmXWQPOsC8rWC/LhzIXagYCw59kAQRxqA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612119; c=relaxed/simple; bh=AQQN1mnda2JM7GBlwwL4yRhYxSncFVa6aSSrMJcPoM4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=WLaenABJOH763pFqXZ9hLnQhH7adB6SWyZQxk5sRxUaSXkuTl/IiQwqv1QPyOMEmmYMqOSdRNxwlBgAq4/Si67wRIqWRaanSkpiKnuc9VFirP12jyAZ7p+YmR5U7zcjMmkJg7ZhtBB936ueUoiN80pOkqqomvg+9Xek1EBRscKw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=oSsfoT+t; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="oSsfoT+t" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3EEC9C4CED0; Tue, 17 Sep 2024 22:28:35 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1726612118; bh=AQQN1mnda2JM7GBlwwL4yRhYxSncFVa6aSSrMJcPoM4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=oSsfoT+tdqtfE5/R1Up6M/jwbXaK6OINoX/6NSpce/dnFQJpn54AylJAEriQgFkXS /JxPtiizo/xPif82+MxAoAwHd0HBAb1FMwQAyG+q2MLG10O4ROFK8I4qqSVG2FHN7Y Xppcy4dOeYKawdvuY/DADxeCoaZcybAhQza1xr30bnP+alGMNlbIpgkD28aLpYJ/vM 
G4tKzUlcce+9sqRMj/u5gNG1CDEf2TFXHaSPhP2sxUzX7G/j4e5O9rPiRLWDAFJlWL nzvMxBT3Jjs6LXRxu+jKiPUa4atKhdy+N59Tg8gaweaMwksqb+kbt0opSdyQ3euJNY FUsn9Xxm9yXrg== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Josh Poimboeuf , Steven Rostedt , Mathieu Desnoyers , Indu Bhagat , linux-toolchains@vger.kernel.org Subject: [PATCH 3/5] perf record: Enable defer_callchain for user callchains Date: Tue, 17 Sep 2024 15:28:18 -0700 Message-ID: <20240917222820.197594-4-namhyung@kernel.org> X-Mailer: git-send-email 2.46.0.792.g87dc391469-goog In-Reply-To: <20240917222820.197594-1-namhyung@kernel.org> References: <20240917222820.197594-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" And add the missing feature detection logic to clear the flag on old kernels. $ perf record -g -vv true ... 
------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|CALLCHAIN|PERIOD read_format ID|LOST disabled 1 inherit 1 mmap 1 comm 1 freq 1 enable_on_exec 1 task 1 sample_id_all 1 mmap2 1 comm_exec 1 ksymbol 1 bpf_event 1 defer_callchain 1 ------------------------------------------------------------ sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8 sys_perf_event_open failed, error -22 switching off deferred callchain support Signed-off-by: Namhyung Kim --- tools/perf/util/evsel.c | 17 ++++++++++++++++- tools/perf/util/evsel.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 701092d6b1b64124..ad89644b32f23035 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -912,6 +912,14 @@ static void __evsel__config_callchain(struct evsel *ev= sel, struct record_opts *o } } =20 + if (param->record_mode =3D=3D CALLCHAIN_FP && !attr->exclude_callchain_us= er) { + /* + * Enable deferred callchains optimistically. It'll be switched + * off later if the kernel doesn't support it. + */ + attr->defer_callchain =3D 1; + } + if (function) { pr_info("Disabling user space callchains for function trace event.\n"); attr->exclude_callchain_user =3D 1; @@ -2089,6 +2097,8 @@ static int __evsel__prepare_open(struct evsel *evsel,= struct perf_cpu_map *cpus, =20 static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.defer_callchain) + evsel->core.attr.defer_callchain =3D 0; if (perf_missing_features.branch_counters) evsel->core.attr.branch_sample_type &=3D ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) @@ -2144,7 +2154,12 @@ bool evsel__detect_missing_features(struct evsel *ev= sel) * Must probe features in the order they were added to the * perf_event_attr interface. 
*/ - if (!perf_missing_features.branch_counters && + if (!perf_missing_features.defer_callchain && + evsel->core.attr.defer_callchain) { + perf_missing_features.defer_callchain =3D true; + pr_debug2("switching off deferred callchain support\n"); + return true; + } else if (!perf_missing_features.branch_counters && (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { perf_missing_features.branch_counters =3D true; pr_debug2("switching off branch counters support\n"); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 15e745a9a798fa29..f0a1e1d789420a94 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -221,6 +221,7 @@ struct perf_missing_features { bool weight_struct; bool read_lost; bool branch_counters; + bool defer_callchain; }; =20 extern struct perf_missing_features perf_missing_features; --=20 2.46.0.792.g87dc391469-goog From nobody Fri Nov 29 15:44:11 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 062BD192595; Tue, 17 Sep 2024 22:28:42 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612122; cv=none; b=MUNPocBET0ihleuRrI0cYEJhUhmcxKULG37Leal85kWJeBd/8iI7cpUcuYabpW4tTE+Ssn/TYnEaNEuW1+CQMpqN3QpDmB8x5adJQxkUv9YwAuZo461+fd20orL7ctj2wu1Cxt/aVEHu+1Rydagblma5L0u3lLiobehRFNTOJdU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612122; c=relaxed/simple; bh=SeC+e3eqXkp8WyiMLs27WV+ThL7sqre02t6tfARYUfs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=sXbJNhmDLAhhD1HGOiNNU3nUXT6hTJDn2ZeiwZhWWc4vLPNKQ6C8rETwrT/bgHXgqqf1VdMT3JsQqDN54rlaSRkqUevapAzSMgv5CcD4eHH4BYI/Y7Nrrkmh4Ne+ropT6XfbCYXA5ego1fsnvVQfrVd4boXvv+9yBH86sM5xL6o= 
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=G6WiAPxI; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="G6WiAPxI" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C9538C4CEC5; Tue, 17 Sep 2024 22:28:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1726612121; bh=SeC+e3eqXkp8WyiMLs27WV+ThL7sqre02t6tfARYUfs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=G6WiAPxIVXCWNpgwBk6ltusH7SkEmjCodmdV90ECm1+40fyoFWEYRoeoKTZ7PUQsr XRH1PZ/PD9nPzKFBFjCEip5lWaPUIVeh1LZPRPE0imGGMY/DypjAyW7mtQBaFBsfY1 +B+gUBy1nrzch+moI0z3xmz0Sr+rMLUIDA5DMmN+H+S2dLITl5V6bHbTkr2OSKBO3q ekGq7GL9z9HApzvo6UlW9jdUJZRTBxpzRYJTTZ0f4NY0rupC7VTfSR7LhIEcCoq+FS oyca/YfQTNAuuTUacRhru6n0o4gN15xbk2htZO5Rh2isolutnWJsyzRQWVMJ+vNI87 ghd18HqZVIZBw== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Josh Poimboeuf , Steven Rostedt , Mathieu Desnoyers , Indu Bhagat , linux-toolchains@vger.kernel.org Subject: [PATCH 4/5] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED Date: Tue, 17 Sep 2024 15:28:19 -0700 Message-ID: <20240917222820.197594-5-namhyung@kernel.org> X-Mailer: git-send-email 2.46.0.792.g87dc391469-goog In-Reply-To: <20240917222820.197594-1-namhyung@kernel.org> References: <20240917222820.197594-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Handle the deferred callchains in the script output. 
$ perf script perf 801 [000] 18.031793: 1 cycles:P: ffffffff91a14c36 __intel_pmu_enable_all.isra.0+0x56 ([kernel.kall= syms]) ffffffff91d373e9 perf_ctx_enable+0x39 ([kernel.kallsyms]) ffffffff91d36af7 event_function+0xd7 ([kernel.kallsyms]) ffffffff91d34222 remote_function+0x42 ([kernel.kallsyms]) ffffffff91c1ebe1 generic_exec_single+0x61 ([kernel.kallsyms]) ffffffff91c1edac smp_call_function_single+0xec ([kernel.kallsyms]) ffffffff91d37a9d event_function_call+0x10d ([kernel.kallsyms]) ffffffff91d33557 perf_event_for_each_child+0x37 ([kernel.kallsyms= ]) ffffffff91d47324 _perf_ioctl+0x204 ([kernel.kallsyms]) ffffffff91d47c43 perf_ioctl+0x33 ([kernel.kallsyms]) ffffffff91e2f216 __x64_sys_ioctl+0x96 ([kernel.kallsyms]) ffffffff9265f1ae do_syscall_64+0x9e ([kernel.kallsyms]) ffffffff92800130 entry_SYSCALL_64+0xb0 ([kernel.kallsyms]) perf 801 [000] 18.031814: DEFERRED CALLCHAIN 7fb5fc22034b __GI___ioctl+0x3b (/usr/lib/x86_64-linux-gnu/lib= c.so.6) Signed-off-by: Namhyung Kim --- tools/perf/builtin-script.c | 89 +++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a644787fa9e1dc25..311580e25f5b2008 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2540,6 +2540,93 @@ static int process_sample_event(const struct perf_to= ol *tool, return ret; } =20 +static int process_deferred_sample_event(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine) +{ + struct perf_script *scr =3D container_of(tool, struct perf_script, tool); + struct perf_event_attr *attr =3D &evsel->core.attr; + struct evsel_script *es =3D evsel->priv; + unsigned int type =3D output_type(attr->type); + struct addr_location al; + FILE *fp =3D es->fp; + int ret =3D 0; + + if (output[type].fields =3D=3D 0) + return 0; + + /* Set thread to NULL to indicate addr_al and al are not initialized */ + 
addr_location__init(&al); + + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, + sample->time)) { + goto out_put; + } + + if (debug_mode) { + if (sample->time < last_timestamp) { + pr_err("Samples misordered, previous: %" PRIu64 + " this: %" PRIu64 "\n", last_timestamp, + sample->time); + nr_unordered++; + } + last_timestamp =3D sample->time; + goto out_put; + } + + if (filter_cpu(sample)) + goto out_put; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_err("problem processing %d event, skipping it.\n", + event->header.type); + ret =3D -1; + goto out_put; + } + + if (al.filtered) + goto out_put; + + if (!show_event(sample, evsel, al.thread, &al, NULL)) + goto out_put; + + if (evswitch__discard(&scr->evswitch, evsel)) + goto out_put; + + perf_sample__fprintf_start(scr, sample, al.thread, evsel, + PERF_RECORD_CALLCHAIN_DEFERRED, fp); + fprintf(fp, "DEFERRED CALLCHAIN"); + + if (PRINT_FIELD(IP)) { + struct callchain_cursor *cursor =3D NULL; + + if (symbol_conf.use_callchain && sample->callchain) { + cursor =3D get_tls_callchain_cursor(); + if (thread__resolve_callchain(al.thread, cursor, evsel, + sample, NULL, NULL, + scripting_max_stack)) { + pr_info("cannot resolve deferred callchains\n"); + cursor =3D NULL; + } + } + + fputc(cursor ? 
'\n' : ' ', fp); + sample__fprintf_sym(sample, &al, 0, output[type].print_ip_opts, + cursor, symbol_conf.bt_stop_list, fp); + } + + fprintf(fp, "\n"); + + if (verbose > 0) + fflush(fp); + +out_put: + addr_location__exit(&al); + return ret; +} + // Used when scr->per_event_dump is not set static struct evsel_script es_stdout; =20 @@ -4325,6 +4412,7 @@ int cmd_script(int argc, const char **argv) =20 perf_tool__init(&script.tool, !unsorted_dump); script.tool.sample =3D process_sample_event; + script.tool.callchain_deferred =3D process_deferred_sample_event; script.tool.mmap =3D perf_event__process_mmap; script.tool.mmap2 =3D perf_event__process_mmap2; script.tool.comm =3D perf_event__process_comm; @@ -4351,6 +4439,7 @@ int cmd_script(int argc, const char **argv) script.tool.throttle =3D process_throttle_event; script.tool.unthrottle =3D process_throttle_event; script.tool.ordering_requires_timestamps =3D true; + script.tool.merge_deferred_callchains =3D false; session =3D perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); --=20 2.46.0.792.g87dc391469-goog From nobody Fri Nov 29 15:44:11 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 92869192595; Tue, 17 Sep 2024 22:28:45 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612125; cv=none; b=a7r23+JX+2fCxAdPopjwRe3D0E6QQIPU6PWQdvOf5UY1COBk7NZijVstjMZN5T5I61NtRm2VDfYxrrsUbpWMzR2+S6DvLZU0/5KDCpBWd3AiUmg1yTQAkWIdzsuKWi+HRFg16rgEr3nHvV3wudeMbbq+mWzDTzsyiu3pttP5LZ0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1726612125; c=relaxed/simple; bh=/QG97BY6YAFfDemurMlh1FepNL4wqyS+Q72zKPdbUnQ=; 
h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=nSbOxouK7p0IFYMdcYl7jRr4iW/xIeuF2arWuxuAaGzbBmluJzQc3dQiUgHOUTFei3JxzcEGXbjLxI7u5h/059kDY12+NTUIdGS4M9P7qelsOyahHKRPxPHRkFz1hrCUWc5DKfXrgh7EWvnklHUGF6+K2yVM3upG59LuWHSBRoA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=EIaamC1c; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="EIaamC1c" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5EC33C4CED7; Tue, 17 Sep 2024 22:28:42 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1726612125; bh=/QG97BY6YAFfDemurMlh1FepNL4wqyS+Q72zKPdbUnQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=EIaamC1chnaEC/RA6WvZdIha6eIxRtcDP18CVMa3/P84tYGIK3OwD0pxFUIIEDaOm bugxC/NdJsaqlCNueXcPUSY8F+GgxUjD11JH1zmw31SpKnMh/82wxdGN13teb0v1sV ZFofmMhDsn9FioJ/xqQwRy9JG2A+DUs5iuDk9otdqnfQunFnIsaEzgGxZn8gn3R+54 TyMDNJnhJV95ff0eWtGaA02vgWpaDlHSMawtd5YbMRlszwdT94WcQW8EbAkovf6TzF vFQO036vJteM2hLZp91DxyiCtiyzASesz0KYDOZPxLI58s6t3O22P62wDtzJx+GHgx ae5Y6GeCeWXxg== From: Namhyung Kim To: Arnaldo Carvalho de Melo , Ian Rogers , Kan Liang Cc: Jiri Olsa , Adrian Hunter , Peter Zijlstra , Ingo Molnar , LKML , linux-perf-users@vger.kernel.org, Josh Poimboeuf , Steven Rostedt , Mathieu Desnoyers , Indu Bhagat , linux-toolchains@vger.kernel.org Subject: [PATCH 5/5] perf tools: Merge deferred user callchains Date: Tue, 17 Sep 2024 15:28:20 -0700 Message-ID: <20240917222820.197594-6-namhyung@kernel.org> X-Mailer: git-send-email 2.46.0.792.g87dc391469-goog In-Reply-To: <20240917222820.197594-1-namhyung@kernel.org> References: <20240917222820.197594-1-namhyung@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable 
Content-Type: text/plain; charset="utf-8" Save samples with deferred callchains in a separate list and deliver them after merging the user callchains. If users don't want to merge they can set tool->merge_deferred_callchains to false to prevent the behavior. With the previous result, perf script will now show the merged callchains. $ perf script perf 801 [000] 18.031793: 1 cycles:P: ffffffff91a14c36 __intel_pmu_enable_all.isra.0+0x56 ([kernel.kall= syms]) ffffffff91d373e9 perf_ctx_enable+0x39 ([kernel.kallsyms]) ffffffff91d36af7 event_function+0xd7 ([kernel.kallsyms]) ffffffff91d34222 remote_function+0x42 ([kernel.kallsyms]) ffffffff91c1ebe1 generic_exec_single+0x61 ([kernel.kallsyms]) ffffffff91c1edac smp_call_function_single+0xec ([kernel.kallsyms]) ffffffff91d37a9d event_function_call+0x10d ([kernel.kallsyms]) ffffffff91d33557 perf_event_for_each_child+0x37 ([kernel.kallsyms= ]) ffffffff91d47324 _perf_ioctl+0x204 ([kernel.kallsyms]) ffffffff91d47c43 perf_ioctl+0x33 ([kernel.kallsyms]) ffffffff91e2f216 __x64_sys_ioctl+0x96 ([kernel.kallsyms]) ffffffff9265f1ae do_syscall_64+0x9e ([kernel.kallsyms]) ffffffff92800130 entry_SYSCALL_64+0xb0 ([kernel.kallsyms]) 7fb5fc22034b __GI___ioctl+0x3b (/usr/lib/x86_64-linux-gnu/lib= c.so.6) ... The old output can be obtained using the --no-merge-callchains option. Also perf report can get the user callchain entry at the end. 
$ perf report --no-children --percent-limit=3D0 --stdio -q -S __intel_pmu= _enable_all.isra.0 # symbol: __intel_pmu_enable_all.isra.0 0.00% perf [kernel.kallsyms] | ---__intel_pmu_enable_all.isra.0 perf_ctx_enable event_function remote_function generic_exec_single smp_call_function_single event_function_call perf_event_for_each_child _perf_ioctl perf_ioctl __x64_sys_ioctl do_syscall_64 entry_SYSCALL_64 __GI___ioctl Signed-off-by: Namhyung Kim --- tools/perf/Documentation/perf-script.txt | 5 ++ tools/perf/builtin-script.c | 5 +- tools/perf/util/callchain.c | 24 +++++++++ tools/perf/util/callchain.h | 3 ++ tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 1 + tools/perf/util/session.c | 63 +++++++++++++++++++++++- tools/perf/util/tool.c | 1 + tools/perf/util/tool.h | 1 + 9 files changed, 102 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Document= ation/perf-script.txt index b72866ef270b9068..69f018b3d1993716 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -518,6 +518,11 @@ include::itrace.txt[] The known limitations include exception handing such as setjmp/longjmp will have calls/returns not match. =20 +--merge-callchains:: + Enable merging deferred user callchains if available. This is the + default behavior. If you want to see separate CALLCHAIN_DEFERRED + records for some reason, use --no-merge-callchains explicitly. 
+ :GMEXAMPLECMD: script :GMEXAMPLESUBCMD: include::guest-files.txt[] diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 311580e25f5b2008..e3acf4979c36d902 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -4031,6 +4031,7 @@ int cmd_script(int argc, const char **argv) bool header_only =3D false; bool script_started =3D false; bool unsorted_dump =3D false; + bool merge_deferred_callchains =3D true; char *rec_script_path =3D NULL; char *rep_script_path =3D NULL; struct perf_session *session; @@ -4184,6 +4185,8 @@ int cmd_script(int argc, const char **argv) "Guest code can be found in hypervisor process"), OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr, "Enable LBR callgraph stitching approach"), + OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains, + "Enable merge deferred user callchains"), OPTS_EVSWITCH(&script.evswitch), OPT_END() }; @@ -4439,7 +4442,7 @@ int cmd_script(int argc, const char **argv) script.tool.throttle =3D process_throttle_event; script.tool.unthrottle =3D process_throttle_event; script.tool.ordering_requires_timestamps =3D true; - script.tool.merge_deferred_callchains =3D false; + script.tool.merge_deferred_callchains =3D merge_deferred_callchains; session =3D perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 0c7564747a14e539..d1114491c3da5d0a 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1832,3 +1832,27 @@ int sample__for_each_callchain_node(struct thread *t= hread, struct evsel *evsel, } return 0; } + +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain) +{ + u64 nr_orig =3D sample_orig->callchain->nr - 1; + u64 nr_deferred =3D sample_callchain->callchain->nr; + struct ip_callchain *callchain; + + callchain =3D calloc(1 + nr_orig + nr_deferred, sizeof(u64)); + if (callchain 
=3D=3D NULL) { + sample_orig->deferred_callchain =3D false; + return -ENOMEM; + } + + callchain->nr =3D nr_orig + nr_deferred; + /* copy except for the last PERF_CONTEXT_USER_DEFERRED */ + memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64)= ); + /* copy deferred use callchains */ + memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips, + nr_deferred * sizeof(u64)); + + sample_orig->callchain =3D callchain; + return 0; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 86ed9e4d04f9ee7b..89785125ed25783d 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -317,4 +317,7 @@ int sample__for_each_callchain_node(struct thread *thre= ad, struct evsel *evsel, struct perf_sample *sample, int max_stack, bool symbols, callchain_iter_fn cb, void *data); =20 +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f14b7e6ff1dcc2cd..f27d8c4a22aadde9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -81,6 +81,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_= map *cpus, evlist->ctl_fd.ack =3D -1; evlist->ctl_fd.pos =3D -1; evlist->nr_br_cntr =3D -1; + INIT_LIST_HEAD(&evlist->deferred_samples); } =20 struct evlist *evlist__new(void) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bcc1c6984bb58a9d..c26379366554cf09 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -84,6 +84,7 @@ struct evlist { int pos; /* index at evlist core object to check signals */ } ctl_fd; struct event_enable_timer *eet; + struct list_head deferred_samples; }; =20 struct evsel_str_handler { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1248a0317a2f164a..e0a21b896b5784f3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1256,6 +1256,56 
@@ static int evlist__deliver_sample(struct evlist *evl= ist, const struct perf_tool &sample->read.one, machine); } =20 +struct deferred_event { + struct list_head list; + union perf_event *event; +}; + +static int evlist__deliver_deferred_samples(struct evlist *evlist, + const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret =3D 0; + + if (!tool->merge_deferred_callchains) { + evsel =3D evlist__id2evsel(evlist, sample->id); + return tool->callchain_deferred(tool, event, sample, + evsel, machine); + } + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample orig_sample; + + ret =3D evlist__parse_sample(evlist, de->event, &orig_sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + if (sample->tid !=3D orig_sample.tid) + continue; + + evsel =3D evlist__id2evsel(evlist, orig_sample.id); + sample__merge_deferred_callchain(&orig_sample, sample); + ret =3D evlist__deliver_sample(evlist, tool, de->event, + &orig_sample, evsel, machine); + + if (orig_sample.deferred_callchain) + free(orig_sample.callchain); + + list_del(&de->list); + free(de); + + if (ret) + break; + } + return ret; +} + static int machines__deliver_event(struct machines *machines, struct evlist *evlist, union perf_event *event, @@ -1284,6 +1334,16 @@ static int machines__deliver_event(struct machines *= machines, return 0; } dump_sample(evsel, event, sample, perf_env__arch(machine->env)); + if (sample->deferred_callchain && tool->merge_deferred_callchains) { + struct deferred_event *de =3D malloc(sizeof(*de)); + + if (de =3D=3D NULL) + return -ENOMEM; + + de->event =3D event; + list_add_tail(&de->list, &evlist->deferred_samples); + return 0; + } return evlist__deliver_sample(evlist, tool, event, sample, evsel, machin= e); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -1343,7 
+1403,8 @@ static int machines__deliver_event(struct machines *m= achines, return tool->aux_output_hw_id(tool, event, sample, machine); case PERF_RECORD_CALLCHAIN_DEFERRED: dump_deferred_callchain(evsel, event, sample); - return tool->callchain_deferred(tool, event, sample, evsel, machine); + return evlist__deliver_deferred_samples(evlist, tool, event, + sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index e78f16de912ed9e2..385043e06627d269 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -238,6 +238,7 @@ void perf_tool__init(struct perf_tool *tool, bool order= ed_events) tool->cgroup_events =3D false; tool->no_warn =3D false; tool->show_feat_hdr =3D SHOW_FEAT_NO_HEADER; + tool->merge_deferred_callchains =3D true; =20 tool->sample =3D process_event_sample_stub; tool->mmap =3D process_event_stub; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 9987bbde6d5e0565..d06580478ab17a88 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -87,6 +87,7 @@ struct perf_tool { bool cgroup_events; bool no_warn; bool dont_split_sample_group; + bool merge_deferred_callchains; enum show_feature_header show_feat_hdr; }; =20 --=20 2.46.0.792.g87dc391469-goog