From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0FAF7EEDE; Fri, 18 Jul 2025 16:42:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; cv=none; b=V7J1/1nK9MwJH9WdKilYlKk+hfL2fYbVDw9OXtqIBcWtGoSuIr0WNK+tgxHYxBLcN4g6l1anBjybky3tPT9Lrs5/tttoiytqZPMkbXkwZ4/B3QbwgYi14BuKAOeNIrk4kci3mud9FhlRwyOWH/rVFUQJzHrGeSb8G+uKMj8IIGI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; c=relaxed/simple; bh=PlMed3FiOV4T6l6o16L8MmIemjqgKth6Lso3Lz/CSCM=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=avU8gkV2JRaNgC9MmJ3wecgQ6oWz2c+bCMQGTbgaW/mvJo1rNsEcGXe0QI9R6QGI9Q3RNu7QjikjKv96JWWzw3EEnrZA8ZN+0y/Q1JXwfwKfQGkbyj0kX7cSab8aR8zG2Ur7+5PJlgSePD6eR4AVnOL8KYjStIXHGFdF7t2Yu9s= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=kpPeK7JH; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="kpPeK7JH" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 95476C4CEED; Fri, 18 Jul 2025 16:42:59 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856979; bh=PlMed3FiOV4T6l6o16L8MmIemjqgKth6Lso3Lz/CSCM=; h=Date:From:To:Cc:Subject:References:From; b=kpPeK7JHJMkrK6cUZEgcxnAVqey9jEpYFdvgjwwMHRBKLzOljCgXPfKAEnkvZNhpM Mm8+2src/Zd8uKJT2p6vxmTUJC6zfj6ChjNbRsoK4bZ8Y6gHwq+gzs3ds3ooep/sn2 t6BCzEylWnibAsZQ0Kplc5GYcbWeYyt5Q2dbniabtAhJlUmgCHTY/lXCYw63UkKxiX b1+t6twac9ysuSrhHaRGxw6p11p0QiwK5ND9F5cEZvbmchLH2gyF8/ycu7MMhZANPe 
mc0i7JtKMoO5WEIzdGHKww45Rr64Zuk8sVE417Cj8tueC2Fu1nUZorPqGwZyQ9zVOr xJQkVpVaERyZw== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoB9-00000007JWK-1asR; Fri, 18 Jul 2025 12:43:23 -0400 Message-ID: <20250718164323.229516800@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:20 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra , Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James , Namhyung Kim , Alexei Starovoitov Subject: [PATCH v14 01/11] perf: Remove get_perf_callchain() init_nr argument References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Josh Poimboeuf The 'init_nr' argument has double duty: it's used to initialize both the number of contexts and the number of stack entries. That's confusing and the callers always pass zero anyway. Hard code the zero. 
Acked-by: Namhyung Kim Acked-by: Alexei Starovoitov Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- include/linux/perf_event.h | 2 +- kernel/bpf/stackmap.c | 4 ++-- kernel/events/callchain.c | 12 ++++++------ kernel/events/core.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ec9d96025683..54e0d31afcad 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1719,7 +1719,7 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_cal= lchain_entry); extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, st= ruct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, = struct pt_regs *regs); extern struct perf_callchain_entry * -get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool us= er, +get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 3615c06b7dfa..ec3a57a5fba1 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -314,7 +314,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, str= uct bpf_map *, map, if (max_depth > sysctl_perf_event_max_stack) max_depth =3D sysctl_perf_event_max_stack; =20 - trace =3D get_perf_callchain(regs, 0, kernel, user, max_depth, + trace =3D get_perf_callchain(regs, kernel, user, max_depth, false, false); =20 if (unlikely(!trace)) @@ -451,7 +451,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struc= t task_struct *task, else if (kernel && task) trace =3D get_callchain_entry_for_task(task, max_depth); else - trace =3D get_perf_callchain(regs, 0, kernel, user, max_depth, + trace =3D get_perf_callchain(regs, kernel, user, max_depth, crosstask, false); =20 if (unlikely(!trace) || trace->nr < skip) { diff --git 
a/kernel/events/callchain.c b/kernel/events/callchain.c index 6c83ad674d01..b0f5bd228cd8 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -217,7 +217,7 @@ static void fixup_uretprobe_trampoline_entries(struct p= erf_callchain_entry *entr } =20 struct perf_callchain_entry * -get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool us= er, +get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark) { struct perf_callchain_entry *entry; @@ -228,11 +228,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr,= bool kernel, bool user, if (!entry) return NULL; =20 - ctx.entry =3D entry; - ctx.max_stack =3D max_stack; - ctx.nr =3D entry->nr =3D init_nr; - ctx.contexts =3D 0; - ctx.contexts_maxed =3D false; + ctx.entry =3D entry; + ctx.max_stack =3D max_stack; + ctx.nr =3D entry->nr =3D 0; + ctx.contexts =3D 0; + ctx.contexts_maxed =3D false; =20 if (kernel && !user_mode(regs)) { if (add_mark) diff --git a/kernel/events/core.c b/kernel/events/core.c index 0db36b2b2448..b2a53cabcb17 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8178,7 +8178,7 @@ perf_callchain(struct perf_event *event, struct pt_re= gs *regs) if (!kernel && !user) return &__empty_callchain; =20 - callchain =3D get_perf_callchain(regs, 0, kernel, user, + callchain =3D get_perf_callchain(regs, kernel, user, max_stack, crosstask, true); return callchain ?: &__empty_callchain; } --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 24BAF2E6124; Fri, 18 Jul 2025 16:42:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; cv=none; 
b=AUr5vvjDoFzCSb6vvhYbxtDUn19maPQi+ktJe3HFRe0saBHq1AWWqWnBF8Vp74S3rY6vXnojkYWC6dyAR87zPeyfYb1uQLm+UtUtjfk8xZ7Ak0YpMbaaXsn+5msJrgD08L3qOosTpbq3rQvw3cLPHxolxd3YDoCdg2C4+M2F+wA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; c=relaxed/simple; bh=INPL5mykLyHh9BAddpKD33kEw5Zh1QT3Mw8WI6pEDxg=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=pnxD8Sbfy3UkjKj6+BhI7GUW0bMG9uabKiyB+lV8YGsfcpYM5bmwfBjzu4N39Gmx1+S0jm6YLzZxB4wEc/A50bcC56dlMKk4YwyKQnYOOWGPC+Y9oUHVdtlSpBG2jCZTbczoYkRS39WPwmZVZHQVBRyoRLmep86W3gBF00h6bRg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=e4mQoCg5; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="e4mQoCg5" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9E490C4CEF5; Fri, 18 Jul 2025 16:42:59 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856979; bh=INPL5mykLyHh9BAddpKD33kEw5Zh1QT3Mw8WI6pEDxg=; h=Date:From:To:Cc:Subject:References:From; b=e4mQoCg5Tz9JFE4uV4vL2IjU1RigIFqVqZ4sJZO8H6VGogZEzTAIFy5jN65H1XejE oTqrFMqeIxHiuoPGmDVEEsZt8J/W8iFLxLm7FwIQkvSA5UxlhtPrgDRELQJ3UQcUea IDAcD5fyoT9TMo7rLvFdSr3ltWQ9riFoDHaLy6UBrcVGr7zZFK1fk8HrpOmLwH8ljq ijDJTTaZjSl89JddpAnDtIDJOZ1EMVGTFp02XmLG/0RnBM4eQvPC5XKmRFzUXnHI3n uaJhO4pLxQQZvm+KPbf1rNAkr19Ie9jo/dpoC9CQyHjv/5asbqP1NVvTD2w5tCtpTR AS3o6HnJ62jbg== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoB9-00000007JWo-2HKG; Fri, 18 Jul 2025 12:43:23 -0400 Message-ID: <20250718164323.399926416@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:21 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra 
, Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 02/11] perf: Have get_perf_callchain() return NULL if crosstask and user are set References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Josh Poimboeuf get_perf_callchain() doesn't support cross-task unwinding for user space stacks, have it return NULL if both the crosstask and user arguments are set. Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- kernel/events/callchain.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index b0f5bd228cd8..cd0e3fc7ed05 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -224,6 +224,10 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, = bool user, struct perf_callchain_entry_ctx ctx; int rctx, start_entry_idx; =20 + /* crosstask is not supported for user stacks */ + if (crosstask && user && !kernel) + return NULL; + entry =3D get_callchain_entry(&rctx); if (!entry) return NULL; @@ -240,7 +244,7 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, b= ool user, perf_callchain_kernel(&ctx, regs); } =20 - if (user) { + if (user && !crosstask) { if (!user_mode(regs)) { if (current->mm) regs =3D task_pt_regs(current); @@ -249,9 +253,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, b= ool user, } =20 if (regs) { - if (crosstask) - goto exit_put; - if (add_mark) perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); =20 @@ -261,7 +262,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, b= ool user, } } =20 -exit_put: 
put_callchain_entry(rctx); =20 return entry; --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 27A272E6D0C; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; cv=none; b=G3b4Y1t8UVntXSBdJo3n9nVHWW9O6ka424zhGB0IgcoAe0UzhRRCNhOdwSIC6zXE+t5ncSoOwRevyiNiAQDrrhj8639k4VyajhL78bKAPW0mE972/3ST4XV6GK+GD49lsxc5CSaYLexBOzh2reYwL9f5/hN1rPIV1/tZX48s6lA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; c=relaxed/simple; bh=Ah9wyXhO1XR0jWArWKELL4QxAEbvTR/z2UzGwouYFJc=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=eVe9kYlyPytzWK/YS7VaVZdXPBOveZuSasvmSZpju2pdrQII+xklCNnGlgMkk7Wj10Rf6Vo5a65nS7NzNqddPSJ4UUWWR/T5zMYJFsv8il8tWIFoQIWhWEYp4PSDex6/VEeWe0OoJYowGvaVGcAu+OJzYXuQ0ymYXxOfslChg8A= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=E9dItfG7; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="E9dItfG7" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E4460C116C6; Fri, 18 Jul 2025 16:42:59 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856980; bh=Ah9wyXhO1XR0jWArWKELL4QxAEbvTR/z2UzGwouYFJc=; h=Date:From:To:Cc:Subject:References:From; b=E9dItfG7Zjmn3cakdd9c6KJ/gnr/C6QrxdJLO86EyHn0cySLvoHsUJfUCN7dphlTV iHnvQTcAapRJrhcFp925w9zI+PxtjyxsVxJgjQLTB0QdMz2OFSMjRjhZixBeQ7Bhqe v3xzujBWVXnlqksC9oOY9bH0tVTlFv7o+rAXqMQ7sRhVmtivjZzYUN8a6QBEKWPihE 
kxKAvLYQmnBWzOgqooXuxugaVDKaxLOTyfctdlAGbqdZRg9ZKEITVy8pTmruJ7USMU SFwncqYZL2DLFRVhPMhvqjIRXyrtbmPyogCf3UeUAUyvyV+AsXCzHNuU8kEKIkarpv 8AYHhldvFY8Ow== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoB9-00000007JXI-2ySA; Fri, 18 Jul 2025 12:43:23 -0400 Message-ID: <20250718164323.562497415@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:22 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra , Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 03/11] perf: Use current->flags & PF_KTHREAD|PF_USER_WORKER instead of current->mm == NULL References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Steven Rostedt To determine if a task is a kernel thread or not, it is more reliable to use (current->flags & (PF_KTHREAD|PF_USER_WORKER)) than to rely on current->mm being NULL. That is because some kernel tasks (io_uring helpers) may have a mm field. 
Link: https://lore.kernel.org/linux-trace-kernel/20250424163607.GE18306@noi= sy.programming.kicks-ass.net/ Link: https://lore.kernel.org/all/20250624130744.602c5b5f@batman.local.home/ Signed-off-by: Steven Rostedt (Google) --- Changes since v13: https://lore.kernel.org/20250708020050.410920799@kernel.= org - Missed one location that still only checked PF_KTHREAD kernel/events/callchain.c | 6 +++--- kernel/events/core.c | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index cd0e3fc7ed05..5982d18f169b 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -246,10 +246,10 @@ get_perf_callchain(struct pt_regs *regs, bool kernel,= bool user, =20 if (user && !crosstask) { if (!user_mode(regs)) { - if (current->mm) - regs =3D task_pt_regs(current); - else + if (current->flags & (PF_KTHREAD | PF_USER_WORKER)) regs =3D NULL; + else + regs =3D task_pt_regs(current); } =20 if (regs) { diff --git a/kernel/events/core.c b/kernel/events/core.c index b2a53cabcb17..1fa554e2666d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7414,7 +7414,7 @@ static void perf_sample_regs_user(struct perf_regs *r= egs_user, if (user_mode(regs)) { regs_user->abi =3D perf_reg_abi(current); regs_user->regs =3D regs; - } else if (!(current->flags & PF_KTHREAD)) { + } else if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) { perf_get_regs_user(regs_user, regs); } else { regs_user->abi =3D PERF_SAMPLE_REGS_ABI_NONE; @@ -8054,7 +8054,7 @@ static u64 perf_virt_to_phys(u64 virt) * Try IRQ-safe get_user_page_fast_only first. * If failed, leave phys_addr as 0. 
*/ - if (current->mm !=3D NULL) { + if (!(current->flags & (PF_KTHREAD | PF_USER_WORKER))) { struct page *p; =20 pagefault_disable(); --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6B27D2E6D3A; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; cv=none; b=dbq/kXEX44z4mmLhFx0HjM60+olsEmG4vk2C0CTaHSAfucP1IfTKIeDlWtjjk2IPMYXwA0wT7OLbgqjQxwJrTSk6I+MyLWn9H3YicI9UvPGo7p1IeJrenNCEgqgq1SaTyA9PrL828FUQ6OzQ4FupSxze4pCms1eO2dLNVxIpCFo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; c=relaxed/simple; bh=mI3D1M5ZCCJZqFgS/KQwGydDItxmqMt6XzVMmgcg8Qs=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=gQVv/J72a+s1Vl5Jv5K+NuwYT8WstkJ54FrKSNE6yLlzhQL6Cc4gqXomYo2DeDh2V8hTGh0BUXEzJCEFL0i0C6rfyht1gsqrFiQf1sVoeGfeBoFMY1mZEA0p+PzGNIqtrmYi8fKYy6sS3/EzWXoPyXmj8MdN4D6+c+Y4WM0l1bU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=bHrJK+aM; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="bHrJK+aM" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 06735C116D0; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856980; bh=mI3D1M5ZCCJZqFgS/KQwGydDItxmqMt6XzVMmgcg8Qs=; h=Date:From:To:Cc:Subject:References:From; b=bHrJK+aMHBWvVtTh8THZE+njuw6VGsCiA1zSnb7U4/l1bGvYvxWnjuprC/LCYCcLj /ZLGjPkO2+5EaFtPfFMNMcDEZVi8IJERtBAjgxn8f7SyeYWbzZL04YhnpJdxaMPT+a 
jiJYU7yFtcWwl+h73eSMqrsQaXTSJd7L4nAASvFCaM2JEyH1SsCvgHHi3qMS0Q1G+C zy89ifqPj8q7bjgklInZP59WcayHV47csxiTz+SeKZE8bqRH+6ru5w1PGxxHrwXF4l ZDX0XnOGHI3qFTHwL/9QMQ5497MdpA145QYl3Dc52ncD+WLTAxVQ3ZsnkoUu9v0CES +i/cw4r60kM3Q== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoB9-00000007JXm-3luk; Fri, 18 Jul 2025 12:43:23 -0400 Message-ID: <20250718164323.749229679@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:23 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra , Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 04/11] perf: Simplify get_perf_callchain() user logic References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Josh Poimboeuf Simplify the get_perf_callchain() user logic a bit. task_pt_regs() should never be NULL. 
Acked-by: Namhyung Kim Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- kernel/events/callchain.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 5982d18f169b..808c0d7a31fa 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -247,21 +247,19 @@ get_perf_callchain(struct pt_regs *regs, bool kernel,= bool user, if (user && !crosstask) { if (!user_mode(regs)) { if (current->flags & (PF_KTHREAD | PF_USER_WORKER)) - regs =3D NULL; - else - regs =3D task_pt_regs(current); + goto exit_put; + regs =3D task_pt_regs(current); } =20 - if (regs) { - if (add_mark) - perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); + if (add_mark) + perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); =20 - start_entry_idx =3D entry->nr; - perf_callchain_user(&ctx, regs); - fixup_uretprobe_trampoline_entries(entry, start_entry_idx); - } + start_entry_idx =3D entry->nr; + perf_callchain_user(&ctx, regs); + fixup_uretprobe_trampoline_entries(entry, start_entry_idx); } =20 +exit_put: put_callchain_entry(rctx); =20 return entry; --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D3F8C2E7197; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; cv=none; b=nu+NeczR1Vjd/5Ziu986rVpfXfYByKqIczi6LZBZbM4+hStK5ewb+LMNwu4HoaHCjxDepirKK3T+xidJgNCBvUSatNyHEBezhDTkGE6PIM9lftcaMlFqPwj+g37QbfzaiskWJ+2EZFE/yg4XFvU5X3uhb4IKvMM/dmKbDSfZD7s= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856980; c=relaxed/simple; 
bh=PrLV5/6t/jDNDZv13dJ+tk4FMXWbDNxrkHMrhIU764A=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=RO/iC9LGvPlArsPbmeIYnnl5eo9R9qse3T+Gq3NMNCsgizZdhxmVAEZ155BEOAB/towDI9xVioQmblJpMkfqsO0eXb0Ub2IZIlh81xPzb5uplEaeVPdmlVSmmVLGeVs3sH2VIjZN3qzztFhNEUAVxQGwD6ZvsJX5UJf8QM64ki0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=TSV60Ani; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="TSV60Ani" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 59305C4CEFE; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856980; bh=PrLV5/6t/jDNDZv13dJ+tk4FMXWbDNxrkHMrhIU764A=; h=Date:From:To:Cc:Subject:References:From; b=TSV60Ani3MuKOzsA0ZrC05OJmgx3B+VpNfAUYCfhwT2YZ4q4C4oR6Ql7bL0nGxS47 AO1iLuFT51LYwUsrwBjj3G0e/k4oo61b3w4ZWluEJ36kVTs6yyhce9LrYhI+TgHa7D VaFL4XD9YyMiQ6T5SRYq3Wj/aUOLWKIXABGzJBh387N2vRSIsjC/c5DVjSwo6Cb+zd a+UYO9/GeuPcV+daYGTCsnibfIB/MWF2NhQMJjI+oDkBTsewGpdjVKK7X5iDCT+vJl eLHPy3iX3ZIyc5AsJDF0UnOw7HCOjHh+X6XyMj5hUINmbktHaCfmRgZQMGKz0OnRk7 /eGc61OOCxN/g== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoBA-00000007JYH-0HOh; Fri, 18 Jul 2025 12:43:24 -0400 Message-ID: <20250718164323.918565553@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:24 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra , Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. 
Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 05/11] perf: Skip user unwind if the task is a kernel thread References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Josh Poimboeuf If the task is not a user thread, there's no user stack to unwind. Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- kernel/events/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1fa554e2666d..bd0a33f389d2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8166,7 +8166,8 @@ struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { bool kernel =3D !event->attr.exclude_callchain_kernel; - bool user =3D !event->attr.exclude_callchain_user; + bool user =3D !event->attr.exclude_callchain_user && + !(current->flags & (PF_KTHREAD | PF_USER_WORKER)); /* Disallow cross-task user callchains. 
*/ bool crosstask =3D event->ctx->task && event->ctx->task !=3D current; const u32 max_stack =3D event->attr.sample_max_stack; --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 427E02E765B; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; cv=none; b=B4D07a+5dCb4p/rHNM03ya3mVtLScXYc6nsR6F/XGsjv/7dM/qqngR3YCis5Sj7KoFVRdn2fqwFwXIMALKky4JRH1kWHTmG8pxtONs0mPimBiP86QiBqd0Hv4+FKW9WN8xEUsLv+ua3ojzhZiksU4hq7KokHiselrtkZjp9hQKg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; c=relaxed/simple; bh=mE52kCncTgsKmMwqN/TwNKzPK4I5m5S0CzJSZP6iaZs=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=KMTsvHoRBV1RfvXitVykCVGuznVYi4xDtLI5Obml36Wp6CxNcnHdxS+ATXaRdMOO1c1RZmBBN3DPW2yU6JHdgPtqes9D8Wxpn2+SBRmpIoS4vNlhPvhwsbCr/an2RW/xJxLDF4QuVH80Gia5N/zpGU6qYdQWTlSx+InLatDNG0A= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=F1yoeyo3; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="F1yoeyo3" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6EA9EC4CEFC; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856980; bh=mE52kCncTgsKmMwqN/TwNKzPK4I5m5S0CzJSZP6iaZs=; h=Date:From:To:Cc:Subject:References:From; b=F1yoeyo3sZLIrk3rVbPuw6C0MfHzNFjnFuDKJC+pW4ElDXNwpSHQVHvjzGkuxQTca f+WIxmG8CaVfgmYd4d4n5OIvSWPHYkaZWXe711rne/x89zGnUDeC1PZNlGb3cswt4i 
F6wLSr2g5qqCAivZQbop8fYxZf850EVBqw0tcT9Cozcz9utdBeIJKvsn5oeUlCQhrE C/LaJoH1q94D2KsNw+ICe7hUQJa2ze1oYgz7Zy3H+3cQuoNL4YsfZz8/v6yEVu4lAp Zf4jc4bUXsFWZBFabPn1AWrnQyBTvvrBMhdLjbNej93kSJ9MrURKmWbVNZaWB2Hn6Y a8SIMiIp4VgxQ== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoBA-00000007JYl-0yLA; Fri, 18 Jul 2025 12:43:24 -0400 Message-ID: <20250718164324.087851036@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:25 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra , Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 06/11] perf: Support deferred user callchains References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Josh Poimboeuf Use the new unwind_deferred_trace() interface (if available) to defer unwinds to task context. This will allow the use of .sframe (when it becomes available) and also prevents duplicate userspace unwinds. As the struct unwind_stacktrace has its entries as "unsigned long", and it is used to copy directly into struct perf_callchain_entry whose "ip" field is defined as u64, only allow deferred callchains for 64bit architect= ures. Suggested-by: Peter Zijlstra Co-developed-by: Steven Rostedt (Google) Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- Changes since v13: https://lore.kernel.org/20250708020050.928524258@kernel.= org - Rename unwind_deferred_trace() to unwind_user_faultable(). 
The function's name was changed, but wasn't caught because the code was moved around in the next patch which also did the rename. Compiling this patch as a standalone caught the issue. - Need to copy the trace.entries[] one a at a time as the perf entry in the ring buffer has 64 bit entries, but trace.entries[] are size long. include/linux/perf_event.h | 7 +- include/uapi/linux/perf_event.h | 19 +++- kernel/bpf/stackmap.c | 4 +- kernel/events/callchain.c | 11 +- kernel/events/core.c | 154 +++++++++++++++++++++++++- tools/include/uapi/linux/perf_event.h | 19 +++- 6 files changed, 206 insertions(+), 8 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 54e0d31afcad..c7d474391e51 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -53,6 +53,7 @@ #include #include #include +#include =20 #include =20 @@ -880,6 +881,10 @@ struct perf_event { struct callback_head pending_task; unsigned int pending_work; =20 + unsigned int pending_unwind_callback; + struct callback_head pending_unwind_work; + struct rcuwait pending_unwind_wait; + atomic_t event_limit; =20 /* address range filters */ @@ -1720,7 +1725,7 @@ extern void perf_callchain_user(struct perf_callchain= _entry_ctx *entry, struct p extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, = struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, - u32 max_stack, bool crosstask, bool add_mark); + u32 max_stack, bool crosstask, bool add_mark, bool defer_user); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); extern struct perf_callchain_entry *get_callchain_entry(int *rctx); diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_even= t.h index 78a362b80027..184740d1e79d 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -463,7 +463,8 @@ struct perf_event_attr { inherit_thread : 1, /* 
children only inherit if cloned with CLONE_THR= EAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ - __reserved_1 : 26; + defer_callchain: 1, /* generate PERF_RECORD_CALLCHAIN_DEFERRED record= s */ + __reserved_1 : 25; =20 union { __u32 wakeup_events; /* wake up every n events */ @@ -1239,6 +1240,21 @@ enum perf_event_type { */ PERF_RECORD_AUX_OUTPUT_HW_ID =3D 21, =20 + /* + * This user callchain capture was deferred until shortly before + * returning to user space. Previous samples would have kernel + * callchains only and they need to be stitched with this to make full + * callchains. + * + * struct { + * struct perf_event_header header; + * u64 nr; + * u64 ips[nr]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CALLCHAIN_DEFERRED =3D 22, + PERF_RECORD_MAX, /* non-ABI */ }; =20 @@ -1269,6 +1285,7 @@ enum perf_callchain_context { PERF_CONTEXT_HV =3D (__u64)-32, PERF_CONTEXT_KERNEL =3D (__u64)-128, PERF_CONTEXT_USER =3D (__u64)-512, + PERF_CONTEXT_USER_DEFERRED =3D (__u64)-640, =20 PERF_CONTEXT_GUEST =3D (__u64)-2048, PERF_CONTEXT_GUEST_KERNEL =3D (__u64)-2176, diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index ec3a57a5fba1..339f7cbbcf36 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -315,7 +315,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, str= uct bpf_map *, map, max_depth =3D sysctl_perf_event_max_stack; =20 trace =3D get_perf_callchain(regs, kernel, user, max_depth, - false, false); + false, false, false); =20 if (unlikely(!trace)) /* couldn't fetch the stack trace */ @@ -452,7 +452,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struc= t task_struct *task, trace =3D get_callchain_entry_for_task(task, max_depth); else trace =3D get_perf_callchain(regs, kernel, user, max_depth, - crosstask, false); + crosstask, false, false); =20 if (unlikely(!trace) || trace->nr < skip) { if (may_fault) diff --git a/kernel/events/callchain.c 
b/kernel/events/callchain.c index 808c0d7a31fa..d0e0da66a164 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -218,7 +218,7 @@ static void fixup_uretprobe_trampoline_entries(struct p= erf_callchain_entry *entr =20 struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, - u32 max_stack, bool crosstask, bool add_mark) + u32 max_stack, bool crosstask, bool add_mark, bool defer_user) { struct perf_callchain_entry *entry; struct perf_callchain_entry_ctx ctx; @@ -251,6 +251,15 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, = bool user, regs =3D task_pt_regs(current); } =20 + if (defer_user) { + /* + * Foretell the coming of PERF_RECORD_CALLCHAIN_DEFERRED + * which can be stitched to this one. + */ + perf_callchain_store_context(&ctx, PERF_CONTEXT_USER_DEFERRED); + goto exit_put; + } + if (add_mark) perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); =20 diff --git a/kernel/events/core.c b/kernel/events/core.c index bd0a33f389d2..cab5fa238684 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5582,6 +5582,93 @@ static bool exclusive_event_installable(struct perf_= event *event, return true; } =20 +static void perf_pending_unwind_sync(struct perf_event *event) +{ + might_sleep(); + + if (!event->pending_unwind_callback) + return; + + /* + * If the task is queued to the current task's queue, we + * obviously can't wait for it to complete. Simply cancel it. + */ + if (task_work_cancel(current, &event->pending_unwind_work)) { + event->pending_unwind_callback =3D 0; + local_dec(&event->ctx->nr_no_switch_fast); + return; + } + + /* + * All accesses related to the event are within the same RCU section in + * perf_event_callchain_deferred(). The RCU grace period before the + * event is freed will make sure all those accesses are complete by then. 
+ */ + rcuwait_wait_event(&event->pending_unwind_wait, !event->pending_unwind_ca= llback, TASK_UNINTERRUPTIBLE); +} + +struct perf_callchain_deferred_event { + struct perf_event_header header; + u64 nr; + u64 ips[]; +}; + +static void perf_event_callchain_deferred(struct callback_head *work) +{ + struct perf_event *event =3D container_of(work, struct perf_event, pendin= g_unwind_work); + struct perf_callchain_deferred_event deferred_event; + u64 callchain_context =3D PERF_CONTEXT_USER; + struct unwind_stacktrace trace; + struct perf_output_handle handle; + struct perf_sample_data data; + u64 nr; + + if (!event->pending_unwind_callback) + return; + + if (unwind_user_faultable(&trace) < 0) + goto out; + + /* + * All accesses to the event must belong to the same implicit RCU + * read-side critical section as the ->pending_unwind_callback reset. + * See comment in perf_pending_unwind_sync(). + */ + guard(rcu)(); + + if (current->flags & (PF_KTHREAD | PF_USER_WORKER)) + goto out; + + nr =3D trace.nr + 1 ; /* '+1' =3D=3D callchain_context */ + + deferred_event.header.type =3D PERF_RECORD_CALLCHAIN_DEFERRED; + deferred_event.header.misc =3D PERF_RECORD_MISC_USER; + deferred_event.header.size =3D sizeof(deferred_event) + (nr * sizeof(u64)= ); + + deferred_event.nr =3D nr; + + perf_event_header__init_id(&deferred_event.header, &data, event); + + if (perf_output_begin(&handle, &data, event, deferred_event.header.size)) + goto out; + + perf_output_put(&handle, deferred_event); + perf_output_put(&handle, callchain_context); + /* trace.entries[] are not guaranteed to be 64bit */ + for (int i =3D 0; i < trace.nr; i++) { + u64 entry =3D trace.entries[i]; + perf_output_put(&handle, entry); + } + perf_event__output_id_sample(event, &handle, &data); + + perf_output_end(&handle); + +out: + event->pending_unwind_callback =3D 0; + local_dec(&event->ctx->nr_no_switch_fast); + rcuwait_wake_up(&event->pending_unwind_wait); +} + static void perf_free_addr_filters(struct perf_event *event); 
=20 /* vs perf_event_alloc() error */ @@ -5649,6 +5736,7 @@ static void _free_event(struct perf_event *event) { irq_work_sync(&event->pending_irq); irq_work_sync(&event->pending_disable_irq); + perf_pending_unwind_sync(event); =20 unaccount_event(event); =20 @@ -8162,6 +8250,46 @@ static u64 perf_get_page_size(unsigned long addr) =20 static struct perf_callchain_entry __empty_callchain =3D { .nr =3D 0, }; =20 +/* + * Returns: +* > 0 : if already queued. + * 0 : if it performed the queuing + * < 0 : if it did not get queued. + */ +static int deferred_request(struct perf_event *event) +{ + struct callback_head *work =3D &event->pending_unwind_work; + int pending; + int ret; + + /* Only defer for task events */ + if (!event->ctx->task) + return -EINVAL; + + if ((current->flags & (PF_KTHREAD | PF_USER_WORKER)) || + !user_mode(task_pt_regs(current))) + return -EINVAL; + + guard(irqsave)(); + + /* callback already pending? */ + pending =3D READ_ONCE(event->pending_unwind_callback); + if (pending) + return 1; + + /* Claim the work unless an NMI just now swooped in to do so. */ + if (!try_cmpxchg(&event->pending_unwind_callback, &pending, 1)) + return 1; + + /* The work has been claimed, now schedule it. 
*/ + ret =3D task_work_add(current, work, TWA_RESUME); + if (WARN_ON_ONCE(ret)) { + WRITE_ONCE(event->pending_unwind_callback, 0); + return ret; + } + return 0; +} + struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { @@ -8172,6 +8300,9 @@ perf_callchain(struct perf_event *event, struct pt_re= gs *regs) bool crosstask =3D event->ctx->task && event->ctx->task !=3D current; const u32 max_stack =3D event->attr.sample_max_stack; struct perf_callchain_entry *callchain; + /* perf currently only supports deferred in 64bit */ + bool defer_user =3D IS_ENABLED(CONFIG_UNWIND_USER) && user && + event->attr.defer_callchain; =20 if (!current->mm) user =3D false; @@ -8179,8 +8310,21 @@ perf_callchain(struct perf_event *event, struct pt_r= egs *regs) if (!kernel && !user) return &__empty_callchain; =20 - callchain =3D get_perf_callchain(regs, kernel, user, - max_stack, crosstask, true); + /* Disallow cross-task callchains. */ + if (event->ctx->task && event->ctx->task !=3D current) + return &__empty_callchain; + + if (defer_user) { + int ret =3D deferred_request(event); + if (!ret) + local_inc(&event->ctx->nr_no_switch_fast); + else if (ret < 0) + defer_user =3D false; + } + + callchain =3D get_perf_callchain(regs, kernel, user, max_stack, + crosstask, true, defer_user); + return callchain ?: &__empty_callchain; } =20 @@ -12850,6 +12994,8 @@ perf_event_alloc(struct perf_event_attr *attr, int = cpu, event->pending_disable_irq =3D IRQ_WORK_INIT_HARD(perf_pending_disable); init_task_work(&event->pending_task, perf_pending_task); =20 + rcuwait_init(&event->pending_unwind_wait); + mutex_init(&event->mmap_mutex); raw_spin_lock_init(&event->addr_filters.lock); =20 @@ -13018,6 +13164,10 @@ perf_event_alloc(struct perf_event_attr *attr, int= cpu, if (err) return ERR_PTR(err); =20 + if (event->attr.defer_callchain) + init_task_work(&event->pending_unwind_work, + perf_event_callchain_deferred); + /* symmetric to unaccount_event() in _free_event() */ 
account_event(event); =20 diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/lin= ux/perf_event.h index 78a362b80027..184740d1e79d 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -463,7 +463,8 @@ struct perf_event_attr { inherit_thread : 1, /* children only inherit if cloned with CLONE_THR= EAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ - __reserved_1 : 26; + defer_callchain: 1, /* generate PERF_RECORD_CALLCHAIN_DEFERRED record= s */ + __reserved_1 : 25; =20 union { __u32 wakeup_events; /* wake up every n events */ @@ -1239,6 +1240,21 @@ enum perf_event_type { */ PERF_RECORD_AUX_OUTPUT_HW_ID =3D 21, =20 + /* + * This user callchain capture was deferred until shortly before + * returning to user space. Previous samples would have kernel + * callchains only and they need to be stitched with this to make full + * callchains. + * + * struct { + * struct perf_event_header header; + * u64 nr; + * u64 ips[nr]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CALLCHAIN_DEFERRED =3D 22, + PERF_RECORD_MAX, /* non-ABI */ }; =20 @@ -1269,6 +1285,7 @@ enum perf_callchain_context { PERF_CONTEXT_HV =3D (__u64)-32, PERF_CONTEXT_KERNEL =3D (__u64)-128, PERF_CONTEXT_USER =3D (__u64)-512, + PERF_CONTEXT_USER_DEFERRED =3D (__u64)-640, =20 PERF_CONTEXT_GUEST =3D (__u64)-2048, PERF_CONTEXT_GUEST_KERNEL =3D (__u64)-2176, --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E23522E7623; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; 
cv=none; b=LoxmhdwlhNvHQIRcyVvIGpkeRNa2a+uOBf9xEAEn3w5OlqzOJYjo2Zrg80FIYdUnFCrW+9iZpCH/YwxQlfOTtK4hae3KiO3a4XFlYMPE/S0HbSmIxgmDd92oVK+UxSIiLrdpAkhp78u5/PWXzIzRyHjBZIzhkbrzLI7eWbjt77g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; c=relaxed/simple; bh=b1qrpapeXITDwms1r8xfIPMHPQW1hHx33mNis6tA2qs=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=dzqdgvSngbf6BrjCWQM9bVehs+5LGqfc34PJjLsgzWCouv9URXnvMwk0FfJUcEfbdt4m0CMFpx0+UhDOWgckmzw0nD0+6o7E7pf4yx7y51ccXrRcRPgeVkxuN+a1FsU7RvjsMAiiCT3MrGsHcS0heHkS132K11Uey6SUH5xXV2Q= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=XU/L6LYc; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="XU/L6LYc" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7E0B0C4CEFB; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856980; bh=b1qrpapeXITDwms1r8xfIPMHPQW1hHx33mNis6tA2qs=; h=Date:From:To:Cc:Subject:References:From; b=XU/L6LYcjm7hJjQX4M+5JKhGUbLD0KrzlfsFb+7J5Z9uJIqOwNda7rKfnU4ifYUzY xbG7vU5xXaUBgjDX0DMupA/vB/V8wKvMtLW6uUPCzK3cab++kLM0gZuQYCvVld3wiA e/Az1cDHOq769BSZ6xHj3TH/7QOTqqd0tLb/56R6Rug8gx4kvkodmOttC1+iYjXzf0 g41K9HdEZxP4uNvKNJc+HCqxNq+GQzF3A2wMUKu3Pk73J5xdMLGqDrlYaRytKgkjdc BUtAzJTt12Lio6FBvqATTtwxdj9lMxsPowanvYiHD2RccWppc9KlBTIw9kL/dU6I8W xzAng1tVJp7xg== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoBA-00000007JZF-1gua; Fri, 18 Jul 2025 12:43:24 -0400 Message-ID: <20250718164324.253816529@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:26 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter 
Zijlstra , Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 07/11] perf: Support deferred user callchains for per CPU events References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Steven Rostedt The deferred unwinder works fine for task events (events that trace only a specific task), as it can use a task_work from an interrupt or NMI and when the task goes back to user space it will call the event's callback to do the deferred unwinding. But for per CPU events things are not so simple. When a per CPU event wants a deferred unwinding to occur, it cannot simply use a task_work as there's a many to many relationship. If the task migrates and another task is scheduled in where the per CPU event wants a deferred unwinding to occur on that task as well, and the task that migrated to another CPU has that CPU's event want to unwind it too, each CPU may need unwinding from more than one task, and each task may have requests from many CPUs. To solve this, when a per CPU event is created that has defer_callchain attribute set, it will do a lookup from a global list (unwind_deferred_list), for a perf_unwind_deferred descriptor that has the id that matches the PID of the current task's group_leader. If it is not found, then it will create one and add it to the global list. This descriptor contains an array of all possible CPUs, where each element is a perf_unwind_cpu descriptor. 
The perf_unwind_cpu descriptor has a list of all the per CPU events that are tracing the matching CPU that corresponds to its index in the array, where the events belong to a task that has the same group_leader. It also has a processing bit and rcuwait to handle removal. For each occupied perf_unwind_cpu descriptor in the array, the perf_unwind_deferred descriptor increments its nr_cpu_events. When a perf_unwind_cpu descriptor is empty, the nr_cpu_events is decremented. This is used to know when to free the perf_unwind_deferred descriptor, as when it becomes empty, it is no longer referenced. Finally, the perf_unwind_deferred descriptor has an id that holds the PID of the group_leader for the tasks that the events were created by. When a second (or more) per CPU event is created where the perf_unwind_deferred descriptor is already created, it just adds itself to the perf_unwind_cpu array of that descriptor, updating the necessary counter. This is used to map different per CPU events to each other based on their group leader PID. Each of these perf_unwind_deferred descriptors has an unwind_work that registers with the deferred unwind infrastructure via unwind_deferred_init(), where it also registers a callback to perf_event_deferred_cpu(). Now when a per CPU event requests a deferred unwinding, it calls unwind_deferred_request() with the associated perf_unwind_deferred descriptor. It is expected that the program that uses this has events on all CPUs, as the deferred trace may not be called on the CPU event that requested it. That is, the task may migrate and its user stack trace will be recorded on the CPU event of the CPU that it exits back to user space on.
Signed-off-by: Steven Rostedt (Google) --- Changes since v13: https://lore.kernel.org/20250708020051.098865419@kernel.= org - Added back the cookie field in perf_callchain_deferred_event structure (Note, it was a timestamp before) (Namhyung Kim) - Add the cookie to the comment explaining perf_callchain_deferred_event. - Fixed deferred_unwind_request() to return 1 if the request was already queued or was already executed to not incorrectly increment nr_no_switch_fast. include/linux/perf_event.h | 4 + include/uapi/linux/perf_event.h | 1 + kernel/events/core.c | 319 +++++++++++++++++++++++++++++--- 3 files changed, 296 insertions(+), 28 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c7d474391e51..546a7f81be96 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -733,6 +733,7 @@ struct swevent_hlist { struct bpf_prog; struct perf_cgroup; struct perf_buffer; +struct perf_unwind_deferred; =20 struct pmu_event_list { raw_spinlock_t lock; @@ -885,6 +886,9 @@ struct perf_event { struct callback_head pending_unwind_work; struct rcuwait pending_unwind_wait; =20 + struct perf_unwind_deferred *unwind_deferred; + struct list_head unwind_list; + atomic_t event_limit; =20 /* address range filters */ diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_even= t.h index 184740d1e79d..20b8f890113b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1248,6 +1248,7 @@ enum perf_event_type { * * struct { * struct perf_event_header header; + * u64 cookie; * u64 nr; * u64 ips[nr]; * struct sample_id sample_id; diff --git a/kernel/events/core.c b/kernel/events/core.c index cab5fa238684..b8ef99deab71 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5582,10 +5582,192 @@ static bool exclusive_event_installable(struct per= f_event *event, return true; } =20 +/* Holds a list of per CPU events that registered for deferred unwinding */ +struct perf_unwind_cpu { + 
struct list_head list; + struct rcuwait pending_unwind_wait; + int processing; +}; + +struct perf_unwind_deferred { + struct list_head list; + struct unwind_work unwind_work; + struct perf_unwind_cpu __rcu *cpu_events; + struct rcu_head rcu_head; + int nr_cpu_events; + int id; +}; + +static DEFINE_MUTEX(unwind_deferred_mutex); +static LIST_HEAD(unwind_deferred_list); + +static void perf_event_deferred_cpu(struct unwind_work *work, + struct unwind_stacktrace *trace, u64 cookie); + +/* + * Add a per CPU event. + * + * The deferred callstack can happen on a different CPU than what was + * requested. If one CPU event requests a deferred callstack, but the + * tasks migrates, it will execute on a different CPU and save the + * stack trace to that CPU event. + * + * In order to map all the CPU events with the same application, + * use the current->gorup_leader->pid as the identifier of what + * events share the same program. + * + * A perf_unwind_deferred descriptor is created for each unique + * group_leader pid, and all the events that have the same group_leader + * pid will be linked to the same deferred descriptor. + * + * If there's no descriptor that matches the current group_leader pid, + * one will be created. + */ +static int perf_add_unwind_deferred(struct perf_event *event) +{ + struct perf_unwind_deferred *defer; + struct perf_unwind_cpu *cpu_events; + int id =3D current->group_leader->pid; + bool found =3D false; + int ret =3D 0; + + if (event->cpu < 0) + return -EINVAL; + + guard(mutex)(&unwind_deferred_mutex); + + list_for_each_entry(defer, &unwind_deferred_list, list) { + if (defer->id =3D=3D id) { + found =3D true; + break; + } + } + + if (!found) { + defer =3D kzalloc(sizeof(*defer), GFP_KERNEL); + if (!defer) + return -ENOMEM; + list_add(&defer->list, &unwind_deferred_list); + defer->id =3D id; + } + + /* + * The deferred desciptor has an array for every CPU. 
+ * Each entry in this array is a link list of all the CPU + * events for the corresponding CPU. This is a quick way to + * find the associated event for a given CPU in + * perf_event_deferred_cpu(). + */ + if (!defer->nr_cpu_events) { + cpu_events =3D kcalloc(num_possible_cpus(), + sizeof(*cpu_events), + GFP_KERNEL); + if (!cpu_events) { + ret =3D -ENOMEM; + goto free; + } + for (int cpu =3D 0; cpu < num_possible_cpus(); cpu++) { + rcuwait_init(&cpu_events[cpu].pending_unwind_wait); + INIT_LIST_HEAD(&cpu_events[cpu].list); + } + + rcu_assign_pointer(defer->cpu_events, cpu_events); + + ret =3D unwind_deferred_init(&defer->unwind_work, + perf_event_deferred_cpu); + if (ret) + goto free; + } + cpu_events =3D rcu_dereference_protected(defer->cpu_events, + lockdep_is_held(&unwind_deferred_mutex)); + + /* + * The defer->nr_cpu_events is the count of the number + * of non-empty lists in the cpu_events array. If the list + * being added to is already occupied, the nr_cpu_events does + * not need to get incremented. + */ + if (list_empty(&cpu_events[event->cpu].list)) + defer->nr_cpu_events++; + list_add_tail_rcu(&event->unwind_list, &cpu_events[event->cpu].list); + + event->unwind_deferred =3D defer; + return 0; +free: + if (found) + return ret; + + list_del(&defer->list); + kfree(cpu_events); + kfree(defer); + return ret; +} + +static void free_unwind_deferred_rcu(struct rcu_head *head) +{ + struct perf_unwind_cpu *cpu_events; + struct perf_unwind_deferred *defer =3D + container_of(head, struct perf_unwind_deferred, rcu_head); + + WARN_ON_ONCE(defer->nr_cpu_events); + /* + * This is called by call_rcu() and there are no more + * references to cpu_events. 
+ */ + cpu_events =3D rcu_dereference_protected(defer->cpu_events, true); + kfree(cpu_events); + kfree(defer); +} + +static void perf_remove_unwind_deferred(struct perf_event *event) +{ + struct perf_unwind_deferred *defer =3D event->unwind_deferred; + struct perf_unwind_cpu *cpu_events, *cpu_unwind; + + if (!defer) + return; + + guard(mutex)(&unwind_deferred_mutex); + list_del_rcu(&event->unwind_list); + + cpu_events =3D rcu_dereference_protected(defer->cpu_events, + lockdep_is_held(&unwind_deferred_mutex)); + cpu_unwind =3D &cpu_events[event->cpu]; + + if (list_empty(&cpu_unwind->list)) { + defer->nr_cpu_events--; + if (!defer->nr_cpu_events) + unwind_deferred_cancel(&defer->unwind_work); + } + + event->unwind_deferred =3D NULL; + + /* + * Make sure perf_event_deferred_cpu() is done with this event. + * That function will set cpu_unwind->processing and then + * call smp_mb() before iterating the list of its events. + * If the event's unwind_deferred is NULL, it will be skipped. + * The smp_mb() in that function matches the mb() in + * rcuwait_wait_event(). + */ + rcuwait_wait_event(&cpu_unwind->pending_unwind_wait, + !cpu_unwind->processing, TASK_UNINTERRUPTIBLE); + + /* Is this still being used by other per CPU events? 
*/ + if (defer->nr_cpu_events) + return; + + list_del(&defer->list); + /* The defer->cpu_events is protected by RCU */ + call_rcu(&defer->rcu_head, free_unwind_deferred_rcu); +} + static void perf_pending_unwind_sync(struct perf_event *event) { might_sleep(); =20 + perf_remove_unwind_deferred(event); + if (!event->pending_unwind_callback) return; =20 @@ -5609,66 +5791,123 @@ static void perf_pending_unwind_sync(struct perf_e= vent *event) =20 struct perf_callchain_deferred_event { struct perf_event_header header; + u64 cookie; u64 nr; u64 ips[]; }; =20 -static void perf_event_callchain_deferred(struct callback_head *work) +static void perf_event_callchain_deferred(struct perf_event *event, + struct unwind_stacktrace *trace, + u64 cookie) { - struct perf_event *event =3D container_of(work, struct perf_event, pendin= g_unwind_work); struct perf_callchain_deferred_event deferred_event; u64 callchain_context =3D PERF_CONTEXT_USER; - struct unwind_stacktrace trace; struct perf_output_handle handle; struct perf_sample_data data; u64 nr; =20 - if (!event->pending_unwind_callback) - return; - - if (unwind_user_faultable(&trace) < 0) - goto out; - - /* - * All accesses to the event must belong to the same implicit RCU - * read-side critical section as the ->pending_unwind_callback reset. - * See comment in perf_pending_unwind_sync(). 
- */ - guard(rcu)(); - if (current->flags & (PF_KTHREAD | PF_USER_WORKER)) - goto out; + return; =20 - nr =3D trace.nr + 1 ; /* '+1' =3D=3D callchain_context */ + nr =3D trace->nr + 1 ; /* '+1' =3D=3D callchain_context */ =20 deferred_event.header.type =3D PERF_RECORD_CALLCHAIN_DEFERRED; deferred_event.header.misc =3D PERF_RECORD_MISC_USER; deferred_event.header.size =3D sizeof(deferred_event) + (nr * sizeof(u64)= ); =20 + deferred_event.cookie =3D cookie; deferred_event.nr =3D nr; =20 perf_event_header__init_id(&deferred_event.header, &data, event); =20 if (perf_output_begin(&handle, &data, event, deferred_event.header.size)) - goto out; + return; =20 perf_output_put(&handle, deferred_event); perf_output_put(&handle, callchain_context); - /* trace.entries[] are not guaranteed to be 64bit */ - for (int i =3D 0; i < trace.nr; i++) { - u64 entry =3D trace.entries[i]; + /* trace->entries[] are not guaranteed to be 64bit */ + for (int i =3D 0; i < trace->nr; i++) { + u64 entry =3D trace->entries[i]; perf_output_put(&handle, entry); } perf_event__output_id_sample(event, &handle, &data); =20 perf_output_end(&handle); +} + +/* Deferred unwinding callback for task specific events */ +static void perf_event_deferred_task(struct callback_head *work) +{ + struct perf_event *event =3D container_of(work, struct perf_event, pendin= g_unwind_work); + struct unwind_stacktrace trace; + + if (!event->pending_unwind_callback) + return; + + if (unwind_user_faultable(&trace) >=3D 0) { + + /* + * All accesses to the event must belong to the same implicit RCU + * read-side critical section as the ->pending_unwind_callback reset. + * See comment in perf_pending_unwind_sync(). + */ + guard(rcu)(); + perf_event_callchain_deferred(event, &trace, 0); + } =20 -out: event->pending_unwind_callback =3D 0; local_dec(&event->ctx->nr_no_switch_fast); rcuwait_wake_up(&event->pending_unwind_wait); } =20 +/* + * Deferred unwinding callback for per CPU events. 
+ * Note, the request for the deferred unwinding may have happened + * on a different CPU. + */ +static void perf_event_deferred_cpu(struct unwind_work *work, + struct unwind_stacktrace *trace, u64 cookie) +{ + struct perf_unwind_deferred *defer =3D + container_of(work, struct perf_unwind_deferred, unwind_work); + struct perf_unwind_cpu *cpu_events, *cpu_unwind; + struct perf_event *event; + int cpu; + + guard(rcu)(); + guard(preempt)(); + + cpu =3D smp_processor_id(); + cpu_events =3D rcu_dereference(defer->cpu_events); + cpu_unwind =3D &cpu_events[cpu]; + + WRITE_ONCE(cpu_unwind->processing, 1); + /* + * Make sure the above is seen before the event->unwind_deferred + * is checked. This matches the mb() in rcuwait_rcu_wait_event() in + * perf_remove_unwind_deferred(). + */ + smp_mb(); + + list_for_each_entry_rcu(event, &cpu_unwind->list, unwind_list) { + /* If unwind_deferred is NULL the event is going away */ + if (unlikely(!event->unwind_deferred)) + continue; + perf_event_callchain_deferred(event, trace, cookie); + /* Only the first CPU event gets the trace */ + break; + } + + /* + * The perf_event_callchain_deferred() must finish before setting + * cpu_unwind->processing to zero. This is also to synchronize + * with the rcuwait in perf_remove_unwind_deferred(). + */ + smp_mb(); + WRITE_ONCE(cpu_unwind->processing, 0); + rcuwait_wake_up(&cpu_unwind->pending_unwind_wait); +} + static void perf_free_addr_filters(struct perf_event *event); =20 /* vs perf_event_alloc() error */ @@ -8250,6 +8489,18 @@ static u64 perf_get_page_size(unsigned long addr) =20 static struct perf_callchain_entry __empty_callchain =3D { .nr =3D 0, }; =20 + +static int deferred_unwind_request(struct perf_unwind_deferred *defer) +{ + u64 cookie; /* ignored */ + + /* + * Returns 0 for queued, 1 for already queued or executed, + * and negative on error. + */ + return unwind_deferred_request(&defer->unwind_work, &cookie); +} + /* * Returns: * > 0 : if already queued. 
@@ -8259,17 +8510,22 @@ static struct perf_callchain_entry __empty_callchai= n =3D { .nr =3D 0, }; static int deferred_request(struct perf_event *event) { struct callback_head *work =3D &event->pending_unwind_work; + struct perf_unwind_deferred *defer; int pending; int ret; =20 - /* Only defer for task events */ - if (!event->ctx->task) - return -EINVAL; - if ((current->flags & (PF_KTHREAD | PF_USER_WORKER)) || !user_mode(task_pt_regs(current))) return -EINVAL; =20 + defer =3D READ_ONCE(event->unwind_deferred); + if (defer) + return deferred_unwind_request(defer); + + /* Per CPU events should have had unwind_deferred set! */ + if (WARN_ON_ONCE(!event->ctx->task)) + return -EINVAL; + guard(irqsave)(); =20 /* callback already pending? */ @@ -13160,13 +13416,20 @@ perf_event_alloc(struct perf_event_attr *attr, in= t cpu, } } =20 + /* Setup unwind deferring for per CPU events */ + if (event->attr.defer_callchain && !task) { + err =3D perf_add_unwind_deferred(event); + if (err) + return ERR_PTR(err); + } + err =3D security_perf_event_alloc(event); if (err) return ERR_PTR(err); =20 if (event->attr.defer_callchain) init_task_work(&event->pending_unwind_work, - perf_event_callchain_deferred); + perf_event_deferred_task); =20 /* symmetric to unaccount_event() in _free_event() */ account_event(event); --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 427562E765A; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; cv=none; 
b=tlW3QdaxOSgUgiBWKiXfgVIPRnJp7jSOl337RcFlUQS+ipqi1L+2y3dOMW0ucIrpAi4V8k6xRH96r9bugVQxixYCqxrb7/bqEB9DCRtT7VgH44GGfuIMVfsafHzUpOafmyQvGBHhziR5We0oQE0EZSOhFP2HinzYDufe+EL9DPc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; c=relaxed/simple; bh=fU4O+9+pdoSgL8mo1UKUCe/pMZfgVovG/QtWRZ/u3Vg=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=qqupA9KLl76h7zli9P3DYvH/spc9N24YS0ACx01nQUwFzvb2y79DvB7NGXcjhjCk69QcXz/RnM056eqfGhayjeXHmEH+0YFskWFP4/T+0AP7HkGzHzPz/dEhwYIuURwnI4Ubazss0NAbz6EFXKgRRvQrW6TGt8B+2eAN4xtvWcA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=UAURRgu/; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="UAURRgu/" Received: by smtp.kernel.org (Postfix) with ESMTPSA id AD4CDC116C6; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856980; bh=fU4O+9+pdoSgL8mo1UKUCe/pMZfgVovG/QtWRZ/u3Vg=; h=Date:From:To:Cc:Subject:References:From; b=UAURRgu/XCRhSkDheX5A4MR67575FGCZiAm7SGPskgi8pSGhD3ub8CxgeTP1KPjLV Rd91bOVkd1qoixz62fHlBuqMAjUs1nxTEtg0VZ95tRYwVTWima9eGttFQMQSKP0mMe anDMnHmqSmoIEm8MkUAupDjVOfLcWa6K0dGClBRoT1jaAZefoX2g+SBDxm2CAXjao3 e3KEQghprvEG5fUQ6CJK5Ll630ge5keXooFZVn0imQ3bN/5eK2vV7MDJf0Qzw0EBza Fb+veZ58zJuFUMDabKLkdOQ1wimZGJ6IVZkMXD2QTOt2laXG63XoS1nWK13GzOfrL0 syDlNpsD/grnw== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoBA-00000007JZj-2PI3; Fri, 18 Jul 2025 12:43:24 -0400 Message-ID: <20250718164324.423885737@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:27 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra 
, Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 08/11] perf tools: Minimal CALLCHAIN_DEFERRED support References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Namhyung Kim Add a new event type for deferred callchains and a new callback for the struct perf_tool. For now it doesn't actually handle the deferred callchains; it just marks the sample if it has PERF_CONTEXT_USER_DEFERRED in the callchain array. At least, perf report can dump the raw data with this change. Actually this requires the next commit to enable attr.defer_callchain, but if you already have a data file, it'll show the following result. $ perf report -D ... 0x5fe0@perf.data [0x40]: event: 22 . . ... raw event: size 64 bytes . 0000: 16 00 00 00 02 00 40 00 02 00 00 00 00 00 00 00 ......@.......= .. . 0010: 00 fe ff ff ff ff ff ff 4b d3 3f 25 45 7f 00 00 ........K.?%E.= .. . 0020: 21 03 00 00 21 03 00 00 43 02 12 ab 05 00 00 00 !...!...C.....= .. . 0030: 00 00 00 00 00 00 00 00 09 00 00 00 00 00 00 00 ..............= .. 0 24344920643 0x5fe0 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 801= /801: 0 [300000001] ... FP chain: nr:2 ..... 0: fffffffffffffe00 ..... 1: 00007f45253fd34b : unhandled! 
Signed-off-by: Namhyung Kim Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- Changes since v13: https://lore.kernel.org/20250708020051.268700027@kernel.= org - Added back the cookie in the deferred event - Display the cookie in the -D output tools/lib/perf/include/perf/event.h | 8 ++++++++ tools/perf/util/event.c | 1 + tools/perf/util/evsel.c | 15 +++++++++++++++ tools/perf/util/machine.c | 1 + tools/perf/util/perf_event_attr_fprintf.c | 1 + tools/perf/util/sample.h | 3 ++- tools/perf/util/session.c | 18 ++++++++++++++++++ tools/perf/util/tool.c | 1 + tools/perf/util/tool.h | 3 ++- 9 files changed, 49 insertions(+), 2 deletions(-) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/p= erf/event.h index 09b7c643ddac..5801b0124067 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -151,6 +151,13 @@ struct perf_record_switch { __u32 next_prev_tid; }; =20 +struct perf_record_callchain_deferred { + struct perf_event_header header; + __u64 cookie; + __u64 nr; + __u64 ips[]; +}; + struct perf_record_header_attr { struct perf_event_header header; struct perf_event_attr attr; @@ -505,6 +512,7 @@ union perf_event { struct perf_record_read read; struct perf_record_throttle throttle; struct perf_record_sample sample; + struct perf_record_callchain_deferred callchain_deferred; struct perf_record_bpf_event bpf; struct perf_record_ksymbol ksymbol; struct perf_record_text_poke_event text_poke; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 7544a3104e21..720682fea9be 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -58,6 +58,7 @@ static const char *perf_event__names[] =3D { [PERF_RECORD_CGROUP] =3D "CGROUP", [PERF_RECORD_TEXT_POKE] =3D "TEXT_POKE", [PERF_RECORD_AUX_OUTPUT_HW_ID] =3D "AUX_OUTPUT_HW_ID", + [PERF_RECORD_CALLCHAIN_DEFERRED] =3D "CALLCHAIN_DEFERRED", [PERF_RECORD_HEADER_ATTR] =3D "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] =3D "EVENT_TYPE", 
[PERF_RECORD_HEADER_TRACING_DATA] =3D "TRACING_DATA", diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d55482f094bf..6176c31b57ea 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2978,6 +2978,18 @@ int evsel__parse_sample(struct evsel *evsel, union p= erf_event *event, data->data_src =3D PERF_MEM_DATA_SRC_NONE; data->vcpu =3D -1; =20 + if (event->header.type =3D=3D PERF_RECORD_CALLCHAIN_DEFERRED) { + const u64 max_callchain_nr =3D UINT64_MAX / sizeof(u64); + + data->callchain =3D (struct ip_callchain *)&event->callchain_deferred.nr; + if (data->callchain->nr > max_callchain_nr) + return -EFAULT; + + if (evsel->core.attr.sample_id_all) + perf_evsel__parse_id_sample(evsel, event, data); + return 0; + } + if (event->header.type !=3D PERF_RECORD_SAMPLE) { if (!evsel->core.attr.sample_id_all) return 0; @@ -3108,6 +3120,9 @@ int evsel__parse_sample(struct evsel *evsel, union pe= rf_event *event, if (data->callchain->nr > max_callchain_nr) return -EFAULT; sz =3D data->callchain->nr * sizeof(u64); + if (evsel->core.attr.defer_callchain && data->callchain->nr >=3D 1 && + data->callchain->ips[data->callchain->nr - 1] =3D=3D PERF_CONTEXT_US= ER_DEFERRED) + data->deferred_callchain =3D true; OVERFLOW_CHECK(array, sz, max_size); array =3D (void *)array + sz; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7ec12c207970..de6100366eee 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2124,6 +2124,7 @@ static int add_callchain_ip(struct thread *thread, *cpumode =3D PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: + case PERF_CONTEXT_USER_DEFERRED: *cpumode =3D PERF_RECORD_MISC_USER; break; default: diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/pe= rf_event_attr_fprintf.c index 66b666d9ce64..abfd9b9a718c 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -343,6 +343,7 @@ int 
perf_event_attr__fprintf(FILE *fp, struct perf_even= t_attr *attr, PRINT_ATTRf(inherit_thread, p_unsigned); PRINT_ATTRf(remove_on_exec, p_unsigned); PRINT_ATTRf(sigtrap, p_unsigned); + PRINT_ATTRf(defer_callchain, p_unsigned); =20 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsig= ned, false); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index 0e96240052e9..9d6e2f14551c 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -108,7 +108,8 @@ struct perf_sample { u16 p_stage_cyc; u16 retire_lat; }; - bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool deferred_callchain; /* Has deferred user callchains */ char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a320672c264e..32154d230f6c 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -715,6 +715,7 @@ static perf_event__swap_op perf_event__swap_ops[] =3D { [PERF_RECORD_CGROUP] =3D perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] =3D perf_event__text_poke_swap, [PERF_RECORD_AUX_OUTPUT_HW_ID] =3D perf_event__all64_swap, + [PERF_RECORD_CALLCHAIN_DEFERRED] =3D perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] =3D perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] =3D perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] =3D perf_event__tracing_data_swap, @@ -1118,6 +1119,20 @@ static void dump_sample(struct evsel *evsel, union p= erf_event *event, sample_read__printf(sample, evsel->core.attr.read_format); } =20 +static void dump_deferred_callchain(struct evsel *evsel, union perf_event = *event, + struct perf_sample *sample) +{ + if (!dump_trace) + return; + + printf("(IP, 0x%x): %d/%d: %#" PRIx64 " [%llx]\n", + event->header.misc, sample->pid, sample->tid, sample->ip, + event->callchain_deferred.cookie); + + if 
(evsel__has_callchain(evsel)) + callchain__printf(evsel, sample); +} + static void dump_read(struct evsel *evsel, union perf_event *event) { struct perf_record_read *read_event =3D &event->read; @@ -1348,6 +1363,9 @@ static int machines__deliver_event(struct machines *m= achines, return tool->text_poke(tool, event, sample, machine); case PERF_RECORD_AUX_OUTPUT_HW_ID: return tool->aux_output_hw_id(tool, event, sample, machine); + case PERF_RECORD_CALLCHAIN_DEFERRED: + dump_deferred_callchain(evsel, event, sample); + return tool->callchain_deferred(tool, event, sample, evsel, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 37bd8ac63b01..e1d60abb4e41 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -266,6 +266,7 @@ void perf_tool__init(struct perf_tool *tool, bool order= ed_events) tool->read =3D process_event_sample_stub; tool->throttle =3D process_event_stub; tool->unthrottle =3D process_event_stub; + tool->callchain_deferred =3D process_event_sample_stub; tool->attr =3D process_event_synth_attr_stub; tool->event_update =3D process_event_synth_event_update_stub; tool->tracing_data =3D process_event_synth_tracing_data_stub; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index db1c7642b0d1..9987bbde6d5e 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -42,7 +42,8 @@ enum show_feature_header { =20 struct perf_tool { event_sample sample, - read; + read, + callchain_deferred; event_op mmap, mmap2, comm, --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 426E92E7659; Fri, 18 Jul 2025 16:43:01 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 
ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; cv=none; b=fGACl22i8KksMo3/fQE7yb84JoO6+kB6Gp40PqCzTnGcSr5mGhKD9yH6JW7rs0aZ2pv1WS27WM4z2jWdZ2TxixmDvVn3sKq/uihGyHzkq/ocdxUu7BhO0fDqf4vKcr88dwT8YLl9yIlRqxVwQbDu90jy2ppZLEc+LM9hv4tCQNM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; c=relaxed/simple; bh=uOx5DfUnBLxP9fI1uU9zO1GwRE8LdxHgs3BL/mHcvYk=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=m27mrzN/MpcHm42w9Bd/LUAKUyamaf1uTPP2T5y7C4HgrrPDie2ODcvei24KEZfWojGVv2ZmW34km5oADpBTDV5Y2AGNVT+JLImlbEU3pwgdDCLaSaZIpiJDAT/2hXg0j0bR9DcTJoTNS1YMNLhuLSl70OqzJINQW6nSPytTmAQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=sb+iGYCm; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="sb+iGYCm" Received: by smtp.kernel.org (Postfix) with ESMTPSA id DE6BBC4CEEB; Fri, 18 Jul 2025 16:43:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856980; bh=uOx5DfUnBLxP9fI1uU9zO1GwRE8LdxHgs3BL/mHcvYk=; h=Date:From:To:Cc:Subject:References:From; b=sb+iGYCmxdkg6Kdyj3n3YPiKjrGfRKV3fKFSsWxXGZh01mZlf2yEGb3J6Vdsv0bIH gAxbS1oc1ijx5yXz3nfcDA+axtTtgkkQWyvellwMJ/1GelusEJK2csxyephpPuek6g xlQZrQClKzIrEhmCnn3U91aV9wTFIRYD5+Xp4B+gFcbQ8PZ+FZb1EQkFDhQrRS2vCI YCfkYGkA+RBkD779OMgZ2R7fQqeU3sWoLussc9/BGvflCYOuJlzhYBk1NCt2FOPcrC CIYm1ebqv2Hcx0dVjgKxHnPSGAUeOr3uy/30sDaYsRpigpUJj57ajIOGOqMYuknkmP OtPLX3sRxwUJg== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoBA-00000007JaG-36Cz; Fri, 18 Jul 2025 12:43:24 -0400 Message-ID: <20250718164324.593888790@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:28 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, 
x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra , Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 09/11] perf record: Enable defer_callchain for user callchains References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Namhyung Kim And add the missing feature detection logic to clear the flag on old kernels. $ perf record -g -vv true ... ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|CALLCHAIN|PERIOD read_format ID|LOST disabled 1 inherit 1 mmap 1 comm 1 freq 1 enable_on_exec 1 task 1 sample_id_all 1 mmap2 1 comm_exec 1 ksymbol 1 bpf_event 1 defer_callchain 1 ------------------------------------------------------------ sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8 sys_perf_event_open failed, error -22 switching off deferred callchain support Signed-off-by: Namhyung Kim Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + 2 files changed, 25 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 6176c31b57ea..c942983b870e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1076,6 +1076,14 @@ static void __evsel__config_callchain(struct evsel *= evsel, struct record_opts *o } } =20 + if (param->record_mode =3D=3D CALLCHAIN_FP && !attr->exclude_callchain_us= er) { + /* + * Enable deferred 
callchains optimistically. It'll be switched + * off later if the kernel doesn't support it. + */ + attr->defer_callchain =3D 1; + } + if (function) { pr_info("Disabling user space callchains for function trace event.\n"); attr->exclude_callchain_user =3D 1; @@ -2124,6 +2132,8 @@ static int __evsel__prepare_open(struct evsel *evsel,= struct perf_cpu_map *cpus, =20 static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.defer_callchain) + evsel->core.attr.defer_callchain =3D 0; if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit= && (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) evsel->core.attr.inherit =3D 0; @@ -2398,6 +2408,15 @@ static bool evsel__detect_missing_features(struct ev= sel *evsel, struct perf_cpu =20 /* Please add new feature detection here. */ =20 + attr.defer_callchain =3D true; + attr.sample_type =3D PERF_SAMPLE_CALLCHAIN; + if (has_attr_feature(&attr, /*flags=3D*/0)) + goto found; + perf_missing_features.defer_callchain =3D true; + pr_debug2("switching off deferred callchain support\n"); + attr.defer_callchain =3D false; + attr.sample_type =3D 0; + attr.inherit =3D true; attr.sample_type =3D PERF_SAMPLE_READ; if (has_attr_feature(&attr, /*flags=3D*/0)) @@ -2509,6 +2528,11 @@ static bool evsel__detect_missing_features(struct ev= sel *evsel, struct perf_cpu errno =3D old_errno; =20 check: + if (evsel->core.attr.defer_callchain && + evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN && + perf_missing_features.defer_callchain) + return true; + if (evsel->core.attr.inherit && (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && perf_missing_features.inherit_sample_read) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6dbc9690e0c9..a01c1de8f95f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -221,6 +221,7 @@ struct perf_missing_features { bool branch_counters; bool aux_action; bool inherit_sample_read; + bool defer_callchain; }; =20 extern 
struct perf_missing_features perf_missing_features; --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3173F2E764E; Fri, 18 Jul 2025 16:43:01 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; cv=none; b=OeaNro775DF4Zy15IF6WItjPgg2KNxXWYDSdwMK2AIig5i61HzfZwbzSvxj2qHZSYADOcUu2pOUXhEu0wT5kH826E7m0b3+dnVvpNqyYRk9ttWqKygH8QnKag6JKm2aW3qd8bMLmZ2WFWFJsZhyPKHgdAcvRM2fHhsPz0phqw7s= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; c=relaxed/simple; bh=sGPJyaMt5chGSbAS/TjitWf/JKBtwNhMQ3g3k4X3fJw=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: Content-Type; b=SPuwdwMgZHn4o0rJ2AW9XAS5QIPkJbPyo4RC5xEkDyn8SYNxr2ODEOaUNfyMsSDWo+SgTAqR0pfX7VC4OoN4a6ryFNz+XX9D1bQddEkMvh1zUip2C+HAGPP8q7F8pAP7554Kxztiz6lcaoxZ2hj0bnaV7rabw3/ygcvgQ34vfIE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=N5gg5cHE; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="N5gg5cHE" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 06C37C4CEF1; Fri, 18 Jul 2025 16:43:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856981; bh=sGPJyaMt5chGSbAS/TjitWf/JKBtwNhMQ3g3k4X3fJw=; h=Date:From:To:Cc:Subject:References:From; b=N5gg5cHEzQ3fYRy97fPsvW+nLf/MGc+J5T+cXhQbocCduQOXgB6FlKraLIfZOhQCs fcXRgZPNykhjFK6e6sWJj6a5NUJNFV5Xev9+5CUZNfszu8moE8N5zUGmqUudGbygAz 3HZ7VPrxemyV/v5mwSFhOLOreOoKR+ClCC8mUGel3cQ9akh+amLbH+I6lXDSaGlMvv 
Hrj90vbT3t37+GhYd/INl41qhNJurGbqO90z37kHUQ8JLwiHDyRGRd4xTybYPTRJHL tO8ADY0L1eETCQdcAv/zftBVDgpDtrJzbAi0LKtpRL1yRLuZBOnGTt1byYi50gmWwO vp1PZEvHHyZxg== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoBA-00000007Jak-3nWu; Fri, 18 Jul 2025 12:43:24 -0400 Message-ID: <20250718164324.758612856@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:29 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra , Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 10/11] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Namhyung Kim Handle the deferred callchains in the script output. 
$ perf script perf 801 [000] 18.031793: 1 cycles:P: ffffffff91a14c36 __intel_pmu_enable_all.isra.0+0x56 ([kernel.kall= syms]) ffffffff91d373e9 perf_ctx_enable+0x39 ([kernel.kallsyms]) ffffffff91d36af7 event_function+0xd7 ([kernel.kallsyms]) ffffffff91d34222 remote_function+0x42 ([kernel.kallsyms]) ffffffff91c1ebe1 generic_exec_single+0x61 ([kernel.kallsyms]) ffffffff91c1edac smp_call_function_single+0xec ([kernel.kallsyms]) ffffffff91d37a9d event_function_call+0x10d ([kernel.kallsyms]) ffffffff91d33557 perf_event_for_each_child+0x37 ([kernel.kallsyms= ]) ffffffff91d47324 _perf_ioctl+0x204 ([kernel.kallsyms]) ffffffff91d47c43 perf_ioctl+0x33 ([kernel.kallsyms]) ffffffff91e2f216 __x64_sys_ioctl+0x96 ([kernel.kallsyms]) ffffffff9265f1ae do_syscall_64+0x9e ([kernel.kallsyms]) ffffffff92800130 entry_SYSCALL_64+0xb0 ([kernel.kallsyms]) perf 801 [000] 18.031814: DEFERRED CALLCHAIN 7fb5fc22034b __GI___ioctl+0x3b (/usr/lib/x86_64-linux-gnu/lib= c.so.6) Signed-off-by: Namhyung Kim Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- tools/perf/builtin-script.c | 89 +++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 6c3bf74dd78c..a6f8209256fe 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2483,6 +2483,93 @@ static int process_sample_event(const struct perf_to= ol *tool, return ret; } =20 +static int process_deferred_sample_event(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine) +{ + struct perf_script *scr =3D container_of(tool, struct perf_script, tool); + struct perf_event_attr *attr =3D &evsel->core.attr; + struct evsel_script *es =3D evsel->priv; + unsigned int type =3D output_type(attr->type); + struct addr_location al; + FILE *fp =3D es->fp; + int ret =3D 0; + + if (output[type].fields =3D=3D 0) + return 0; + + /* Set thread to NULL to 
indicate addr_al and al are not initialized */ + addr_location__init(&al); + + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, + sample->time)) { + goto out_put; + } + + if (debug_mode) { + if (sample->time < last_timestamp) { + pr_err("Samples misordered, previous: %" PRIu64 + " this: %" PRIu64 "\n", last_timestamp, + sample->time); + nr_unordered++; + } + last_timestamp =3D sample->time; + goto out_put; + } + + if (filter_cpu(sample)) + goto out_put; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_err("problem processing %d event, skipping it.\n", + event->header.type); + ret =3D -1; + goto out_put; + } + + if (al.filtered) + goto out_put; + + if (!show_event(sample, evsel, al.thread, &al, NULL)) + goto out_put; + + if (evswitch__discard(&scr->evswitch, evsel)) + goto out_put; + + perf_sample__fprintf_start(scr, sample, al.thread, evsel, + PERF_RECORD_CALLCHAIN_DEFERRED, fp); + fprintf(fp, "DEFERRED CALLCHAIN"); + + if (PRINT_FIELD(IP)) { + struct callchain_cursor *cursor =3D NULL; + + if (symbol_conf.use_callchain && sample->callchain) { + cursor =3D get_tls_callchain_cursor(); + if (thread__resolve_callchain(al.thread, cursor, evsel, + sample, NULL, NULL, + scripting_max_stack)) { + pr_info("cannot resolve deferred callchains\n"); + cursor =3D NULL; + } + } + + fputc(cursor ? 
'\n' : ' ', fp); + sample__fprintf_sym(sample, &al, 0, output[type].print_ip_opts, + cursor, symbol_conf.bt_stop_list, fp); + } + + fprintf(fp, "\n"); + + if (verbose > 0) + fflush(fp); + +out_put: + addr_location__exit(&al); + return ret; +} + // Used when scr->per_event_dump is not set static struct evsel_script es_stdout; =20 @@ -4069,6 +4156,7 @@ int cmd_script(int argc, const char **argv) =20 perf_tool__init(&script.tool, !unsorted_dump); script.tool.sample =3D process_sample_event; + script.tool.callchain_deferred =3D process_deferred_sample_event; script.tool.mmap =3D perf_event__process_mmap; script.tool.mmap2 =3D perf_event__process_mmap2; script.tool.comm =3D perf_event__process_comm; @@ -4095,6 +4183,7 @@ int cmd_script(int argc, const char **argv) script.tool.throttle =3D process_throttle_event; script.tool.unthrottle =3D process_throttle_event; script.tool.ordering_requires_timestamps =3D true; + script.tool.merge_deferred_callchains =3D false; session =3D perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); --=20 2.47.2 From nobody Mon Oct 6 17:02:09 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 677F62E7BAD; Fri, 18 Jul 2025 16:43:01 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; cv=none; b=T9Bnva2ZAQH/4cgQuuyFPdOydnTG5aPoQ3fB/KhXrpMmDRIlTvZFPaxlUDIDFzYam7wtbjuPR19Kxyusx6tTFqIcUOCCrzgB3ugC6JfUZueA2BIq4zZpxf2AlBy6DHXc2aZd1kCoyNTbnzcfxLNzE+nToPSq5bxZzK5aC/cy5cI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1752856981; c=relaxed/simple; bh=pKVJ6nxkhuu1h2jdrw4lZKM2+MhJH/vy0bmKDWUgMBQ=; h=Message-ID:Date:From:To:Cc:Subject:References:MIME-Version: 
Content-Type; b=Zr+vPX7Sam6U45WazdzeDXiEZikpcd/ErFwzhKOqILKIfm8qXhVMMf2vOSsR86EEL4RxnDST/DjnARhAbmvvBN/M4as6vM5yNmO+pHQxJACOQBgiG7HwAVxNKiNfmW1zFkq5qD3gEBQXmD2o1ZhnPLEwtCYmysgVQ/AA82P6MZI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=G/G1AQkJ; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="G/G1AQkJ" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2B8E7C113CF; Fri, 18 Jul 2025 16:43:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1752856981; bh=pKVJ6nxkhuu1h2jdrw4lZKM2+MhJH/vy0bmKDWUgMBQ=; h=Date:From:To:Cc:Subject:References:From; b=G/G1AQkJDQ42jPmiq3Q+gZa/Y5FdhSKnA/pv8yCSW/6fiI71fhT5spsuo4VSNgNeT PL9LbRGcMKrAfZFdtUYQ1vRZzPafaCaMWIjvSw7mYcW9KXqFrZepCNn7R+9jfD73oI ViXLM8e6scLlXLknyYcgmUR/NaccbiictJndsGnMixC09A+XB4mB/D2N1eOztl1PMf SBTOgnJG/6WNe8OYEZUkS4N0RZoaMFu3i/ISUKqXDNC5wSL8/4briw9o5Hr3d+waem J0k2jXntXxn6bllnQGilSpS3qkZfuxXZDsiXDmVbzu6tAw22oy9rHDukc5ORkMVWRm KRW2BSOuYvxMg== Received: from rostedt by gandalf with local (Exim 4.98.2) (envelope-from ) id 1ucoBB-00000007JbG-0INi; Fri, 18 Jul 2025 12:43:25 -0400 Message-ID: <20250718164324.925232448@kernel.org> User-Agent: quilt/0.68 Date: Fri, 18 Jul 2025 12:41:30 -0400 From: Steven Rostedt To: linux-kernel@vger.kernel.org, linux-trace-kernel@vger.kernel.org, bpf@vger.kernel.org, x86@kernel.org Cc: Masami Hiramatsu , Mathieu Desnoyers , Josh Poimboeuf , Peter Zijlstra , Ingo Molnar , Jiri Olsa , Arnaldo Carvalho de Melo , Namhyung Kim , Thomas Gleixner , Andrii Nakryiko , Indu Bhagat , "Jose E. 
Marchesi" , Beau Belgrave , Jens Remus , Linus Torvalds , Andrew Morton , Jens Axboe , Florian Weimer , Sam James Subject: [PATCH v14 11/11] perf tools: Merge deferred user callchains References: <20250718164119.089692174@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Namhyung Kim Save samples with deferred callchains in a separate list and deliver them after merging the user callchains. If users don't want to merge they can set tool->merge_deferred_callchains to false to prevent the behavior. With the previous result, perf script will now show the merged callchains. $ perf script perf 801 [000] 18.031793: 1 cycles:P: ffffffff91a14c36 __intel_pmu_enable_all.isra.0+0x56 ([kernel.kall= syms]) ffffffff91d373e9 perf_ctx_enable+0x39 ([kernel.kallsyms]) ffffffff91d36af7 event_function+0xd7 ([kernel.kallsyms]) ffffffff91d34222 remote_function+0x42 ([kernel.kallsyms]) ffffffff91c1ebe1 generic_exec_single+0x61 ([kernel.kallsyms]) ffffffff91c1edac smp_call_function_single+0xec ([kernel.kallsyms]) ffffffff91d37a9d event_function_call+0x10d ([kernel.kallsyms]) ffffffff91d33557 perf_event_for_each_child+0x37 ([kernel.kallsyms= ]) ffffffff91d47324 _perf_ioctl+0x204 ([kernel.kallsyms]) ffffffff91d47c43 perf_ioctl+0x33 ([kernel.kallsyms]) ffffffff91e2f216 __x64_sys_ioctl+0x96 ([kernel.kallsyms]) ffffffff9265f1ae do_syscall_64+0x9e ([kernel.kallsyms]) ffffffff92800130 entry_SYSCALL_64+0xb0 ([kernel.kallsyms]) 7fb5fc22034b __GI___ioctl+0x3b (/usr/lib/x86_64-linux-gnu/lib= c.so.6) ... The old output can be obtained using the --no-merge-callchains option. Also, perf report can get the user callchain entry at the end. 
$ perf report --no-children --percent-limit=3D0 --stdio -q -S __intel_pmu= _enable_all.isra.0 # symbol: __intel_pmu_enable_all.isra.0 0.00% perf [kernel.kallsyms] | ---__intel_pmu_enable_all.isra.0 perf_ctx_enable event_function remote_function generic_exec_single smp_call_function_single event_function_call perf_event_for_each_child _perf_ioctl perf_ioctl __x64_sys_ioctl do_syscall_64 entry_SYSCALL_64 __GI___ioctl Signed-off-by: Namhyung Kim Signed-off-by: Josh Poimboeuf Signed-off-by: Steven Rostedt (Google) --- tools/perf/Documentation/perf-script.txt | 5 ++ tools/perf/builtin-script.c | 5 +- tools/perf/util/callchain.c | 24 +++++++++ tools/perf/util/callchain.h | 3 ++ tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 1 + tools/perf/util/session.c | 63 +++++++++++++++++++++++- tools/perf/util/tool.c | 1 + tools/perf/util/tool.h | 1 + 9 files changed, 102 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Document= ation/perf-script.txt index 28bec7e78bc8..03d112960632 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -527,6 +527,11 @@ include::itrace.txt[] The known limitations include exception handing such as setjmp/longjmp will have calls/returns not match. =20 +--merge-callchains:: + Enable merging deferred user callchains if available. This is the + default behavior. If you want to see separate CALLCHAIN_DEFERRED + records for some reason, use --no-merge-callchains explicitly. 
+ :GMEXAMPLECMD: script :GMEXAMPLESUBCMD: include::guest-files.txt[] diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a6f8209256fe..b50442cca540 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3775,6 +3775,7 @@ int cmd_script(int argc, const char **argv) bool header_only =3D false; bool script_started =3D false; bool unsorted_dump =3D false; + bool merge_deferred_callchains =3D true; char *rec_script_path =3D NULL; char *rep_script_path =3D NULL; struct perf_session *session; @@ -3928,6 +3929,8 @@ int cmd_script(int argc, const char **argv) "Guest code can be found in hypervisor process"), OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr, "Enable LBR callgraph stitching approach"), + OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains, + "Enable merge deferred user callchains"), OPTS_EVSWITCH(&script.evswitch), OPT_END() }; @@ -4183,7 +4186,7 @@ int cmd_script(int argc, const char **argv) script.tool.throttle =3D process_throttle_event; script.tool.unthrottle =3D process_throttle_event; script.tool.ordering_requires_timestamps =3D true; - script.tool.merge_deferred_callchains =3D false; + script.tool.merge_deferred_callchains =3D merge_deferred_callchains; session =3D perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index d7b7eef740b9..6d423d92861b 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1828,3 +1828,27 @@ int sample__for_each_callchain_node(struct thread *t= hread, struct evsel *evsel, } return 0; } + +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain) +{ + u64 nr_orig =3D sample_orig->callchain->nr - 1; + u64 nr_deferred =3D sample_callchain->callchain->nr; + struct ip_callchain *callchain; + + callchain =3D calloc(1 + nr_orig + nr_deferred, sizeof(u64)); + if (callchain =3D=3D NULL) { + 
sample_orig->deferred_callchain =3D false; + return -ENOMEM; + } + + callchain->nr =3D nr_orig + nr_deferred; + /* copy except for the last PERF_CONTEXT_USER_DEFERRED */ + memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64)= ); + /* copy deferred use callchains */ + memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips, + nr_deferred * sizeof(u64)); + + sample_orig->callchain =3D callchain; + return 0; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 86ed9e4d04f9..89785125ed25 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -317,4 +317,7 @@ int sample__for_each_callchain_node(struct thread *thre= ad, struct evsel *evsel, struct perf_sample *sample, int max_stack, bool symbols, callchain_iter_fn cb, void *data); =20 +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index dcd1130502df..1d5f50f83b31 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -83,6 +83,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_= map *cpus, evlist->ctl_fd.ack =3D -1; evlist->ctl_fd.pos =3D -1; evlist->nr_br_cntr =3D -1; + INIT_LIST_HEAD(&evlist->deferred_samples); } =20 struct evlist *evlist__new(void) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 85859708393e..d2895c8be167 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -86,6 +86,7 @@ struct evlist { int pos; /* index at evlist core object to check signals */ } ctl_fd; struct event_enable_timer *eet; + struct list_head deferred_samples; }; =20 struct evsel_str_handler { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 32154d230f6c..70dca8be72de 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1278,6 +1278,56 @@ static int evlist__deliver_sample(struct 
evlist *evl= ist, const struct perf_tool per_thread); } =20 +struct deferred_event { + struct list_head list; + union perf_event *event; +}; + +static int evlist__deliver_deferred_samples(struct evlist *evlist, + const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret =3D 0; + + if (!tool->merge_deferred_callchains) { + evsel =3D evlist__id2evsel(evlist, sample->id); + return tool->callchain_deferred(tool, event, sample, + evsel, machine); + } + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample orig_sample; + + ret =3D evlist__parse_sample(evlist, de->event, &orig_sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + if (sample->tid !=3D orig_sample.tid) + continue; + + evsel =3D evlist__id2evsel(evlist, orig_sample.id); + sample__merge_deferred_callchain(&orig_sample, sample); + ret =3D evlist__deliver_sample(evlist, tool, de->event, + &orig_sample, evsel, machine); + + if (orig_sample.deferred_callchain) + free(orig_sample.callchain); + + list_del(&de->list); + free(de); + + if (ret) + break; + } + return ret; +} + static int machines__deliver_event(struct machines *machines, struct evlist *evlist, union perf_event *event, @@ -1306,6 +1356,16 @@ static int machines__deliver_event(struct machines *= machines, return 0; } dump_sample(evsel, event, sample, perf_env__arch(machine->env)); + if (sample->deferred_callchain && tool->merge_deferred_callchains) { + struct deferred_event *de =3D malloc(sizeof(*de)); + + if (de =3D=3D NULL) + return -ENOMEM; + + de->event =3D event; + list_add_tail(&de->list, &evlist->deferred_samples); + return 0; + } return evlist__deliver_sample(evlist, tool, event, sample, evsel, machin= e); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -1365,7 +1425,8 @@ static int machines__deliver_event(struct machines 
*m= achines, return tool->aux_output_hw_id(tool, event, sample, machine); case PERF_RECORD_CALLCHAIN_DEFERRED: dump_deferred_callchain(evsel, event, sample); - return tool->callchain_deferred(tool, event, sample, evsel, machine); + return evlist__deliver_deferred_samples(evlist, tool, event, + sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index e1d60abb4e41..61fdba70b0a4 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -245,6 +245,7 @@ void perf_tool__init(struct perf_tool *tool, bool order= ed_events) tool->cgroup_events =3D false; tool->no_warn =3D false; tool->show_feat_hdr =3D SHOW_FEAT_NO_HEADER; + tool->merge_deferred_callchains =3D true; =20 tool->sample =3D process_event_sample_stub; tool->mmap =3D process_event_stub; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 9987bbde6d5e..d06580478ab1 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -87,6 +87,7 @@ struct perf_tool { bool cgroup_events; bool no_warn; bool dont_split_sample_group; + bool merge_deferred_callchains; enum show_feature_header show_feat_hdr; }; =20 --=20 2.47.2