From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DFB121465AE; Wed, 22 Jan 2025 02:31:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513113; cv=none; b=csZSVBSD95nuZXZ4BNGgx3/WhnOAc054lbxDX9hgQ1QCZjVBoei+beJNypwrbdcBXslZ1mwSrCggxIvmS9El/i4IV13/zs7KGAb2DuP3+fLCBA5XkToHNASiYrKv7jHZJNt7ZQ4Qsj58A04f9rEzWJaHIgWshhEbBGy1lWSj2Qo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513113; c=relaxed/simple; bh=8KZmk0K4mi+40CnPVJnUplDw7ju6nxeHQ4/nuHlfpKg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=mYtR6tfug1U3hh4EKaZ1/lIoMNlcinSLRjzs3dYJfBikXTtNtbBWVHDhAzj12CVSQxiignC5uONFOvNd6I0FHqu26cJntkGxW1DHD/DkOkow7t4N5/5atyAyoCPKHDYeJPzHxtXmVGBnbHtATsBbQKZeaZ4YrT4q3B3+ePfDgaw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=OBEm3bdR; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="OBEm3bdR" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 91DEBC4CEE8; Wed, 22 Jan 2025 02:31:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513112; bh=8KZmk0K4mi+40CnPVJnUplDw7ju6nxeHQ4/nuHlfpKg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=OBEm3bdRCpqONhsGvtNzK8V6nbw/Qmo0U43b7s5g4ha8Rw9JlKPxLDRENW9KtTNZ7 iSga87+dV/IE0W8Gr0gSbim8NrxZQr6ufYggdoD8AbUg2PRXoFc/qHz9CUypGEnUrK UGfcngYdekSR80SLQCwZcle3cyNJSqpj+8UcdcZ0Eza4o5XT6Z8GtFOjyBbLfgE0Xo XrLTxSFG/lefeaLbOZmM3yKUxoFHJZu29tCitAMXyhvg9UnPrKrgIZIqb9xwy7D9YO 
10Antf7EWD/Q1n04GzfYHWG5i9fSxFgkfSQ6CLMza+xNzSgg6UGS/LtVtjkNH+ENhD Q94trD+u092UA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 01/39] task_work: Fix TWA_NMI_CURRENT error handling Date: Tue, 21 Jan 2025 18:30:53 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" It's possible for irq_work_queue() to fail if the work has already been claimed. That can happen if a TWA_NMI_CURRENT task work is requested before a previous TWA_NMI_CURRENT IRQ work on the same CPU has gotten a chance to run. The error has to be checked before the write to task->task_works. Also the try_cmpxchg() loop isn't needed in NMI context. The TWA_NMI_CURRENT case really is special, keep things simple by keeping its code all together in one place. 
Fixes: 466e4d801cd4 ("task_work: Add TWA_NMI_CURRENT as an additional notif= y mode.") Signed-off-by: Josh Poimboeuf --- kernel/task_work.c | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/kernel/task_work.c b/kernel/task_work.c index c969f1f26be5..92024a8bfe12 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -58,25 +58,38 @@ int task_work_add(struct task_struct *task, struct call= back_head *work, int flags =3D notify & TWA_FLAGS; =20 notify &=3D ~TWA_FLAGS; + if (notify =3D=3D TWA_NMI_CURRENT) { - if (WARN_ON_ONCE(task !=3D current)) + if (WARN_ON_ONCE(!in_nmi() || task !=3D current)) return -EINVAL; if (!IS_ENABLED(CONFIG_IRQ_WORK)) return -EINVAL; - } else { - /* - * Record the work call stack in order to print it in KASAN - * reports. - * - * Note that stack allocation can fail if TWAF_NO_ALLOC flag - * is set and new page is needed to expand the stack buffer. - */ - if (flags & TWAF_NO_ALLOC) - kasan_record_aux_stack_noalloc(work); - else - kasan_record_aux_stack(work); +#ifdef CONFIG_IRQ_WORK + head =3D task->task_works; + if (unlikely(head =3D=3D &work_exited)) + return -ESRCH; + + if (!irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume))) + return -EBUSY; + + work->next =3D head; + task->task_works =3D work; +#endif + return 0; } =20 + /* + * Record the work call stack in order to print it in KASAN + * reports. + * + * Note that stack allocation can fail if TWAF_NO_ALLOC flag + * is set and new page is needed to expand the stack buffer. 
+ */ + if (flags & TWAF_NO_ALLOC) + kasan_record_aux_stack_noalloc(work); + else + kasan_record_aux_stack(work); + head =3D READ_ONCE(task->task_works); do { if (unlikely(head =3D=3D &work_exited)) --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id ECA9214AD02; Wed, 22 Jan 2025 02:31:53 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513114; cv=none; b=gGlAqbd9/DGs0wL3G23V7L7Ix6MLit+X8Ctf3wRjZQzX+jKnm9ywd89Hm2hXMXK7ZbWXm5rI2RGhbPnnv3coSUNuepT3zIZuYS1hsUSIIw2lR/rJZXdsj2X1u0o0LE7CLsQCLYHtttjLbRrNYIm3se/MnQxyw6NKO5az7RFKris= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513114; c=relaxed/simple; bh=/FYrIsePcDCAg2/76OvXHiQ6BEsZk0avCDYiwfXrc5A=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=CXgbqheMV3OUKrVy+Uw0TqtRBw6/eXkdsf6uyVN9kwYx/FnbLkLOvRU0yRkTBP14ag61ZbY6xtANl3TzbxgzP4BAkSOknz69IL4eurVbuP9iSJdgWMMRhNkgtb8IIQXZydjX+rTYQpwb1i7t0tpHK1Df/5/RHNsIH3+q5yZi8Dk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=H9qVTnat; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="H9qVTnat" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 93F0FC4CEE5; Wed, 22 Jan 2025 02:31:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513113; bh=/FYrIsePcDCAg2/76OvXHiQ6BEsZk0avCDYiwfXrc5A=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=H9qVTnatIttb84SXYhP9jtH2vB1TgJ2sbCWJExDsSPf6gCm1cXAqNhHQq72EQYjt1 DNn2LjPgGXu+4ORZOaWZM7JFB6JP3eZjZnBqTXefVg8ZUMlVJagHR5LqpuNkdAufHo 22+HhIVDdJ5+6SGi24MIMKWpSaTRkLoya0u2ZZ3/VLbsbROQwrBmselQkkYlD6mRPl yLdjPNvv6b5t2raAvH8GCVPN3E3IhMhLZAem3LDLUFeFVQlxk/XIdYLzmx4lf9Vvud Snl+WnQ1oFRhjVP3FeI4WeZdnmQAf7cr1XMAABcPWaCVaZwmMMdzibn/z5GMzVM5Dk 213Kk2SuQMraA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 02/39] task_work: Fix TWA_NMI_CURRENT race with __schedule() Date: Tue, 21 Jan 2025 18:30:54 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" If TWA_NMI_CURRENT task work is queued from an NMI triggered while running in __schedule() with IRQs disabled, task_work_set_notify_irq() ends up inadvertently running on the next scheduled task. So the original task doesn't get its TIF_NOTIFY_RESUME flag set and the task work may get delayed indefinitely, or may not get to run at all. __schedule() // disable irqs task_work_add(current, work, TWA_NMI_CURRENT); // current =3D next; // enable irqs task_work_set_notify_irq() test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); // wrong task! // original task skips task work on its next return to user (or exit!) Fix it by storing the task pointer along with the irq_work struct and passing that task to set_notify_resume(). 
Fixes: 466e4d801cd4 ("task_work: Add TWA_NMI_CURRENT as an additional notif= y mode.") Signed-off-by: Josh Poimboeuf --- kernel/task_work.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/kernel/task_work.c b/kernel/task_work.c index 92024a8bfe12..f17447f69843 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -7,12 +7,23 @@ static struct callback_head work_exited; /* all we need is ->next =3D=3D N= ULL */ =20 #ifdef CONFIG_IRQ_WORK + +struct nmi_irq_work { + struct irq_work work; + struct task_struct *task; +}; + static void task_work_set_notify_irq(struct irq_work *entry) { - test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); + struct nmi_irq_work *work =3D container_of(entry, struct nmi_irq_work, wo= rk); + + set_notify_resume(work->task); } -static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) =3D - IRQ_WORK_INIT_HARD(task_work_set_notify_irq); + +static DEFINE_PER_CPU(struct nmi_irq_work, nmi_irq_work) =3D { + .work =3D IRQ_WORK_INIT_HARD(task_work_set_notify_irq), +}; + #endif =20 /** @@ -65,15 +76,21 @@ int task_work_add(struct task_struct *task, struct call= back_head *work, if (!IS_ENABLED(CONFIG_IRQ_WORK)) return -EINVAL; #ifdef CONFIG_IRQ_WORK +{ + struct nmi_irq_work *irq_work =3D this_cpu_ptr(&nmi_irq_work); + head =3D task->task_works; if (unlikely(head =3D=3D &work_exited)) return -ESRCH; =20 - if (!irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume))) + if (!irq_work_queue(&irq_work->work)) return -EBUSY; =20 + irq_work->task =3D current; + work->next =3D head; task->task_works =3D work; +} #endif return 0; } @@ -109,11 +126,6 @@ int task_work_add(struct task_struct *task, struct cal= lback_head *work, case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; -#ifdef CONFIG_IRQ_WORK - case TWA_NMI_CURRENT: - irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); - break; -#endif default: WARN_ON_ONCE(1); break; --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from 
smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0A205152787; Wed, 22 Jan 2025 02:31:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513115; cv=none; b=Rz8ZK5DoVJvqabs/dkObz2N2yWUHNDgWsa2tuQk0lomn+B1w3prGDkC0cBsmbtsUeBTzVYyQ48ma82VQqUgFdB8ExRs1VDItH60RKfGP+zfWLDxG9v2N2iqCgfx5YkFgjhHFDhCTk7kC4jAQP22aApDhRRwI14XDGmTp8yKZJOA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513115; c=relaxed/simple; bh=gEweV+ZvxXqxAtdRcPxlgi9cIku/oRPAY9rngmFOgwU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=GcbJX8AOqXSKgAbuX0gD0TjjZ+k42v5O1lRa9XGdfSZXBwXtcGNRTZiUQCLPX5d71vV2yDe6l7YPMido/mWxc+eOyf0+sl9MfUJNSaX7EghK76O7OpSLOk+OcyL8R7S15QHgl0/2pAoSfujg4ER7LuzxnylpBYJ+XmoQldnHFEk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=uOiS4UWo; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="uOiS4UWo" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9BAF9C4CEE8; Wed, 22 Jan 2025 02:31:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513114; bh=gEweV+ZvxXqxAtdRcPxlgi9cIku/oRPAY9rngmFOgwU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=uOiS4UWolvFcKqdrmMA6w1gW0GCsNGN2ojH/M7P7VTdPhRUF1kmfxiKd+qKLKMw2a VSQLJ4eoMYMkULYxNNBg2xC5xLAOGgs5obEARz2Gg+/VKgORVL0HJ7pRwUbQNrWeYZ TKGorzEf+Jevuz00R6ICLhdWL96MDuBqyCRR1gl7288nvgtt+DwXgI0aTra/PWqdDZ 4hoElUlB+uLv6Yd6eHh1zob98Jt/k2SbwTsxvg1KmBUcqpGIPuC5GLSMphBcFgzx9T 
s6K6CsI2FEcO7Aczahk5SHfkiXpUcr1VEJaJ8fGyxi+iTSOysP6vmtYu5Rw3H9vt5D YiMzY3cc9fHYA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 03/39] mm: Add guard for mmap_read_lock Date: Tue, 21 Jan 2025 18:30:55 -0800 Message-ID: <8f8f672bec535ce8ed54d79591d98386a4d46c28.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" This is the new way of doing things. 
Converting all existing mmap_read_lock users is an exercise left for the reader ;-) Suggested-by: Peter Zijlstra Signed-off-by: Josh Poimboeuf --- include/linux/mmap_lock.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index de9dc20b01ba..c971c4617060 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -182,4 +182,6 @@ static inline int mmap_lock_is_contended(struct mm_stru= ct *mm) return rwsem_is_contended(&mm->mmap_lock); } =20 +DEFINE_GUARD(mmap_read_lock, struct mm_struct *, mmap_read_lock(_T), mmap_= read_unlock(_T)) + #endif /* _LINUX_MMAP_LOCK_H */ --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 16A741465AE; Wed, 22 Jan 2025 02:31:55 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513116; cv=none; b=dXYYkaNfRAWCDiw9BfmAjw0owSISv5qncB3/3G26aLeFE8fbVeBxYWpYbGUoq2nT+xHjyABGcHYLueSMke3jlt+qQW4ryQDx+cVQkMXpg+qlwJn2qCEBaiqPTUd7wIxIPJc2vZmT843/CFTAWABQ2AtAOPSuJlBh4zrD4+p0Eao= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513116; c=relaxed/simple; bh=4V7fRRbrcaQZQspNP6x9fH3uJzB9WiW6Ms6JvucjOCU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=oig99DocXZLrD6XOfrhzvP6p1k+Rx+7I2DgNZsx2FB08QaCtOO494tQYMLAe4xsbeCRnvEZOmO+oc+9w0sIlecgoON84gLEtHS07TcJlNPMNKI/PG6ulbJkljsJB1JeAhuNP5+XKWpQplG+Sdd14O3T/omsceS/o/n5HEFJiw1I= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=IRkjhoWv; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; 
dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="IRkjhoWv" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A6963C4CEF4; Wed, 22 Jan 2025 02:31:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513115; bh=4V7fRRbrcaQZQspNP6x9fH3uJzB9WiW6Ms6JvucjOCU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=IRkjhoWvqu+HlBmXeEG35Ogzy6lKWw9hXLFV/8qWYWXZ6TTxnCMqzZ9DAI8RVXrMl /nNESA4KJs6N42E3xSvt5tV1uHqeSMPYOGwO+CXXW0ZQHNlA73fA/2dfN+FmxyPGE2 3VUvUmjWHQFkndHaE/nBIkBJxtvaWpo6ITfQC7azxErrRW+TmRZQge8RFysgXfk1Pg RUTcLFcRj3w4J+mvq82BoaMHL6wyyn4hWiZC+nQ+s8oR7Zo2rKZ8KHSobFN6dv7cHK aEZX4xPL47wrO2TThop3U/rP4BxtLXLe8RvWZqfS2ifbIs0NEZrpq4gvDUQOEoSFFk W5E6A8yS3bd4A== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 04/39] x86/vdso: Fix DWARF generation for getrandom() Date: Tue, 21 Jan 2025 18:30:56 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add CFI annotations to the VDSO implementation of getrandom() so it will have valid DWARF unwinding metadata. 
Fixes: 33385150ac45 ("x86: vdso: Wire up getrandom() vDSO implementation") Signed-off-by: Josh Poimboeuf --- arch/x86/entry/vdso/vgetrandom-chacha.S | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/entry/vdso/vgetrandom-chacha.S b/arch/x86/entry/vdso/= vgetrandom-chacha.S index bcba5639b8ee..cc82da9216fb 100644 --- a/arch/x86/entry/vdso/vgetrandom-chacha.S +++ b/arch/x86/entry/vdso/vgetrandom-chacha.S @@ -4,7 +4,7 @@ */ =20 #include -#include +#include =20 .section .rodata, "a" .align 16 @@ -22,7 +22,7 @@ CONSTANTS: .octa 0x6b20657479622d323320646e61707865 * rcx: number of 64-byte blocks to write to output */ SYM_FUNC_START(__arch_chacha20_blocks_nostack) - + CFI_STARTPROC .set output, %rdi .set key, %rsi .set counter, %rdx @@ -175,4 +175,5 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) pxor temp,temp =20 ret + CFI_ENDPROC SYM_FUNC_END(__arch_chacha20_blocks_nostack) --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1576C15F41F; Wed, 22 Jan 2025 02:31:56 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513117; cv=none; b=Dr5XGTQewrPoIXn0tfKvDwbjADbpLroTESgVB4BhdlTCRELbl3myD/qtFX0mvAJOXYQ8JRJFrzmtori8MqyexG7+gm89tCvXsYuKpp7b+yXcWoZOFoiF9vCR9n+0jmAW+Mr8eo1NaZul33R/M18E8MpIkxYgATeXP6QrAOefJm8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513117; c=relaxed/simple; bh=tsZz8mO1UEv9uF6j2/ReT4Uu/+oVkcBzPEhAei4uj+I=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=rYiHcVzHrpaXDEQXXq6A7ZqDGaiY6eKkzDZUSCBwvXNi/bZtm8hUlAvOt/zOOHA/3lKx8oB/NholetnlOXcqb8wSzr8TNlMlAQ8Z/dcXy+LHQSiQr5WksOJsL6AJTNyUzSRxeed/l3lCZeLPg7pwftFKA5W51QuTHZ9QECxmhmY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=XAPosJho; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="XAPosJho" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A5E57C4CEE2; Wed, 22 Jan 2025 02:31:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513116; bh=tsZz8mO1UEv9uF6j2/ReT4Uu/+oVkcBzPEhAei4uj+I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=XAPosJhopwYKNSB+WzO5fg+A5xPSTuwQ/StQkmyd/1ZACtjE+XLJ2WboxPFKaHnCE L3Uymp7EZNPOBtr0l5tnkQZhshvhKh1SDeZLI/46lfTxVR1MNNYR92nqfwup90rEj+ yiSNNfKmZOQnlAxiNN8ZLlLGoW1b3aod87P3EmnXH0zBUCx9JfP9cdRimiQoD9C7NY bQWAVOx6rpgcKptQK3JkMEGqbeHu7xtLmI8pOG4J5O6J6o7sw2iRWaEaVJechx+qax zw1lppZYfsnAZZACN4wmysr66HFUt7RQcpuEWnGRrm0M7dx3rwpOahrjDZHVFbBG38 7O0V07JHHPZTw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 05/39] x86/asm: Avoid emitting DWARF CFI for non-VDSO Date: Tue, 21 Jan 2025 18:30:57 -0800 Message-ID: <0c6c20d81164ba1dd375465f31bd51a8e1af164d.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: 
List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" It was decided years ago that .cfi_* annotations aren't maintainable in the kernel. They were replaced by objtool unwind hints. For the kernel proper, ensure the CFI_* macros don't do anything. On the other hand the VDSO library *does* use them, so user space can unwind through it. Make sure these macros only work for VDSO. They aren't actually being used outside of VDSO anyway, so there's no functional change. Signed-off-by: Josh Poimboeuf --- arch/x86/include/asm/dwarf2.h | 51 ++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 430fca13bb56..b195b3c8677e 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -6,6 +6,15 @@ #warning "asm/dwarf2.h should be only included in pure assembly files" #endif =20 +#ifdef BUILD_VDSO + + /* + * For the vDSO, emit both runtime unwind information and debug + * symbols for the .dbg file. + */ + + .cfi_sections .eh_frame, .debug_frame + #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc #define CFI_DEF_CFA .cfi_def_cfa @@ -21,21 +30,31 @@ #define CFI_UNDEFINED .cfi_undefined #define CFI_ESCAPE .cfi_escape =20 -#ifndef BUILD_VDSO - /* - * Emit CFI data in .debug_frame sections, not .eh_frame sections. - * The latter we currently just discard since we don't do DWARF - * unwinding at runtime. So only the offline DWARF information is - * useful to anyone. Note we should not use this directive if we - * ever decide to enable DWARF unwinding at runtime. - */ - .cfi_sections .debug_frame -#else - /* - * For the vDSO, emit both runtime unwind information and debug - * symbols for the .dbg file. - */ - .cfi_sections .eh_frame, .debug_frame -#endif +#else /* !BUILD_VDSO */ + +/* + * On x86, these macros aren't used outside VDSO. 
As well they shouldn't = be: + * they're fragile and very difficult to maintain. + */ + +.macro nocfi args:vararg +.endm + +#define CFI_STARTPROC nocfi +#define CFI_ENDPROC nocfi +#define CFI_DEF_CFA nocfi +#define CFI_DEF_CFA_REGISTER nocfi +#define CFI_DEF_CFA_OFFSET nocfi +#define CFI_ADJUST_CFA_OFFSET nocfi +#define CFI_OFFSET nocfi +#define CFI_REL_OFFSET nocfi +#define CFI_REGISTER nocfi +#define CFI_RESTORE nocfi +#define CFI_REMEMBER_STATE nocfi +#define CFI_RESTORE_STATE nocfi +#define CFI_UNDEFINED nocfi +#define CFI_ESCAPE nocfi + +#endif /* !BUILD_VDSO */ =20 #endif /* _ASM_X86_DWARF2_H */ --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8B9F8126BFA; Wed, 22 Jan 2025 02:31:58 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513118; cv=none; b=shBQLwt8sbeoHXFw9jS2T7ItRqwv773OKIOXLcLoIZWupliLTvojNND821nhDEl7PMiciTcB+Awae71xmTktTq9bg6Q/pz1mz7QQQ+doWQNgV7V1ftFDd/+jusUUxWbIjJp72AV/FTGcTf7O5t6JwkQiN4OGtXDoVKRHqPs/K+I= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513118; c=relaxed/simple; bh=yPFN2z9o6f/lwIY6xtHqYS7ejGXFLr8o4N6QW7j+xB8=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=DCBOljayVCxDzaKI6nWeO6ngqIAEVazgxBsF523yCUps361/2RVKzWl69RrIHrCAcnsBAqfzFWVyP+IL54Ixysxz9X5EGAUhOqavfjhVR5SRKa5foMVHENv3r7g3gTcGEFY6p3l1BhWIWJNp7cdlezz4VhasWGcXjKuIpUFClsU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=CpC8k8ER; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) 
header.d=kernel.org header.i=@kernel.org header.b="CpC8k8ER" Received: by smtp.kernel.org (Postfix) with ESMTPSA id AD43FC4CEF0; Wed, 22 Jan 2025 02:31:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513118; bh=yPFN2z9o6f/lwIY6xtHqYS7ejGXFLr8o4N6QW7j+xB8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=CpC8k8ERg2WZsMIAmSMoBmz3smLO6bjRfqAfkJom5dGkFkLxi3tcHE8XKFRR7f55z sLFP5egRY2MTPd4sPKv7PsYJ79rC1iUfzswjPuUKLjMEgGtjNeZnPbkmAsTy8jm6Z0 HGmEziSUneU3yNGLq03CjmI0uSnW3zouOKHvEk/r3U/De2Su0hyIs1SdfqKR2vUa/p cCnyAATq75vD4W6s2Vf6fhVrady4/7og00gLWyrOcwD6/qI7GGazd3LA5ud2lZmBOD DwXjTGOOJrZKOc0VTznHS03YygsA07X1sWmoOCWFsI40RkGsBNir6uoWjOd0JdxWWx ehChCBcZtxzjw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 06/39] x86/asm: Fix VDSO DWARF generation with kernel IBT enabled Date: Tue, 21 Jan 2025 18:30:58 -0800 Message-ID: <5c7992c111adee94e242fbca2b3e64ab8e96e595.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The DWARF .cfi_startproc annotation needs to be at the very beginning of a function. But with kernel IBT that doesn't happen as ENDBR is sneakily embedded in SYM_FUNC_START. As a result the DWARF unwinding info is wrong at the beginning of all the VDSO functions. 
Fix it by adding CFI_STARTPROC and CFI_ENDPROC to SYM_FUNC_START_* and SYM_FUNC_END respectively. Note this only affects VDSO, as the CFI_* macros are empty for the kernel proper. Fixes: c4691712b546 ("x86/linkage: Add ENDBR to SYM_FUNC_START*()") Signed-off-by: Josh Poimboeuf --- arch/x86/entry/vdso/vdso-layout.lds.S | 2 +- arch/x86/entry/vdso/vgetrandom-chacha.S | 2 -- arch/x86/entry/vdso/vsgx.S | 4 ---- arch/x86/include/asm/linkage.h | 29 +++++++++++++++++++------ arch/x86/include/asm/vdso.h | 1 - 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vd= so-layout.lds.S index 872947c1004c..506c9800a5aa 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include +#include #include =20 /* diff --git a/arch/x86/entry/vdso/vgetrandom-chacha.S b/arch/x86/entry/vdso/= vgetrandom-chacha.S index cc82da9216fb..a33212594731 100644 --- a/arch/x86/entry/vdso/vgetrandom-chacha.S +++ b/arch/x86/entry/vdso/vgetrandom-chacha.S @@ -22,7 +22,6 @@ CONSTANTS: .octa 0x6b20657479622d323320646e61707865 * rcx: number of 64-byte blocks to write to output */ SYM_FUNC_START(__arch_chacha20_blocks_nostack) - CFI_STARTPROC .set output, %rdi .set key, %rsi .set counter, %rdx @@ -175,5 +174,4 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack) pxor temp,temp =20 ret - CFI_ENDPROC SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S index 37a3d4c02366..c0342238c976 100644 --- a/arch/x86/entry/vdso/vsgx.S +++ b/arch/x86/entry/vdso/vsgx.S @@ -24,8 +24,6 @@ .section .text, "ax" =20 SYM_FUNC_START(__vdso_sgx_enter_enclave) - /* Prolog */ - .cfi_startproc push %rbp .cfi_adjust_cfa_offset 8 .cfi_rel_offset %rbp, 0 @@ -143,8 +141,6 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave) jle .Lout jmp .Lenter_enclave =20 - .cfi_endproc - _ASM_VDSO_EXTABLE_HANDLE(.Lenclu_eenter_eresume, 
.Lhandle_exception) =20 SYM_FUNC_END(__vdso_sgx_enter_enclave) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index dc31b13b87a0..2866d57ef907 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -40,6 +40,10 @@ =20 #ifdef __ASSEMBLY__ =20 +#ifndef LINKER_SCRIPT +#include +#endif + #if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && != defined(BUILD_VDSO) #define RET jmp __x86_return_thunk #else /* CONFIG_MITIGATION_RETPOLINE */ @@ -112,40 +116,51 @@ # define SYM_FUNC_ALIAS_MEMFUNC SYM_FUNC_ALIAS #endif =20 +#define __SYM_FUNC_START \ + CFI_STARTPROC ASM_NL \ + ENDBR + +#define __SYM_FUNC_END \ + CFI_ENDPROC ASM_NL + /* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type = */ #define SYM_TYPED_FUNC_START(name) \ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ - ENDBR + __SYM_FUNC_START =20 /* SYM_FUNC_START -- use for global functions */ #define SYM_FUNC_START(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ - ENDBR + __SYM_FUNC_START =20 /* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ #define SYM_FUNC_START_NOALIGN(name) \ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ - ENDBR + __SYM_FUNC_START =20 /* SYM_FUNC_START_LOCAL -- use for local functions */ #define SYM_FUNC_START_LOCAL(name) \ SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \ - ENDBR + __SYM_FUNC_START =20 /* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment = */ #define SYM_FUNC_START_LOCAL_NOALIGN(name) \ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ - ENDBR + __SYM_FUNC_START =20 /* SYM_FUNC_START_WEAK -- use for weak functions */ #define SYM_FUNC_START_WEAK(name) \ SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \ - ENDBR + __SYM_FUNC_START =20 /* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ #define SYM_FUNC_START_WEAK_NOALIGN(name) \ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ - ENDBR + __SYM_FUNC_START + +#define SYM_FUNC_END(name) \ + 
__SYM_FUNC_END \ + SYM_END(name, SYM_T_FUNC) =20 #endif /* _ASM_X86_LINKAGE_H */ =20 diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index d7f6592b74a9..0111c349bbc5 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h @@ -2,7 +2,6 @@ #ifndef _ASM_X86_VDSO_H #define _ASM_X86_VDSO_H =20 -#include #include #include =20 --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 079D318C936; Wed, 22 Jan 2025 02:31:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513120; cv=none; b=pJ29Pe1pdsBQ/NL5PWyDWU33OIy4scAtVF6fAzlZ6aGIBA7KuOZ/bUjl3U2ut3wmjy5tCLzvfATQlFC9HQ0Rll05ZBwhMY1p7+BKT2yUDmInbingIAGAj3wXcgMAsN6eSpscG7+4ZeZHMu17qnEuFD6r4dHtbSgv6at6BK7NGwU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513120; c=relaxed/simple; bh=OfVdBysbrIhFvkBQECz1nIAHL1MaijSMXmygC3lhj8k=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=oIKy/3k/LpNL0TkT9SEaBDR5KJ3IRaP3gfEDq+f7wb8+OCj/GouQLmNE1auHqGo5AQdBJfXl+xebdJIDsXUb84740OCogIF8rH/27+8ezp0Dw43RvnSUn9fBHpvKMkPBOUqQSAeBCbcAZ6skulRZSXBJTzUQhKbFpsxB+rW5CrQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=TLq3709R; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="TLq3709R" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 31CCDC4CEE2; Wed, 22 Jan 2025 02:31:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; 
t=1737513119; bh=OfVdBysbrIhFvkBQECz1nIAHL1MaijSMXmygC3lhj8k=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=TLq3709RcEAFENn+N/qu0k2YlUUNpNpAEUhIjTfg6+7ey8x9Phjb3QPupCvr/yLzY FHgyvNt6KlvKTt3co4hG7F/P96roepnqetfbD5+FPoMm8R1L7JkgO21s21ip8ZyZ1p FaA+1XAQrHVNyTGs4qdOMbiX2vnMCP3wEtHsTkkd1FRj52Iyo6wo+ENLOXMzzE7v3i CWJCL7G/keP+AfBisOgkxH2lxtNAWzmS1VWaIpx9goObaHO68o/r+p26EX0cRrKtle NOfgGC2PfEtrOUMx9wwjg80lQ6PSuyXQZweu2FThxmRLduP4DAzlQGFdgkQhkmjNOo N30wLdUGKDhrg== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 07/39] x86/vdso: Use SYM_FUNC_{START,END} in __kernel_vsyscall() Date: Tue, 21 Jan 2025 18:30:59 -0800 Message-ID: <524fd06bc23b29719b22f8a23f1db802f871bfaa.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Use SYM_FUNC_{START,END} instead of all the boilerplate. No functional change. 
Signed-off-by: Josh Poimboeuf --- arch/x86/entry/vdso/vdso32/system_call.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso= /vdso32/system_call.S index d33c6513fd2c..bdc576548240 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -9,11 +9,7 @@ #include =20 .text - .globl __kernel_vsyscall - .type __kernel_vsyscall,@function - ALIGN -__kernel_vsyscall: - CFI_STARTPROC +SYM_FUNC_START(__kernel_vsyscall) /* * Reshuffle regs so that all of any of the entry instructions * will preserve enough state. @@ -79,7 +75,5 @@ SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL) CFI_RESTORE ecx CFI_ADJUST_CFA_OFFSET -4 RET - CFI_ENDPROC - - .size __kernel_vsyscall,.-__kernel_vsyscall +SYM_FUNC_END(__kernel_vsyscall) .previous --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 13D47192B86; Wed, 22 Jan 2025 02:32:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513121; cv=none; b=AFLTGVmfppbn2zakb1vylglevW2sA1hoAqHQH5WvsThrUmqq8hmKxRJf5BpGiEDX6FlAI+BWWC0X2+YJ7DF0KxPxJVTRZQlMuxK4Ipkn3MSjkUUGU5xPOGeVPYB1tA6V4oy236GzjeQ+RyZbnr3IJ4eDdu/f6wH4wmgz/R/rlZw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513121; c=relaxed/simple; bh=uMF4+TnDKOaSbuY20FZjJhd0apIR9OxAuehwgAGZ9PQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=SMbYdM/BJUL0bJiE2KiR/Q6RK2eVUNiT2S2agHKslkuB7oYzp9nQEA8cfDDpcvYfs+wAVnWGptHwEHagaKGrxEpOJzwBF+KKzUYiFA4ZPJgZLNv0YurpYHxTy449Zi2COsH/l2egh5oUvvj5taa/XPhKXC/HWbMTgkXeiNuatdw= 
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=og8zjOL4; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="og8zjOL4" Received: by smtp.kernel.org (Postfix) with ESMTPSA id A516FC4CEED; Wed, 22 Jan 2025 02:31:59 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513120; bh=uMF4+TnDKOaSbuY20FZjJhd0apIR9OxAuehwgAGZ9PQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=og8zjOL4MZb9DzhJMGIxdHicAFOsIuziimkgHnA7I6ZKlliIu7kpWkdom5gcA/7YR izSpCxxNRKOOqDAUnVFsTjCd7oH0Mh7auCPn4PyccAsXFu2S8GHOyRViiMULpTka2L q+l95aX8VJvDZOlheEBcreiZGv5Z2uAhapF/HYY0DgL2eqBmUCnIXAadhzYdmHiwVt eDw/AX1s7Jlwl7aWBfGZo6aS62F6rKjg+y0ScDMZcAB6AI20wUQwb9oZkgZaj2YabM a0Gzxie7O5vzwIJ2pgNl5A3pyoJUHlY+xUBmiilZQjjBIgX1fsZ89dm7e5c+IRi6kP FpB13wDND6n7Q== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 08/39] x86/vdso: Use CFI macros in __vdso_sgx_enter_enclave() Date: Tue, 21 Jan 2025 18:31:00 -0800 Message-ID: <688fecff7dda7650a4ccf193ce59affa516c8250.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Use the CFI macros instead of the 
raw .cfi_* directives to be consistent with the rest of the VDSO asm. It's also easier on the eyes. No functional changes. Signed-off-by: Josh Poimboeuf --- arch/x86/entry/vdso/vsgx.S | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/entry/vdso/vsgx.S b/arch/x86/entry/vdso/vsgx.S index c0342238c976..8d7b8eb45c50 100644 --- a/arch/x86/entry/vdso/vsgx.S +++ b/arch/x86/entry/vdso/vsgx.S @@ -24,13 +24,14 @@ .section .text, "ax" =20 SYM_FUNC_START(__vdso_sgx_enter_enclave) + SYM_F_ALIGN push %rbp - .cfi_adjust_cfa_offset 8 - .cfi_rel_offset %rbp, 0 + CFI_ADJUST_CFA_OFFSET 8 + CFI_REL_OFFSET %rbp, 0 mov %rsp, %rbp - .cfi_def_cfa_register %rbp + CFI_DEF_CFA_REGISTER %rbp push %rbx - .cfi_rel_offset %rbx, -8 + CFI_REL_OFFSET %rbx, -8 =20 mov %ecx, %eax .Lenter_enclave: @@ -77,13 +78,11 @@ SYM_FUNC_START(__vdso_sgx_enter_enclave) .Lout: pop %rbx leave - .cfi_def_cfa %rsp, 8 + CFI_DEF_CFA %rsp, 8 RET =20 - /* The out-of-line code runs with the pre-leave stack frame. 
*/ - .cfi_def_cfa %rbp, 16 - .Linvalid_input: + CFI_DEF_CFA %rbp, 16 mov $(-EINVAL), %eax jmp .Lout =20 --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 297091AF0DB; Wed, 22 Jan 2025 02:32:01 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513122; cv=none; b=WaSguUddKSOEcvguDC+QgcCKqrUHXAb3DM0oqNIBbucRc3qUzo55V2qnVfBHMsFsMjZnoKI7oivDtjQtMa/GVtqD5+EO6q4jVRm4n+E+H0GPcijDW9GggPRjM0mG6KNbaj+hdasviUH0upjK9rOzrLVkxk1PQhsvOVBsNYWzxks= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513122; c=relaxed/simple; bh=naZUMNTiJIsZIoJxWyFoddFok9zA5MJhKH8ft2VkwuY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Gt9ihRDw3yGExZDcdeepVAbflQoXMjTNbaV2SFIwYGvAnNnS5N9ZRzMGMfNmrNEovvq/qgTGfWGU+yRbv1LDlpnj/zLmY/tSjb+/uwK0/lqizKU+D6OM1bBQZf008xVh9jOOdsvHjmgU4xpmn0a3d9t9yb5QOPemcj058cbqwB0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=RTSmJTAa; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="RTSmJTAa" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B1917C4CEE2; Wed, 22 Jan 2025 02:32:00 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513121; bh=naZUMNTiJIsZIoJxWyFoddFok9zA5MJhKH8ft2VkwuY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=RTSmJTAa4oSV8B6jl7fwZqUSn5ddX9/9lbsfq1ANEnOyCyCQvWTxaOS/FXioAYpjY WzIx7pOAEPDW0ZfS5BJlkpIZYx9+JYqGz2mPXYsTTjdwhiFjoYXhQV7wJuyVToCWJW 
C6WLv1NBYrJ5b34Cj0uHkZLNvmNMSvsyfMmbLtaJcyV7ukgtf6jTabGmWMbLQCECsW F1kNZFGkzttIcTTlSfeiRDKjZ/Jcc9lULYXtMoEFCd79D8oFzPzm9o5egnSVphSjmv DOG8+FJJyKFwV9GvOTS7N53Pk8jtY0qRbVh0qK/v0OVMqEZLrJaBqiocpEmU17J9Gz ZOX15ey84LmEA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 09/39] x86/vdso: Enable sframe generation in VDSO Date: Tue, 21 Jan 2025 18:31:01 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Enable sframe generation in the VDSO library so kernel and user space can unwind through it. Signed-off-by: Josh Poimboeuf --- arch/Kconfig | 3 +++ arch/x86/entry/vdso/Makefile | 10 +++++++--- arch/x86/entry/vdso/vdso-layout.lds.S | 3 +++ arch/x86/include/asm/dwarf2.h | 5 ++++- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 6682b2a53e34..65228c78fef0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -435,6 +435,9 @@ config HAVE_HARDLOCKUP_DETECTOR_ARCH It uses the same command line parameters, and sysctl interface, as the generic hardlockup detectors. 
=20 +config AS_SFRAME + def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc) + config HAVE_PERF_REGS bool help diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index c9216ac4fb1e..478de89029d1 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -47,13 +47,17 @@ quiet_cmd_vdso2c =3D VDSO2C $@ $(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c F= ORCE $(call if_changed,vdso2c) =20 +#ifdef CONFIG_AS_SFRAME +SFRAME_CFLAGS :=3D -Wa$(comma)-gsframe +#endif + # # Don't omit frame pointers for ease of userspace debugging, but do # optimize sibling calls. # CFL :=3D $(PROFILING) -mcmodel=3Dsmall -fPIC -O2 -fasynchronous-unwind-tab= les -m64 \ $(filter -g%,$(KBUILD_CFLAGS)) -fno-stack-protector \ - -fno-omit-frame-pointer -foptimize-sibling-calls \ + -fno-omit-frame-pointer $(SFRAME_CFLAGS) -foptimize-sibling-calls \ -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO =20 ifdef CONFIG_MITIGATION_RETPOLINE @@ -63,7 +67,7 @@ endif endif =20 $(vobjs): KBUILD_CFLAGS :=3D $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO= ) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CF= LAGS),$(KBUILD_CFLAGS)) $(CFL) -$(vobjs): KBUILD_AFLAGS +=3D -DBUILD_VDSO +$(vobjs): KBUILD_AFLAGS +=3D -DBUILD_VDSO $(SFRAME_CFLAGS) =20 # # vDSO code runs in userspace and -pg doesn't help with profiling anyway. 
@@ -104,7 +108,7 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE =20 targets +=3D vdsox32.lds $(vobjx32s-y) =20 -$(obj)/%.so: OBJCOPYFLAGS :=3D -S --remove-section __ex_table +$(obj)/%.so: OBJCOPYFLAGS :=3D -g --remove-section __ex_table $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) =20 diff --git a/arch/x86/entry/vdso/vdso-layout.lds.S b/arch/x86/entry/vdso/vd= so-layout.lds.S index 506c9800a5aa..4dcde4747b07 100644 --- a/arch/x86/entry/vdso/vdso-layout.lds.S +++ b/arch/x86/entry/vdso/vdso-layout.lds.S @@ -63,6 +63,7 @@ SECTIONS .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text =20 + .sframe : { *(.sframe) } :text :sframe =20 /* * Text is well-separated from actual data: there's plenty of @@ -91,6 +92,7 @@ SECTIONS * Very old versions of ld do not recognize this name token; use the const= ant. */ #define PT_GNU_EH_FRAME 0x6474e550 +#define PT_GNU_SFRAME 0x6474e554 =20 /* * We must supply the ELF program headers explicitly to get just one @@ -102,4 +104,5 @@ PHDRS dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ note PT_NOTE FLAGS(4); /* PF_R */ eh_frame_hdr PT_GNU_EH_FRAME; + sframe PT_GNU_SFRAME; } diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index b195b3c8677e..1c354f648505 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -12,8 +12,11 @@ * For the vDSO, emit both runtime unwind information and debug * symbols for the .dbg file. 
*/ - +#ifdef __x86_64__ + .cfi_sections .eh_frame, .debug_frame, .sframe +#else .cfi_sections .eh_frame, .debug_frame +#endif =20 #define CFI_STARTPROC .cfi_startproc #define CFI_ENDPROC .cfi_endproc --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1C8F61B6CE0; Wed, 22 Jan 2025 02:32:02 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513123; cv=none; b=ncyFZ2kRBO5lcC8Gxssg3vbprKp0pYZmmdp/k9OwGsWemhvAH/iZ6g7kf3RlsQrma8Ror/TxmAJACOf9bGTeaA+SEgFUdZ1cldRrUWTXXvoN+iB3LR21Ua537D6rh5giK8/gv+XLcCeSrNsX6mCgURiJehg8oc6tGDtpTObya08= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513123; c=relaxed/simple; bh=Rb/Fdy6T2RvQ4lc9AaJaiRjGQahqOtSOdksdssQWDWA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=q1lGBEbK4iqCAwdWvzWDmGMTgXidQb8qerflVvrjQhoYi9bFW59k2Vr1dxWP0pOAX8zKgiYIUlukZhsXWCzxTROgC0QxXSer51yWsg7U4EwOuCXkwW4BaX/HDG6Stoy3JVITqtb05CsyPeLu6xCbp5oeplSa3SF1uwbdHaSQFqQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=BM+YI4AG; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="BM+YI4AG" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BE35EC4CEE4; Wed, 22 Jan 2025 02:32:01 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513122; bh=Rb/Fdy6T2RvQ4lc9AaJaiRjGQahqOtSOdksdssQWDWA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=BM+YI4AGwvkVjpQMlBvBgOnJsDNeM2cGUoYOee2uR/CUO33NnZohdIUtSA/CacY3l iVOfE4isUyPIKGBmecbXZ2d/Abqfk8xen9wnbsLe0wWU9sM4foOA+TMIuBJ/LjsFyV Mh6klpM7MCfiY7AfJIc7VvcrmD/ElbMQShPCaw0xTvappUagYABXPfT5Juy1nO2y91 xRLR+qqxs+TOIwWx8wu75/8/CpWe9wbiUMUtKb7jH/cEdOEkAo+Uq73dDdoDegrQ8/ 3e1E893BM+jghHeY/GLP+Bf32Q/Lq5oUixUuBpGOgLopDIc0UI9lgNQGIw2CmlE1S6 YP8A6XXSJbovw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 10/39] x86/uaccess: Add unsafe_copy_from_user() implementation Date: Tue, 21 Jan 2025 18:31:02 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add an x86 implementation of unsafe_copy_from_user() similar to the existing unsafe_copy_to_user(). Signed-off-by: Josh Poimboeuf --- arch/x86/include/asm/uaccess.h | 39 +++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 3a7755c1a441..a3148865bc57 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -599,23 +599,42 @@ _label: \ * We want the unsafe accessors to always be inlined and use * the error labels - thus the macro games. 
*/ -#define unsafe_copy_loop(dst, src, len, type, label) \ +#define unsafe_copy_to_user_loop(dst, src, len, type, label) \ while (len >=3D sizeof(type)) { \ - unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + unsafe_put_user(*(type *)(src), (type __user *)(dst), label); \ dst +=3D sizeof(type); \ src +=3D sizeof(type); \ len -=3D sizeof(type); \ } =20 -#define unsafe_copy_to_user(_dst,_src,_len,label) \ +#define unsafe_copy_to_user(_dst, _src, _len, label) \ do { \ - char __user *__ucu_dst =3D (_dst); \ - const char *__ucu_src =3D (_src); \ - size_t __ucu_len =3D (_len); \ - unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ - unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ - unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ - unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ + void __user *__dst =3D (_dst); \ + const void *__src =3D (_src); \ + size_t __len =3D (_len); \ + unsafe_copy_to_user_loop(__dst, __src, __len, u64, label); \ + unsafe_copy_to_user_loop(__dst, __src, __len, u32, label); \ + unsafe_copy_to_user_loop(__dst, __src, __len, u16, label); \ + unsafe_copy_to_user_loop(__dst, __src, __len, u8, label); \ +} while (0) + +#define unsafe_copy_from_user_loop(dst, src, len, type, label) \ + while (len >=3D sizeof(type)) { \ + unsafe_get_user(*(type *)(dst),(type __user *)(src),label); \ + dst +=3D sizeof(type); \ + src +=3D sizeof(type); \ + len -=3D sizeof(type); \ + } + +#define unsafe_copy_from_user(_dst, _src, _len, label) \ +do { \ + void *__dst =3D (_dst); \ + void __user *__src =3D (_src); \ + size_t __len =3D (_len); \ + unsafe_copy_from_user_loop(__dst, __src, __len, u64, label); \ + unsafe_copy_from_user_loop(__dst, __src, __len, u32, label); \ + unsafe_copy_from_user_loop(__dst, __src, __len, u16, label); \ + unsafe_copy_from_user_loop(__dst, __src, __len, u8, label); \ } while (0) =20 #ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 
Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 308CF192B86; Wed, 22 Jan 2025 02:32:03 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513124; cv=none; b=t6QOJe2IlIdzARU/G0mJ0HbE//NOogxRsuKclQl9+tCbqv5h/Q+el3g0QcEauwVq2sYWB8XkzhSGmF6/s7SwApVpvDCvww67ZTAv7FIP0qbMmHUiQuA54CLvlFpcnyq5qvnN3ixTBhJYn5EV5i+FB46L3AfXjBiwAr3ojMvryRM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513124; c=relaxed/simple; bh=ix2XcZ5VMYtQ68FRQqmYx9QXXGvDBdWOmsT7awQxxoQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=jYJ+wl0tAGRL4+DBI/crfYsXVFuh8iIsVoD9e3EODXyiazrEx40h2bSdH3JjC3KTeuUhrm/wmfKjMyFx7s1SX6nzIArooQ2/JguqqgXNxZJnOsgbit2WK4B8KNgRbRriuGpDEo2uyHFObA1rjR9Y7PwdWkRhVhYbPUGjUsFLZVM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=AQcjOkc4; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="AQcjOkc4" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C6A0DC4CEE9; Wed, 22 Jan 2025 02:32:02 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513123; bh=ix2XcZ5VMYtQ68FRQqmYx9QXXGvDBdWOmsT7awQxxoQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=AQcjOkc4g8JYkvLVn78eY7+6n69/l2G6sOo79t3OzD1vkNwN5Z5PhfZFQc50ZTv96 o4b7Y0kLuKZB6e3DaOQA1BmSgUg9tTmoOXKqsB48CVG0HZIFzPHwyJhjQ60Bg2nzZA BEx8ryPRQCmjTFityV1e4AUMIhs97pJg1JZJQxsh6atoVncnS7022xSY/pRvFJMzkI VmZCScom//sxBruSnzBnnXEpSpaGyZHfE7/cbPMH/N5ARUJuUfyJ3A8DlXyJVx99HW 
ageLCG+gdG2BbnbnH9Qm+VDRWYrQm3FCHxwZXkLrtl60H0H6VcNcMm/zd006MxdoSw VQ2hHnQdzulvg== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 11/39] unwind_user: Add user space unwinding API Date: Tue, 21 Jan 2025 18:31:03 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Introduce a generic API for unwinding user stacks. Signed-off-by: Josh Poimboeuf --- arch/Kconfig | 3 ++ include/linux/unwind_user.h | 15 ++++++++ include/linux/unwind_user_types.h | 31 ++++++++++++++++ kernel/Makefile | 1 + kernel/unwind/Makefile | 1 + kernel/unwind/user.c | 59 +++++++++++++++++++++++++++++++ 6 files changed, 110 insertions(+) create mode 100644 include/linux/unwind_user.h create mode 100644 include/linux/unwind_user_types.h create mode 100644 kernel/unwind/Makefile create mode 100644 kernel/unwind/user.c diff --git a/arch/Kconfig b/arch/Kconfig index 65228c78fef0..c6fa2b3ecbc6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -435,6 +435,9 @@ config HAVE_HARDLOCKUP_DETECTOR_ARCH It uses the same command line parameters, and sysctl interface, as the generic hardlockup detectors. 
=20 +config UNWIND_USER + bool + config AS_SFRAME def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc) =20 diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h new file mode 100644 index 000000000000..aa7923c1384f --- /dev/null +++ b/include/linux/unwind_user.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_H +#define _LINUX_UNWIND_USER_H + +#include + +int unwind_user_start(struct unwind_user_state *state); +int unwind_user_next(struct unwind_user_state *state); + +int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries); + +#define for_each_user_frame(state) \ + for (unwind_user_start((state)); !(state)->done; unwind_user_next((state)= )) + +#endif /* _LINUX_UNWIND_USER_H */ diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_= types.h new file mode 100644 index 000000000000..6ed1b4ae74e1 --- /dev/null +++ b/include/linux/unwind_user_types.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_TYPES_H +#define _LINUX_UNWIND_USER_TYPES_H + +#include + +enum unwind_user_type { + UNWIND_USER_TYPE_NONE, +}; + +struct unwind_stacktrace { + unsigned int nr; + unsigned long *entries; +}; + +struct unwind_user_frame { + s32 cfa_off; + s32 ra_off; + s32 fp_off; + bool use_fp; +}; + +struct unwind_user_state { + unsigned long ip; + unsigned long sp; + unsigned long fp; + enum unwind_user_type type; + bool done; +}; + +#endif /* _LINUX_UNWIND_USER_TYPES_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 87866b037fbe..6cb4b0e02a34 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -50,6 +50,7 @@ obj-y +=3D rcu/ obj-y +=3D livepatch/ obj-y +=3D dma/ obj-y +=3D entry/ +obj-y +=3D unwind/ obj-$(CONFIG_MODULES) +=3D module/ =20 obj-$(CONFIG_KCMP) +=3D kcmp.o diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile new file mode 100644 index 000000000000..349ce3677526 --- /dev/null +++ b/kernel/unwind/Makefile @@ -0,0 
+1 @@ + obj-$(CONFIG_UNWIND_USER) +=3D user.o diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c new file mode 100644 index 000000000000..456539635e49 --- /dev/null +++ b/kernel/unwind/user.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* +* Generic interfaces for unwinding user space +*/ +#include +#include +#include +#include + +int unwind_user_next(struct unwind_user_state *state) +{ + struct unwind_user_frame _frame; + struct unwind_user_frame *frame =3D &_frame; + unsigned long cfa =3D 0, fp, ra =3D 0; + + /* no implementation yet */ + -EINVAL; +} + +int unwind_user_start(struct unwind_user_state *state) +{ + struct pt_regs *regs =3D task_pt_regs(current); + + memset(state, 0, sizeof(*state)); + + if (!current->mm || !user_mode(regs)) { + state->done =3D true; + return -EINVAL; + } + + state->type =3D UNWIND_USER_TYPE_NONE; + + state->ip =3D instruction_pointer(regs); + state->sp =3D user_stack_pointer(regs); + state->fp =3D frame_pointer(regs); + + return 0; +} + +int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries) +{ + struct unwind_user_state state; + + trace->nr =3D 0; + + if (!max_entries) + return -EINVAL; + + if (!current->mm) + return 0; + + for_each_user_frame(&state) { + trace->entries[trace->nr++] =3D state.ip; + if (trace->nr >=3D max_entries) + break; + } + + return 0; +} --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3FAAB1BD9FA; Wed, 22 Jan 2025 02:32:04 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513125; cv=none; 
b=rVWA1T8tEeKJzZiBMyHCvWSZdZX1RPU9h2Zv37WB+il2QnN5O1mZwv6gBJsM38MadTV5g9i3vHQGJS0RSa5eZjaEFYfKfRWV3KhSaXuFDgsKmy2s7N7t/UEemO4+CMrDTDa7M1mNbKIC48P0DhVBGG9s48UftYYWyUSgqP6xNWU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513125; c=relaxed/simple; bh=NjIdqBVmYAQ7mWEQSrSZ02FTmhQECUp5P6XRtsIceGU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=qrrYsXFpbqBOWb2oqWyMXRkiF4QZ7C0qE8fZZJsUz6EfhmA5Oa6hab0vJxF8Mlsz8r0R/5MAZiggJ0sol0y6Z0kYVsnagT6HKb/AgLVaMgzNJA21HoMep56NDJbb35TuTK+9S1tN8nqgKCXeBWK0EMdBjUe+UQUC4vmrIpPMw7k= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=pckVlrQ1; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="pckVlrQ1" Received: by smtp.kernel.org (Postfix) with ESMTPSA id D18D0C4CEE7; Wed, 22 Jan 2025 02:32:03 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513124; bh=NjIdqBVmYAQ7mWEQSrSZ02FTmhQECUp5P6XRtsIceGU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=pckVlrQ1vgKOpT0BbFE97cjfgOkHHWnebyOfei5aVlM170fXzkmOAfMBzUb6kWE2z uJkw3IZYZ8238jnksj44OY0h8/m/xOVR7Qpv8YQUWTY1zMrV5UGsOWM0oe38zeP/Av zhGGURsI7lrnGLJSG358a/2Z01TuBss2o1PYU8XAa3lI89aL3/YQa2uWmkqxb5ldVl I90m6IwWzfxss7JHZ+Uo4EJe1X+9wMhE25ZtYNwRxlVkr0JpM+bJ56ZRWhs0XbDPO4 7XN7p8pSEA47/CenVpcERxc9jA3dC36LaM+BC/TahLjZ6KBmIkdVyE38s/D2Jmn1j0 ck0Mn157WDzDA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu 
Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 12/39] unwind_user: Add frame pointer support Date: Tue, 21 Jan 2025 18:31:04 -0800 Message-ID: <10b92f2fd065b67e6e3fd473ca145c34ea74b73a.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add optional support for user space frame pointer unwinding. If supported, the arch needs to enable CONFIG_HAVE_UNWIND_USER_FP and define ARCH_INIT_USER_FP_FRAME. By encoding the frame offsets in struct unwind_user_frame, much of this code can also be reused for future unwinder implementations like sframe. Signed-off-by: Josh Poimboeuf --- arch/Kconfig | 4 +++ include/asm-generic/unwind_user.h | 9 ++++++ include/linux/unwind_user_types.h | 1 + kernel/unwind/user.c | 49 +++++++++++++++++++++++++++++-- 4 files changed, 60 insertions(+), 3 deletions(-) create mode 100644 include/asm-generic/unwind_user.h diff --git a/arch/Kconfig b/arch/Kconfig index c6fa2b3ecbc6..cf996cbb8142 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -438,6 +438,10 @@ config HAVE_HARDLOCKUP_DETECTOR_ARCH config UNWIND_USER bool =20 +config HAVE_UNWIND_USER_FP + bool + select UNWIND_USER + config AS_SFRAME def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc) =20 diff --git a/include/asm-generic/unwind_user.h b/include/asm-generic/unwind= _user.h new file mode 100644 index 000000000000..832425502fb3 --- /dev/null +++ b/include/asm-generic/unwind_user.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_UNWIND_USER_H +#define _ASM_GENERIC_UNWIND_USER_H + +#ifndef ARCH_INIT_USER_FP_FRAME + #define ARCH_INIT_USER_FP_FRAME +#endif + +#endif /* _ASM_GENERIC_UNWIND_USER_H */ diff --git a/include/linux/unwind_user_types.h 
b/include/linux/unwind_user_= types.h index 6ed1b4ae74e1..65bd070eb6b0 100644 --- a/include/linux/unwind_user_types.h +++ b/include/linux/unwind_user_types.h @@ -6,6 +6,7 @@ =20 enum unwind_user_type { UNWIND_USER_TYPE_NONE, + UNWIND_USER_TYPE_FP, }; =20 struct unwind_stacktrace { diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c index 456539635e49..73fd4e150dfd 100644 --- a/kernel/unwind/user.c +++ b/kernel/unwind/user.c @@ -6,6 +6,18 @@ #include #include #include +#include +#include + +static struct unwind_user_frame fp_frame =3D { + ARCH_INIT_USER_FP_FRAME +}; + +static inline bool fp_state(struct unwind_user_state *state) +{ + return IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP) && + state->type =3D=3D UNWIND_USER_TYPE_FP; +} =20 int unwind_user_next(struct unwind_user_state *state) { @@ -13,8 +25,36 @@ int unwind_user_next(struct unwind_user_state *state) struct unwind_user_frame *frame =3D &_frame; unsigned long cfa =3D 0, fp, ra =3D 0; =20 - /* no implementation yet */ - -EINVAL; + if (state->done) + return -EINVAL; + + if (fp_state(state)) + frame =3D &fp_frame; + else + goto the_end; + + cfa =3D (frame->use_fp ? state->fp : state->sp) + frame->cfa_off; + + /* stack going in wrong direction? 
*/ + if (cfa <=3D state->sp) + goto the_end; + + if (get_user(ra, (unsigned long *)(cfa + frame->ra_off))) + goto the_end; + + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->f= p_off))) + goto the_end; + + state->ip =3D ra; + state->sp =3D cfa; + if (frame->fp_off) + state->fp =3D fp; + + return 0; + +the_end: + state->done =3D true; + return -EINVAL; } =20 int unwind_user_start(struct unwind_user_state *state) @@ -28,7 +68,10 @@ int unwind_user_start(struct unwind_user_state *state) return -EINVAL; } =20 - state->type =3D UNWIND_USER_TYPE_NONE; + if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP)) + state->type =3D UNWIND_USER_TYPE_FP; + else + state->type =3D UNWIND_USER_TYPE_NONE; =20 state->ip =3D instruction_pointer(regs); state->sp =3D user_stack_pointer(regs); --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 508E41B6CE0; Wed, 22 Jan 2025 02:32:05 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513126; cv=none; b=BKzJ1eHiWMNJse6uwH35hpHlXd+3paGnFNZ1nHUS+nB3tqaMsymsjayWiqVkB1QqbAPMZ1yCPTQIbzTy+UU5X+HXKAaIGfGL+Ts5wSSU1GoWs0nMZEUxP16YR0MVYu/FO1OsH3DiSL9WdKlq9iHo8JrUqxcbpfUzzxmwsIWu2Vk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513126; c=relaxed/simple; bh=rLJ7KZsRwojmk/ejrgM3MQsN9tDy+0R5j1wM8gUNGQw=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Rap1zCUqRUwPOzdNAjpiTsojXbu1TF4FF+E/cAL2Sc6knv1pXuTb86e+6zEDf4T2l8y7YeiwIBu4SxJ6wDgb8u/SQ3TJT5v17cVy48aR7hwSEjfwSmfNhj4QsD0yO+3EZWt2W1Tr9ZP4K40iTN/moJAWNfwF5uvqYFmqKRojBz8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) 
header.d=kernel.org header.i=@kernel.org header.b=cFVuknDX; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="cFVuknDX" Received: by smtp.kernel.org (Postfix) with ESMTPSA id D7DFCC4CEE9; Wed, 22 Jan 2025 02:32:04 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513125; bh=rLJ7KZsRwojmk/ejrgM3MQsN9tDy+0R5j1wM8gUNGQw=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=cFVuknDXNZ+8il6w7f6ksU+OCs2tR4iE5Tvn8UaSWPOwyaWBOAfxp9Q4nKTPovAYD YN/azRavYZ6DC95cZ0VDRx7gl4K4Z05A+qsPurWBIN83t5u8RivmGMIAMZCOk1rsHe ZLeZEeGMQUvMQm8Veqki974p0ssevzOBBOThEyqLQiXO1c7IiEZ3z2ZoOYj7f1n9l5 1P7IBkA2WkjXA4sFaVZNmhdz5eXqc/XS7Nl5u5DoD9e3rwXvtphhnHkv2WUmYfgsnf 3Dub6p0YFIXEzNzb1x30A6y2ISJDxR0bV2quydbpVCaxfFIqVy/eplYEjauMf9Azcb ul4lZABjNzb3g== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 13/39] unwind_user/x86: Enable frame pointer unwinding on x86 Date: Tue, 21 Jan 2025 18:31:05 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Use ARCH_INIT_USER_FP_FRAME to describe how frame pointers are unwound on x86, and enable CONFIG_HAVE_UNWIND_USER_FP accordingly so the unwind_user interfaces can be used. 
Signed-off-by: Josh Poimboeuf --- arch/x86/Kconfig | 1 + arch/x86/include/asm/unwind_user.h | 11 +++++++++++ 2 files changed, 12 insertions(+) create mode 100644 arch/x86/include/asm/unwind_user.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ef6cfea9df73..f938b957a927 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -291,6 +291,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UACCESS_VALIDATION if HAVE_OBJTOOL select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_UNWIND_USER_FP if X86_64 select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO select VDSO_GETRANDOM if X86_64 diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwi= nd_user.h new file mode 100644 index 000000000000..8597857bf896 --- /dev/null +++ b/arch/x86/include/asm/unwind_user.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_UNWIND_USER_H +#define _ASM_X86_UNWIND_USER_H + +#define ARCH_INIT_USER_FP_FRAME \ + .cfa_off =3D (s32)sizeof(long) * 2, \ + .ra_off =3D (s32)sizeof(long) * -1, \ + .fp_off =3D (s32)sizeof(long) * -2, \ + .use_fp =3D true, + +#endif /* _ASM_X86_UNWIND_USER_H */ --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 478101C2304; Wed, 22 Jan 2025 02:32:06 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513127; cv=none; b=H3Tn/WxT0561kKjf/kI3wCLpEo/tI6FgPmNglBqOVsWVhD1V2fn0ssnP7UVjOqkVp44C5RvTe/NTKmvXBaYsWqUjpfjsWauj0Y1qXg7tEd/bZazpXVjxwQ0v5hCzl1qYQq2xT3KFQsVeWb2zbNTl8lxwKbdDXpmyRQr7OwSLqNg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513127; c=relaxed/simple; 
bh=GXuPggdOdnrO7sypEn586aKnLMgz6Grse8Wk7Tzz2xk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=LZ2umTpam248s/7I/ERzhwuQDPl6AnNIdSRjjMDdRMLGAH4RSQApdbiZLGej/FBh4i1XQbvGXeUEl82zCOPPtayM/JgWpsXHTHrGys9cScqXmcmS1+/n3Jx2892eD0NZZpwnTfCuk5wSTXy70VM7+TceBjl4UaNsDJTxinQRORk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Dd+hZewj; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Dd+hZewj" Received: by smtp.kernel.org (Postfix) with ESMTPSA id DD98DC4CEE0; Wed, 22 Jan 2025 02:32:05 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513126; bh=GXuPggdOdnrO7sypEn586aKnLMgz6Grse8Wk7Tzz2xk=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Dd+hZewjudMKRsvbtsDX3Q6bDYlpOmRBdrM8soW2pRqP151QF/RJmRnvhkZnHjMGv UR13luqOtqrGAfvEcxZaBmGz23L8ZskIWY13W+XrdgOBAR3G6EEs92Qx90Yw2lcW7y WaLlGl+JTvG4GHY5vVirFq4M2z4hNxOPvB+fiaFxTqPDjcJvRNvNch4GWCYkZ7rRrC M8kz7zq+GUpS0spYKqvVWI4IBFXpXoM5hMP+yfW1RJin5EDqzQgz+VjQxftcuRGWt2 nAMMbvkTHPYFG2Owh+/Q4Wl/uig1RNBWCNDHNQGdWYlXjQj3RP++7Hszi4Xm2CDZen QvmLsHM7jdM2w== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 14/39] perf/x86: Rename get_segment_base() and make it global Date: Tue, 21 Jan 2025 18:31:06 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk 
X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" get_segment_base() will be used by the unwind_user code, so make it global and rename it so it doesn't conflict with a KVM function of the same name. Signed-off-by: Josh Poimboeuf --- arch/x86/events/core.c | 10 +++++----- arch/x86/include/asm/perf_event.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index c75c482d4c52..23ac6343cf86 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2790,7 +2790,7 @@ valid_user_frame(const void __user *fp, unsigned long= size) return __access_ok(fp, size); } =20 -static unsigned long get_segment_base(unsigned int segment) +unsigned long segment_base_address(unsigned int segment) { struct desc_struct *desc; unsigned int idx =3D segment >> 3; @@ -2874,8 +2874,8 @@ perf_callchain_user32(struct pt_regs *regs, struct pe= rf_callchain_entry_ctx *ent if (user_64bit_mode(regs)) return 0; =20 - cs_base =3D get_segment_base(regs->cs); - ss_base =3D get_segment_base(regs->ss); + cs_base =3D segment_base_address(regs->cs); + ss_base =3D segment_base_address(regs->ss); =20 fp =3D compat_ptr(ss_base + regs->bp); pagefault_disable(); @@ -2994,11 +2994,11 @@ static unsigned long code_segment_base(struct pt_re= gs *regs) return 0x10 * regs->cs; =20 if (user_mode(regs) && regs->cs !=3D __USER_CS) - return get_segment_base(regs->cs); + return segment_base_address(regs->cs); #else if (user_mode(regs) && !user_64bit_mode(regs) && regs->cs !=3D __USER32_CS) - return get_segment_base(regs->cs); + return segment_base_address(regs->cs); #endif return 0; } diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_= event.h index d95f902acc52..75956c68356f 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -639,4 +639,6 @@ 
static __always_inline void perf_lopwr_cb(bool lopwr_in) =20 #define arch_perf_out_copy_user copy_from_user_nmi =20 +unsigned long segment_base_address(unsigned int segment); + #endif /* _ASM_X86_PERF_EVENT_H */ --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 674611C3C18; Wed, 22 Jan 2025 02:32:07 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513128; cv=none; b=XBhcEe8eCx+ocORbOzqVP9CNkJOG4as/toDfsD6/Xsawit5bh5zqYIQEWPjCPhMI/FIjIMHHPGK3vEUodrq8OX4eN4PzgAjxoKFPbbcRTXjYkMi1lianiB4Cngnyt6ScKq9eSG9LmMrnBBFszADweU6J9r9lvBLIWmebHABIvpo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513128; c=relaxed/simple; bh=Yq6Sy9F8GLI0+jx6f9TZDl5/tz4xxEMoL/ncBzNK5EU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=DWxOdMyIaDaND9789eh72t81X3cupXpImYmVpBBjP8k8NVYvQscQvblzhvKCntatjOiz48mPd5YcWHvdT7l3QqisG4XaBc4Xf2tsnTTHRa9VgLAntv9CFCiuixiiZ5Ao1rx7DKhMZ/7UWg+t+L0RM6UE71Iyf+ZabXnqCT1+XkU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=leVaio9w; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="leVaio9w" Received: by smtp.kernel.org (Postfix) with ESMTPSA id EEEC9C4CEE6; Wed, 22 Jan 2025 02:32:06 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513127; bh=Yq6Sy9F8GLI0+jx6f9TZDl5/tz4xxEMoL/ncBzNK5EU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=leVaio9wlFZK8kle7zzEDn1b/dLayauALuaQlno+M7AUPnlfLcT1PvoY0bqEaogoP nypOCVPGKMKOmQCnVVYpBaHcNYdN5sdtfpfEaRmAvMkB4B1kuBesoQpTuUFlIjhdBk uh4MiTZa90qDhdjXfFw7rQezwhhnC0teKoz72n2+vsEOUIDLiiBcOpx//w5W6RkFqK d9GsvKhahcg1zzcSeBrhNU8CZiqKh4sIwizW9Fq9DnIiv7nht1VJRg3OS4/YTHgm+0 00WKv4ISj+nwCE13ToXvY5Ue1wnzBULztwV7aEd0EP0TiPdQXGUZl7jf0PjADwOhJ6 OO22hc85iyzyA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 15/39] unwind_user: Add compat mode frame pointer support Date: Tue, 21 Jan 2025 18:31:07 -0800 Message-ID: <252c260bc65005fc3091b2bb9e0aabc2e5a6cea3.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add optional support for user space compat mode frame pointer unwinding. If supported, the arch needs to enable CONFIG_HAVE_UNWIND_USER_COMPAT_FP and define ARCH_INIT_USER_COMPAT_FP_FRAME. 
Signed-off-by: Josh Poimboeuf --- arch/Kconfig | 4 +++ include/asm-generic/Kbuild | 2 ++ include/asm-generic/unwind_user.h | 15 +++++++++++ include/asm-generic/unwind_user_types.h | 9 +++++++ include/linux/unwind_user_types.h | 3 +++ kernel/unwind/user.c | 36 ++++++++++++++++++++++--- 6 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 include/asm-generic/unwind_user_types.h diff --git a/arch/Kconfig b/arch/Kconfig index cf996cbb8142..f1f7a3857c97 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -442,6 +442,10 @@ config HAVE_UNWIND_USER_FP bool select UNWIND_USER =20 +config HAVE_UNWIND_USER_COMPAT_FP + bool + depends on HAVE_UNWIND_USER_FP + config AS_SFRAME def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc) =20 diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 1b43c3a77012..2f3e4e2d8610 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -58,6 +58,8 @@ mandatory-y +=3D tlbflush.h mandatory-y +=3D topology.h mandatory-y +=3D trace_clock.h mandatory-y +=3D uaccess.h +mandatory-y +=3D unwind_user.h +mandatory-y +=3D unwind_user_types.h mandatory-y +=3D vermagic.h mandatory-y +=3D vga.h mandatory-y +=3D video.h diff --git a/include/asm-generic/unwind_user.h b/include/asm-generic/unwind= _user.h index 832425502fb3..385638ce4aec 100644 --- a/include/asm-generic/unwind_user.h +++ b/include/asm-generic/unwind_user.h @@ -2,8 +2,23 @@ #ifndef _ASM_GENERIC_UNWIND_USER_H #define _ASM_GENERIC_UNWIND_USER_H =20 +#include + #ifndef ARCH_INIT_USER_FP_FRAME #define ARCH_INIT_USER_FP_FRAME #endif =20 +#ifndef ARCH_INIT_USER_COMPAT_FP_FRAME + #define ARCH_INIT_USER_COMPAT_FP_FRAME + #define in_compat_mode(regs) false +#endif + +#ifndef arch_unwind_user_init +static inline void arch_unwind_user_init(struct unwind_user_state *state, = struct pt_regs *reg) {} +#endif + +#ifndef arch_unwind_user_next +static inline void arch_unwind_user_next(struct unwind_user_state *state) = {} +#endif + #endif /* 
_ASM_GENERIC_UNWIND_USER_H */ diff --git a/include/asm-generic/unwind_user_types.h b/include/asm-generic/= unwind_user_types.h new file mode 100644 index 000000000000..ee803de7c998 --- /dev/null +++ b/include/asm-generic/unwind_user_types.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_GENERIC_UNWIND_USER_TYPES_H +#define _ASM_GENERIC_UNWIND_USER_TYPES_H + +#ifndef arch_unwind_user_state +struct arch_unwind_user_state {}; +#endif + +#endif /* _ASM_GENERIC_UNWIND_USER_TYPES_H */ diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_= types.h index 65bd070eb6b0..3ec4a097a3dd 100644 --- a/include/linux/unwind_user_types.h +++ b/include/linux/unwind_user_types.h @@ -3,10 +3,12 @@ #define _LINUX_UNWIND_USER_TYPES_H =20 #include +#include =20 enum unwind_user_type { UNWIND_USER_TYPE_NONE, UNWIND_USER_TYPE_FP, + UNWIND_USER_TYPE_COMPAT_FP, }; =20 struct unwind_stacktrace { @@ -25,6 +27,7 @@ struct unwind_user_state { unsigned long ip; unsigned long sp; unsigned long fp; + struct arch_unwind_user_state arch; enum unwind_user_type type; bool done; }; diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c index 73fd4e150dfd..92963f129c6a 100644 --- a/kernel/unwind/user.c +++ b/kernel/unwind/user.c @@ -13,12 +13,32 @@ static struct unwind_user_frame fp_frame =3D { ARCH_INIT_USER_FP_FRAME }; =20 +static struct unwind_user_frame compat_fp_frame =3D { + ARCH_INIT_USER_COMPAT_FP_FRAME +}; + static inline bool fp_state(struct unwind_user_state *state) { return IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP) && state->type =3D=3D UNWIND_USER_TYPE_FP; } =20 +static inline bool compat_state(struct unwind_user_state *state) +{ + return IS_ENABLED(CONFIG_HAVE_UNWIND_USER_COMPAT_FP) && + state->type =3D=3D UNWIND_USER_TYPE_COMPAT_FP; +} + +#define UNWIND_GET_USER_LONG(to, from, state) \ +({ \ + int __ret; \ + if (compat_state(state)) \ + __ret =3D get_user(to, (u32 __user *)(from)); \ + else \ + __ret =3D get_user(to, (u64 __user *)(from)); \ + 
__ret; \ +}) + int unwind_user_next(struct unwind_user_state *state) { struct unwind_user_frame _frame; @@ -28,7 +48,9 @@ int unwind_user_next(struct unwind_user_state *state) if (state->done) return -EINVAL; =20 - if (fp_state(state)) + if (compat_state(state)) + frame =3D &compat_fp_frame; + else if (fp_state(state)) frame =3D &fp_frame; else goto the_end; @@ -39,10 +61,10 @@ int unwind_user_next(struct unwind_user_state *state) if (cfa <=3D state->sp) goto the_end; =20 - if (get_user(ra, (unsigned long *)(cfa + frame->ra_off))) + if (UNWIND_GET_USER_LONG(ra, cfa + frame->ra_off, state)) goto the_end; =20 - if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->f= p_off))) + if (frame->fp_off && UNWIND_GET_USER_LONG(fp, cfa + frame->fp_off, state)) goto the_end; =20 state->ip =3D ra; @@ -50,6 +72,8 @@ int unwind_user_next(struct unwind_user_state *state) if (frame->fp_off) state->fp =3D fp; =20 + arch_unwind_user_next(state); + return 0; =20 the_end: @@ -68,7 +92,9 @@ int unwind_user_start(struct unwind_user_state *state) return -EINVAL; } =20 - if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP)) + if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_COMPAT_FP) && in_compat_mode(regs)) + state->type =3D UNWIND_USER_TYPE_COMPAT_FP; + else if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP)) state->type =3D UNWIND_USER_TYPE_FP; else state->type =3D UNWIND_USER_TYPE_NONE; @@ -77,6 +103,8 @@ int unwind_user_start(struct unwind_user_state *state) state->sp =3D user_stack_pointer(regs); state->fp =3D frame_pointer(regs); =20 + arch_unwind_user_init(state, regs); + return 0; } =20 --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5B1A91C4A26; Wed, 22 Jan 2025 02:32:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; 
arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513129; cv=none; b=OFXO7dL7SlK2HFO9HLNl7by7RXG1PgY4/2lXnUbmSCyltph0R8kDu08yqGk4SYXb8t+Kg1blDCs5biN52MMkOwRaxkcKc6jpG/OAddvoPzQki7P8B02n58kRaFbgCRUSu/3/ng4TMxSeC8HKYTO9Zmz7kDTMpVmCr/nLK0YkrM4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513129; c=relaxed/simple; bh=t52ezAu10lmnZqY9cRbRZaguDtc+L+Sgw2qfNRQTEsE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Bi8Q7cYrVMp6hfCGX9pz+Pm28fpMhV0cCFEPMTFTqBFZ4qn3x0um2DrPCYDPp/TkDdRD1Sc3+wRvSBbO78+Tq/VghEDWt2Toh+JkGvNSSCgGaBZiNMZU6TSrCYXMpWV0CAMHmbgifXfnw8b1JwywdxCfYEI8DajhuQMJojLV8dY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=hFgOz+tW; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="hFgOz+tW" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0B71BC4CEE0; Wed, 22 Jan 2025 02:32:08 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513128; bh=t52ezAu10lmnZqY9cRbRZaguDtc+L+Sgw2qfNRQTEsE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=hFgOz+tWSPmnTQrzGGwrfLgXFljDgDbpnXYTp4IZi420uDzdW7FEpGlTS1CRbW0ZW ao6ZslnLexANtEKuKz1xf+oRf0ZtsmjOm3sBk4+28I37msM/Sle3nFEPXR/AUuVrd0 U9BugDjFFpuSTqH2DENNGMgb8W+3kIhUAZwGG2HammSaTh7yt8Yg5RSfKdZmbtUXbE mzZVHDPdWieuSJPrI6o6fcCkCy1ZJJ8RVeeicdSLrCZrEnN2SM8sAcHwm60d7W1/cb pLV6mZPuMiQmAwV+gJ4ouWf6qIcL26whZfiRbomnaC4ySK4fRBTvwF/ZjPriCbJmOR qtN7ByEVX5CTw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , 
linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 16/39] unwind_user/x86: Enable compat mode frame pointer unwinding on x86 Date: Tue, 21 Jan 2025 18:31:08 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Use ARCH_INIT_USER_COMPAT_FP_FRAME to describe how frame pointers are unwound on x86, and implement the hooks needed to add the segment base addresses. Enable HAVE_UNWIND_USER_COMPAT_FP if the system has compat mode compiled in. Signed-off-by: Josh Poimboeuf --- arch/x86/Kconfig | 1 + arch/x86/include/asm/unwind_user.h | 50 ++++++++++++++++++++++++ arch/x86/include/asm/unwind_user_types.h | 17 ++++++++ 3 files changed, 68 insertions(+) create mode 100644 arch/x86/include/asm/unwind_user_types.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f938b957a927..08c44db0fefb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -291,6 +291,7 @@ config X86 select HAVE_SYSCALL_TRACEPOINTS select HAVE_UACCESS_VALIDATION if HAVE_OBJTOOL select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_UNWIND_USER_COMPAT_FP if IA32_EMULATION select HAVE_UNWIND_USER_FP if X86_64 select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO diff --git a/arch/x86/include/asm/unwind_user.h b/arch/x86/include/asm/unwi= nd_user.h index 8597857bf896..bb1148111259 100644 --- a/arch/x86/include/asm/unwind_user.h +++ b/arch/x86/include/asm/unwind_user.h @@ -2,10 +2,60 @@ #ifndef _ASM_X86_UNWIND_USER_H #define _ASM_X86_UNWIND_USER_H =20 +#include +#include +#include + #define ARCH_INIT_USER_FP_FRAME \ .cfa_off =3D (s32)sizeof(long) * 2, \ .ra_off =3D (s32)sizeof(long) * -1, \ .fp_off =3D 
(s32)sizeof(long) * -2, \ .use_fp =3D true, =20 +#ifdef CONFIG_IA32_EMULATION + +#define ARCH_INIT_USER_COMPAT_FP_FRAME \ + .cfa_off =3D (s32)sizeof(u32) * 2, \ + .ra_off =3D (s32)sizeof(u32) * -1, \ + .fp_off =3D (s32)sizeof(u32) * -2, \ + .use_fp =3D true, + +#define in_compat_mode(regs) !user_64bit_mode(regs) + +static inline void arch_unwind_user_init(struct unwind_user_state *state, + struct pt_regs *regs) +{ + unsigned long cs_base, ss_base; + + if (state->type !=3D UNWIND_USER_TYPE_COMPAT_FP) + return; + + scoped_guard(irqsave) { + cs_base =3D segment_base_address(regs->cs); + ss_base =3D segment_base_address(regs->ss); + } + + state->arch.cs_base =3D cs_base; + state->arch.ss_base =3D ss_base; + + state->ip +=3D cs_base; + state->sp +=3D ss_base; + state->fp +=3D ss_base; +} +#define arch_unwind_user_init arch_unwind_user_init + +static inline void arch_unwind_user_next(struct unwind_user_state *state) +{ + if (state->type !=3D UNWIND_USER_TYPE_COMPAT_FP) + return; + + state->ip +=3D state->arch.cs_base; + state->fp +=3D state->arch.ss_base; +} +#define arch_unwind_user_next arch_unwind_user_next + +#endif /* CONFIG_IA32_EMULATION */ + +#include + #endif /* _ASM_X86_UNWIND_USER_H */ diff --git a/arch/x86/include/asm/unwind_user_types.h b/arch/x86/include/as= m/unwind_user_types.h new file mode 100644 index 000000000000..d7074dc5f0ce --- /dev/null +++ b/arch/x86/include/asm/unwind_user_types.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_UNWIND_USER_TYPES_H +#define _ASM_UNWIND_USER_TYPES_H + +#ifdef CONFIG_IA32_EMULATION + +struct arch_unwind_user_state { + unsigned long ss_base; + unsigned long cs_base; +}; +#define arch_unwind_user_state arch_unwind_user_state + +#endif /* CONFIG_IA32_EMULATION */ + +#include + +#endif /* _ASM_UNWIND_USER_TYPES_H */ --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher 
ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 103A21C54BF; Wed, 22 Jan 2025 02:32:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513130; cv=none; b=pHwyqkB1gZ7Z5xWOJC7bVqwYT3kwGCvIim/7nTHW0V+okYn6MvodVxlpjiLowsF3gxmYX7mc272JiF5f6I+q1+o1t+q4LqunY36YwR02VuOUVmCkp52RX91F0LXiTdBzgRSLZz17wdJLH40P9eYu5iZTl8ZwMCbJGeQZ73McQNo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513130; c=relaxed/simple; bh=2Ntk3c9MZ4SXlv4NsY0mftQqSrLIfAtrWvzvtb5Jf4k=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=SSiHvP2qF2TMIPZ/Oo7+MlprQNcz8EfW9MXZVNoZctw7nJYN/y2SyExbBY5aQWfBlGpVBvLUkcdXCCS0gAehFnE+x89mXaoQyhmgaEm/xD07TouLsxJCpg2bK2Rqy9mMavgHn3IQWbA+UmXLj9bHDVmnNYaGagiab6dr2hxlVLU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=bjabxiwY; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="bjabxiwY" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0B729C4CEE6; Wed, 22 Jan 2025 02:32:09 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513129; bh=2Ntk3c9MZ4SXlv4NsY0mftQqSrLIfAtrWvzvtb5Jf4k=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=bjabxiwYNNM8iLD2jy70fXJGO+BPTrVJAKRnqrmtf568RH7Dkck4hjesGd1tKnVFU wwnlsz3e0x2fwzCO2H8L5ZGXvBe28vc0LcIMuVmWDxNlm9xaW2peUvHauznlUtQI1G cCAFw8zBPE1ShnrCA1Lq286q4wwXAXvCKJ/y0omrZSQoopaWiyFM4nuWGHklEm5jeE 4TdnzlwxHlxk2Gmefax70vDRv8wTf87xFlCl2L4qyck/aybHgznek3mJTjkZ4z0icw RjsFH0fBvQcyBvD33GAOFSL5VpiMsafmkwbHjDpWw38zDvLHkvcIgW4Y73AE+HRzrN quRcweQBu6/oA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , 
Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 17/39] unwind_user/sframe: Add support for reading .sframe headers Date: Tue, 21 Jan 2025 18:31:09 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" In preparation for unwinding user space stacks with sframe, add basic sframe compile infrastructure and support for reading the .sframe section header. sframe_add_section() reads the header and unconditionally returns an error, so it's not very useful yet. A subsequent patch will improve that. 
Signed-off-by: Josh Poimboeuf --- arch/Kconfig | 3 + include/linux/sframe.h | 36 +++++++++++ kernel/unwind/Makefile | 3 +- kernel/unwind/sframe.c | 136 +++++++++++++++++++++++++++++++++++++++++ kernel/unwind/sframe.h | 71 +++++++++++++++++++++ 5 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 include/linux/sframe.h create mode 100644 kernel/unwind/sframe.c create mode 100644 kernel/unwind/sframe.h diff --git a/arch/Kconfig b/arch/Kconfig index f1f7a3857c97..23edd0e4e16a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -446,6 +446,9 @@ config HAVE_UNWIND_USER_COMPAT_FP bool depends on HAVE_UNWIND_USER_FP =20 +config HAVE_UNWIND_USER_SFRAME + bool + config AS_SFRAME def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc) =20 diff --git a/include/linux/sframe.h b/include/linux/sframe.h new file mode 100644 index 000000000000..3bfaf21869c2 --- /dev/null +++ b/include/linux/sframe.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_SFRAME_H +#define _LINUX_SFRAME_H + +#include +#include + +#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME + +struct sframe_section { + unsigned long sframe_start; + unsigned long sframe_end; + unsigned long text_start; + unsigned long text_end; + + unsigned long fdes_start; + unsigned long fres_start; + unsigned long fres_end; + unsigned int num_fdes; + + signed char ra_off; + signed char fp_off; +}; + +extern int sframe_add_section(unsigned long sframe_start, unsigned long sf= rame_end, + unsigned long text_start, unsigned long text_end); +extern int sframe_remove_section(unsigned long sframe_addr); + +#else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */ + +static inline int sframe_add_section(unsigned long sframe_start, unsigned = long sframe_end, unsigned long text_start, unsigned long text_end) { return= -ENOSYS; } +static inline int sframe_remove_section(unsigned long sframe_addr) { retur= n -ENOSYS; } + +#endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */ + +#endif /* _LINUX_SFRAME_H */ diff --git 
a/kernel/unwind/Makefile b/kernel/unwind/Makefile index 349ce3677526..f70380d7a6a6 100644 --- a/kernel/unwind/Makefile +++ b/kernel/unwind/Makefile @@ -1 +1,2 @@ - obj-$(CONFIG_UNWIND_USER) +=3D user.o + obj-$(CONFIG_UNWIND_USER) +=3D user.o + obj-$(CONFIG_HAVE_UNWIND_USER_SFRAME) +=3D sframe.o diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c new file mode 100644 index 000000000000..20287f795b36 --- /dev/null +++ b/kernel/unwind/sframe.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Userspace sframe access functions + */ + +#define pr_fmt(fmt) "sframe: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sframe.h" + +#define dbg(fmt, ...) \ + pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__) + +static void free_section(struct sframe_section *sec) +{ + kfree(sec); +} + +static int sframe_read_header(struct sframe_section *sec) +{ + unsigned long header_end, fdes_start, fdes_end, fres_start, fres_end; + struct sframe_header shdr; + unsigned int num_fdes; + + if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))= ) { + dbg("header usercopy failed\n"); + return -EFAULT; + } + + if (shdr.preamble.magic !=3D SFRAME_MAGIC || + shdr.preamble.version !=3D SFRAME_VERSION_2 || + !(shdr.preamble.flags & SFRAME_F_FDE_SORTED) || + shdr.auxhdr_len) { + dbg("bad/unsupported sframe header\n"); + return -EINVAL; + } + + if (!shdr.num_fdes || !shdr.num_fres) { + dbg("no fde/fre entries\n"); + return -EINVAL; + } + + header_end =3D sec->sframe_start + SFRAME_HEADER_SIZE(shdr); + if (header_end >=3D sec->sframe_end) { + dbg("header doesn't fit in section\n"); + return -EINVAL; + } + + num_fdes =3D shdr.num_fdes; + fdes_start =3D header_end + shdr.fdes_off; + fdes_end =3D fdes_start + (num_fdes * sizeof(struct sframe_fde)); + + fres_start =3D header_end + shdr.fres_off; + fres_end =3D fres_start + shdr.fre_len; + + if (fres_start < fdes_end || fres_end > sec->sframe_end) 
{ + dbg("inconsistent fde/fre offsets\n"); + return -EINVAL; + } + + sec->num_fdes =3D num_fdes; + sec->fdes_start =3D fdes_start; + sec->fres_start =3D fres_start; + sec->fres_end =3D fres_end; + + sec->ra_off =3D shdr.cfa_fixed_ra_offset; + sec->fp_off =3D shdr.cfa_fixed_fp_offset; + + return 0; +} + +int sframe_add_section(unsigned long sframe_start, unsigned long sframe_en= d, + unsigned long text_start, unsigned long text_end) +{ + struct maple_tree *sframe_mt =3D ¤t->mm->sframe_mt; + struct vm_area_struct *sframe_vma, *text_vma; + struct mm_struct *mm =3D current->mm; + struct sframe_section *sec; + int ret; + + if (!sframe_start || !sframe_end || !text_start || !text_end) { + dbg("zero-length sframe/text address\n"); + return -EINVAL; + } + + scoped_guard(mmap_read_lock, mm) { + sframe_vma =3D vma_lookup(mm, sframe_start); + if (!sframe_vma || sframe_end > sframe_vma->vm_end) { + dbg("bad sframe address (0x%lx - 0x%lx)\n", + sframe_start, sframe_end); + return -EINVAL; + } + + text_vma =3D vma_lookup(mm, text_start); + if (!text_vma || + !(text_vma->vm_flags & VM_EXEC) || + text_end > text_vma->vm_end) { + dbg("bad text address (0x%lx - 0x%lx)\n", + text_start, text_end); + return -EINVAL; + } + } + + sec =3D kzalloc(sizeof(*sec), GFP_KERNEL); + if (!sec) + return -ENOMEM; + + sec->sframe_start =3D sframe_start; + sec->sframe_end =3D sframe_end; + sec->text_start =3D text_start; + sec->text_end =3D text_end; + + ret =3D sframe_read_header(sec); + if (ret) + goto err_free; + + /* TODO nowhere to store it yet - just free it and return an error */ + ret =3D -ENOSYS; + +err_free: + free_section(sec); + return ret; +} + +int sframe_remove_section(unsigned long sframe_start) +{ + return -ENOSYS; +} diff --git a/kernel/unwind/sframe.h b/kernel/unwind/sframe.h new file mode 100644 index 000000000000..e9bfccfaf5b4 --- /dev/null +++ b/kernel/unwind/sframe.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * From 
https://www.sourceware.org/binutils/docs/sframe-spec.html + */ +#ifndef _SFRAME_H +#define _SFRAME_H + +#include + +#define SFRAME_VERSION_1 1 +#define SFRAME_VERSION_2 2 +#define SFRAME_MAGIC 0xdee2 + +#define SFRAME_F_FDE_SORTED 0x1 +#define SFRAME_F_FRAME_POINTER 0x2 + +#define SFRAME_ABI_AARCH64_ENDIAN_BIG 1 +#define SFRAME_ABI_AARCH64_ENDIAN_LITTLE 2 +#define SFRAME_ABI_AMD64_ENDIAN_LITTLE 3 + +#define SFRAME_FDE_TYPE_PCINC 0 +#define SFRAME_FDE_TYPE_PCMASK 1 + +struct sframe_preamble { + u16 magic; + u8 version; + u8 flags; +} __packed; + +struct sframe_header { + struct sframe_preamble preamble; + u8 abi_arch; + s8 cfa_fixed_fp_offset; + s8 cfa_fixed_ra_offset; + u8 auxhdr_len; + u32 num_fdes; + u32 num_fres; + u32 fre_len; + u32 fdes_off; + u32 fres_off; +} __packed; + +#define SFRAME_HEADER_SIZE(header) \ + ((sizeof(struct sframe_header) + header.auxhdr_len)) + +#define SFRAME_AARCH64_PAUTH_KEY_A 0 +#define SFRAME_AARCH64_PAUTH_KEY_B 1 + +struct sframe_fde { + s32 start_addr; + u32 func_size; + u32 fres_off; + u32 fres_num; + u8 info; + u8 rep_size; + u16 padding; +} __packed; + +#define SFRAME_FUNC_FRE_TYPE(data) (data & 0xf) +#define SFRAME_FUNC_FDE_TYPE(data) ((data >> 4) & 0x1) +#define SFRAME_FUNC_PAUTH_KEY(data) ((data >> 5) & 0x1) + +#define SFRAME_BASE_REG_FP 0 +#define SFRAME_BASE_REG_SP 1 + +#define SFRAME_FRE_CFA_BASE_REG_ID(data) (data & 0x1) +#define SFRAME_FRE_OFFSET_COUNT(data) ((data >> 1) & 0xf) +#define SFRAME_FRE_OFFSET_SIZE(data) ((data >> 5) & 0x3) +#define SFRAME_FRE_MANGLED_RA_P(data) ((data >> 7) & 0x1) + +#endif /* _SFRAME_H */ --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8E9EB1C5D6E; Wed, 22 Jan 2025 02:32:11 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none 
smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513131; cv=none; b=TFE6ybQr1SqyvZnOdnvX/cvC1rFytUDSKLcGtDV+SdZN8hMSmOJ9LArOgn5inv03ymcVDtiCdd+wkpmjJ+rKnjBvVWRDWsZJmGfTLGVtCNGyI2di1tfGK+hfYDWch04dEcAXhEBtOgbH0ZEKHAL3b7uQU+AgesXT9sOC3XENDpE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513131; c=relaxed/simple; bh=0yEC8f2n9k7rhNfh9nzAfZgNkwUxpX8xe2XBt/l1zxQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=CanXpbxUQ/giI+ZamsrBWFOIDlXCS2cFoa70RLs72AIdmmYAt/MJH6YvSnYQnskMZH2Xa6U/wCHuJdLVxaNQRmYOFtHsHOho+RcCZRgZZ4qcPzRqD/tgawbaBM9rraTzNxYAepMOEpeAGg2oPtFUz2v9BFFcqMdFBP6dD/yNuP8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=JKXsUrcL; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="JKXsUrcL" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 205D1C4CEE2; Wed, 22 Jan 2025 02:32:10 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513131; bh=0yEC8f2n9k7rhNfh9nzAfZgNkwUxpX8xe2XBt/l1zxQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=JKXsUrcLN6zTJk5n+CrfY4WZ5COOLBYpRYamTMwakMqSvS2ohSQ1ImlvdHMKBFLJs qh51gGuJti29sDRSOUFwxOtkiGt4YiPWAJehdiMltN5qGY7/XsrNPz7qXHDZurzvOc ruI8P0BSwhnG0zznWagpZrciOU9mivCdx91madHIdfgAOfmISovqK1Vv3fZfQFQ+i7 W4eRBKcwS+PPCc66bBEpPTPJ/6l7Z/UUQnAFNnXuvTbAggPD2WcARhf8SN7mnb5qvC HOWTkvTD5RL82aqkG3ppKHgHtj/vzmcbSVz7NFoNYBeclIb7IzctZMw4P+5PwfGnfi Gke3ZkN7gfdvg== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , 
linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 18/39] unwind_user/sframe: Store sframe section data in per-mm maple tree Date: Tue, 21 Jan 2025 18:31:10 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Associate an sframe section with its mm by adding it to a per-mm maple tree which is indexed by the corresponding text address range. A single sframe section can be associated with multiple text ranges. Signed-off-by: Josh Poimboeuf --- arch/x86/include/asm/mmu.h | 2 +- include/linux/mm_types.h | 3 +++ include/linux/sframe.h | 13 +++++++++ kernel/fork.c | 10 +++++++ kernel/unwind/sframe.c | 55 +++++++++++++++++++++++++++++++++++--- mm/init-mm.c | 2 ++ 6 files changed, 81 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index ce4677b8b735..12ea831978cc 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -73,7 +73,7 @@ typedef struct { .context =3D { \ .ctx_id =3D 1, \ .lock =3D __MUTEX_INITIALIZER(mm.context.lock), \ - } + }, =20 void leave_mm(void); #define leave_mm leave_mm diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 332cee285662..aba0d67fda9a 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1064,6 +1064,9 @@ struct mm_struct { #endif } lru_gen; #endif /* CONFIG_LRU_GEN_WALKS_MMU */ +#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME + struct maple_tree sframe_mt; +#endif } __randomize_layout; =20 /* diff --git a/include/linux/sframe.h b/include/linux/sframe.h index 3bfaf21869c2..ff4b9d1dbd00 100644 --- a/include/linux/sframe.h +++ 
b/include/linux/sframe.h @@ -22,14 +22,27 @@ struct sframe_section { signed char fp_off; }; =20 +#define INIT_MM_SFRAME .sframe_mt =3D MTREE_INIT(sframe_mt, 0), +extern void sframe_free_mm(struct mm_struct *mm); + extern int sframe_add_section(unsigned long sframe_start, unsigned long sf= rame_end, unsigned long text_start, unsigned long text_end); extern int sframe_remove_section(unsigned long sframe_addr); =20 +static inline bool current_has_sframe(void) +{ + struct mm_struct *mm =3D current->mm; + + return mm && !mtree_empty(&mm->sframe_mt); +} + #else /* !CONFIG_HAVE_UNWIND_USER_SFRAME */ =20 +#define INIT_MM_SFRAME +static inline void sframe_free_mm(struct mm_struct *mm) {} static inline int sframe_add_section(unsigned long sframe_start, unsigned = long sframe_end, unsigned long text_start, unsigned long text_end) { return= -ENOSYS; } static inline int sframe_remove_section(unsigned long sframe_addr) { retur= n -ENOSYS; } +static inline bool current_has_sframe(void) { return false; } =20 #endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */ =20 diff --git a/kernel/fork.c b/kernel/fork.c index 9b301180fd41..88753f8bbdd3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -105,6 +105,7 @@ #include #include #include +#include =20 #include #include @@ -925,6 +926,7 @@ void __mmdrop(struct mm_struct *mm) mm_pasid_drop(mm); mm_destroy_cid(mm); percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS); + sframe_free_mm(mm); =20 free_mm(mm); } @@ -1252,6 +1254,13 @@ static void mm_init_uprobes_state(struct mm_struct *= mm) #endif } =20 +static void mm_init_sframe(struct mm_struct *mm) +{ +#ifdef CONFIG_HAVE_UNWIND_USER_SFRAME + mt_init(&mm->sframe_mt); +#endif +} + static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct = *p, struct user_namespace *user_ns) { @@ -1283,6 +1292,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm= , struct task_struct *p, mm->pmd_huge_pte =3D NULL; #endif mm_init_uprobes_state(mm); + mm_init_sframe(mm); 
hugetlb_count_init(mm); =20 if (current->mm) { diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 20287f795b36..fa7d87ffd00a 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -122,15 +122,64 @@ int sframe_add_section(unsigned long sframe_start, un= signed long sframe_end, if (ret) goto err_free; =20 - /* TODO nowhere to store it yet - just free it and return an error */ - ret =3D -ENOSYS; + ret =3D mtree_insert_range(sframe_mt, sec->text_start, sec->text_end, sec= , GFP_KERNEL); + if (ret) { + dbg("mtree_insert_range failed: text=3D%lx-%lx\n", + sec->text_start, sec->text_end); + goto err_free; + } + + return 0; =20 err_free: free_section(sec); return ret; } =20 +static int __sframe_remove_section(struct mm_struct *mm, + struct sframe_section *sec) +{ + if (!mtree_erase(&mm->sframe_mt, sec->text_start)) { + dbg("mtree_erase failed: text=3D%lx\n", sec->text_start); + return -EINVAL; + } + + free_section(sec); + + return 0; +} + int sframe_remove_section(unsigned long sframe_start) { - return -ENOSYS; + struct mm_struct *mm =3D current->mm; + struct sframe_section *sec; + unsigned long index =3D 0; + bool found =3D false; + int ret =3D 0; + + mt_for_each(&mm->sframe_mt, sec, index, ULONG_MAX) { + if (sec->sframe_start =3D=3D sframe_start) { + found =3D true; + ret |=3D __sframe_remove_section(mm, sec); + } + } + + if (!found || ret) + return -EINVAL; + + return 0; +} + +void sframe_free_mm(struct mm_struct *mm) +{ + struct sframe_section *sec; + unsigned long index =3D 0; + + if (!mm) + return; + + mt_for_each(&mm->sframe_mt, sec, index, ULONG_MAX) + free_section(sec); + + mtree_destroy(&mm->sframe_mt); } diff --git a/mm/init-mm.c b/mm/init-mm.c index 24c809379274..feb01fcd32f0 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -11,6 +11,7 @@ #include #include #include +#include #include =20 #ifndef INIT_MM_CONTEXT @@ -45,6 +46,7 @@ struct mm_struct init_mm =3D { .user_ns =3D &init_user_ns, .cpu_bitmap =3D CPU_BITS_NONE, 
INIT_MM_CONTEXT(init_mm) + INIT_MM_SFRAME }; =20 void setup_initial_init_mm(void *start_code, void *end_code, --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 97D331C5F26; Wed, 22 Jan 2025 02:32:12 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513132; cv=none; b=fMVukgPcYV/C66Ujhymwg+PSpEx3zZXQkqERjn+pDmQroKpq9uCVMgzIiPlfV/IzdaZ8MdBJe4ahCnhD2+EX4zzJ+9DQwotOxTieCT8VxVrxKzrNiT9oOgtTvmBqq9zPV0BsfJ6bJ4ON12oTddQU1nNDWLLOSK1U1Lse3uxtBSI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513132; c=relaxed/simple; bh=mYS+Vw6Dq5D1ZdtZnJxrSTmiPbgmYZPrXZ+DGFIIgIE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=bjQNBruPwaH0yYnpPQNwzlYTLHX7y+qN+5pT+68p+YjGE6xWAXSPqHklSpxn+RrjCoDGy1cQHi4jiacWvGKZ5mKFlwOoRIJ5kF8rlTx4N20bZdEzgjygPVIaMsvXA40ivmjiC8h6hLzOH/fUF3niF+6ipC8OujCx6zI4wPkSypo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=SvdM798C; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="SvdM798C" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 29A78C4CEE7; Wed, 22 Jan 2025 02:32:11 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513132; bh=mYS+Vw6Dq5D1ZdtZnJxrSTmiPbgmYZPrXZ+DGFIIgIE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=SvdM798Cc3MUELCKPZiFmXeAaWElaCetp2tNu414E5MU/BSBi26/N9zN90kAwNGa2 CX2nWVjbB8IQ018lPFTlv+MeKVvi8A0KzqDKP/YRdPZVnCsWaR7Che6M57ipfNGCm7 
DE9pWjMFAbYvHjayFjEssF2Z0vtenQaS+BHYTdRHUhW2zpucRuj5Es0yzitLpaPZDm wRuUA6Aq/GM8epEviWAnrBXICLFwOMfykeYMvOpvoFA1262DVyLeH1lS//kQ60LF2U hkQhDfyfgWWh/DiDcmz7OxLKcx4AZDXU4E2ati1d+pYgahepIpBhIo7h7yNF2Uxla2 tKGD73MpFGOcw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 19/39] unwind_user/sframe: Add support for reading .sframe contents Date: Tue, 21 Jan 2025 18:31:11 -0800 Message-ID: <77c0d1ec143bf2a53d66c4ecb190e7e0a576fbfd.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" In preparation for using sframe to unwind user space stacks, add an sframe_find() interface for finding the sframe information associated with a given text address. For performance, use user_read_access_begin() and the corresponding unsafe_*() accessors. Note that use of pr_debug() in uaccess-enabled regions would break noinstr validation, so there aren't any debug messages yet. That will be added in a subsequent commit. 
Signed-off-by: Josh Poimboeuf --- include/linux/sframe.h | 5 + kernel/unwind/sframe.c | 295 ++++++++++++++++++++++++++++++++++- kernel/unwind/sframe_debug.h | 35 +++++ 3 files changed, 331 insertions(+), 4 deletions(-) create mode 100644 kernel/unwind/sframe_debug.h diff --git a/include/linux/sframe.h b/include/linux/sframe.h index ff4b9d1dbd00..2e70085a1e89 100644 --- a/include/linux/sframe.h +++ b/include/linux/sframe.h @@ -3,11 +3,14 @@ #define _LINUX_SFRAME_H =20 #include +#include #include =20 #ifdef CONFIG_HAVE_UNWIND_USER_SFRAME =20 struct sframe_section { + struct rcu_head rcu; + unsigned long sframe_start; unsigned long sframe_end; unsigned long text_start; @@ -28,6 +31,7 @@ extern void sframe_free_mm(struct mm_struct *mm); extern int sframe_add_section(unsigned long sframe_start, unsigned long sf= rame_end, unsigned long text_start, unsigned long text_end); extern int sframe_remove_section(unsigned long sframe_addr); +extern int sframe_find(unsigned long ip, struct unwind_user_frame *frame); =20 static inline bool current_has_sframe(void) { @@ -42,6 +46,7 @@ static inline bool current_has_sframe(void) static inline void sframe_free_mm(struct mm_struct *mm) {} static inline int sframe_add_section(unsigned long sframe_start, unsigned = long sframe_end, unsigned long text_start, unsigned long text_end) { return= -ENOSYS; } static inline int sframe_remove_section(unsigned long sframe_addr) { retur= n -ENOSYS; } +static inline int sframe_find(unsigned long ip, struct unwind_user_frame *= frame) { return -ENOSYS; } static inline bool current_has_sframe(void) { return false; } =20 #endif /* CONFIG_HAVE_UNWIND_USER_SFRAME */ diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index fa7d87ffd00a..1a35615a361e 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -15,9 +15,287 @@ #include =20 #include "sframe.h" +#include "sframe_debug.h" =20 -#define dbg(fmt, ...) 
\ - pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__) +struct sframe_fre { + unsigned int size; + s32 ip_off; + s32 cfa_off; + s32 ra_off; + s32 fp_off; + u8 info; +}; + +DEFINE_STATIC_SRCU(sframe_srcu); + +static __always_inline unsigned char fre_type_to_size(unsigned char fre_ty= pe) +{ + if (fre_type > 2) + return 0; + return 1 << fre_type; +} + +static __always_inline unsigned char offset_size_enum_to_size(unsigned cha= r off_size) +{ + if (off_size > 2) + return 0; + return 1 << off_size; +} + +static __always_inline int __read_fde(struct sframe_section *sec, + unsigned int fde_num, + struct sframe_fde *fde) +{ + unsigned long fde_addr, ip; + + fde_addr =3D sec->fdes_start + (fde_num * sizeof(struct sframe_fde)); + unsafe_copy_from_user(fde, (void __user *)fde_addr, + sizeof(struct sframe_fde), Efault); + + ip =3D sec->sframe_start + fde->start_addr; + if (ip < sec->text_start || ip > sec->text_end) + return -EINVAL; + + return 0; + +Efault: + return -EFAULT; +} + +static __always_inline int __find_fde(struct sframe_section *sec, + unsigned long ip, + struct sframe_fde *fde) +{ + s32 ip_off, func_off_low =3D S32_MIN, func_off_high =3D S32_MAX; + struct sframe_fde __user *first, *low, *high, *found =3D NULL; + int ret; + + ip_off =3D ip - sec->sframe_start; + + first =3D (void __user *)sec->fdes_start; + low =3D first; + high =3D first + sec->num_fdes - 1; + + while (low <=3D high) { + struct sframe_fde __user *mid; + s32 func_off; + + mid =3D low + ((high - low) / 2); + + unsafe_get_user(func_off, (s32 __user *)mid, Efault); + + if (ip_off >=3D func_off) { + if (func_off < func_off_low) + return -EFAULT; + + func_off_low =3D func_off; + + found =3D mid; + low =3D mid + 1; + } else { + if (func_off > func_off_high) + return -EFAULT; + + func_off_high =3D func_off; + + high =3D mid - 1; + } + } + + if (!found) + return -EINVAL; + + ret =3D __read_fde(sec, found - first, fde); + if (ret) + return ret; + + /* make sure it's not in a gap */ + if 
(ip_off < fde->start_addr || ip_off >=3D fde->start_addr + fde->func_s= ize) + return -EINVAL; + + return 0; + +Efault: + return -EFAULT; +} + +#define __UNSAFE_GET_USER_INC(to, from, type, label) \ +({ \ + type __to; \ + unsafe_get_user(__to, (type __user *)from, label); \ + from +=3D sizeof(__to); \ + to =3D (typeof(to))__to; \ +}) + +#define UNSAFE_GET_USER_INC(to, from, size, label) \ +({ \ + switch (size) { \ + case 1: \ + __UNSAFE_GET_USER_INC(to, from, u8, label); \ + break; \ + case 2: \ + __UNSAFE_GET_USER_INC(to, from, u16, label); \ + break; \ + case 4: \ + __UNSAFE_GET_USER_INC(to, from, u32, label); \ + break; \ + default: \ + return -EFAULT; \ + } \ +}) + +static __always_inline int __read_fre(struct sframe_section *sec, + struct sframe_fde *fde, + unsigned long fre_addr, + struct sframe_fre *fre) +{ + unsigned char fde_type =3D SFRAME_FUNC_FDE_TYPE(fde->info); + unsigned char fre_type =3D SFRAME_FUNC_FRE_TYPE(fde->info); + unsigned char offset_count, offset_size; + s32 ip_off, cfa_off, ra_off, fp_off; + unsigned long cur =3D fre_addr; + unsigned char addr_size; + u8 info; + + addr_size =3D fre_type_to_size(fre_type); + if (!addr_size) + return -EFAULT; + + if (fre_addr + addr_size + 1 > sec->fres_end) + return -EFAULT; + + UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault); + if (fde_type =3D=3D SFRAME_FDE_TYPE_PCINC && ip_off > fde->func_size) + return -EFAULT; + + UNSAFE_GET_USER_INC(info, cur, 1, Efault); + offset_count =3D SFRAME_FRE_OFFSET_COUNT(info); + offset_size =3D offset_size_enum_to_size(SFRAME_FRE_OFFSET_SIZE(info)); + if (!offset_count || !offset_size) + return -EFAULT; + + if (cur + (offset_count * offset_size) > sec->fres_end) + return -EFAULT; + + fre->size =3D addr_size + 1 + (offset_count * offset_size); + + UNSAFE_GET_USER_INC(cfa_off, cur, offset_size, Efault); + offset_count--; + + ra_off =3D sec->ra_off; + if (!ra_off) { + if (!offset_count--) + return -EFAULT; + + UNSAFE_GET_USER_INC(ra_off, cur, offset_size, Efault); + } + + 
fp_off =3D sec->fp_off; + if (!fp_off && offset_count) { + offset_count--; + UNSAFE_GET_USER_INC(fp_off, cur, offset_size, Efault); + } + + if (offset_count) + return -EFAULT; + + fre->ip_off =3D ip_off; + fre->cfa_off =3D cfa_off; + fre->ra_off =3D ra_off; + fre->fp_off =3D fp_off; + fre->info =3D info; + + return 0; + +Efault: + return -EFAULT; +} + +static __always_inline int __find_fre(struct sframe_section *sec, + struct sframe_fde *fde, unsigned long ip, + struct unwind_user_frame *frame) +{ + unsigned char fde_type =3D SFRAME_FUNC_FDE_TYPE(fde->info); + struct sframe_fre *fre, *prev_fre =3D NULL; + struct sframe_fre fres[2]; + unsigned long fre_addr; + bool which =3D false; + unsigned int i; + s32 ip_off; + + ip_off =3D (s32)(ip - sec->sframe_start) - fde->start_addr; + + if (fde_type =3D=3D SFRAME_FDE_TYPE_PCMASK) + ip_off %=3D fde->rep_size; + + fre_addr =3D sec->fres_start + fde->fres_off; + + for (i =3D 0; i < fde->fres_num; i++) { + int ret; + + /* + * Alternate between the two fre_addr[] entries for 'fre' and + * 'prev_fre'. + */ + fre =3D which ? 
fres : fres + 1; + which =3D !which; + + ret =3D __read_fre(sec, fde, fre_addr, fre); + if (ret) + return ret; + + fre_addr +=3D fre->size; + + if (prev_fre && fre->ip_off <=3D prev_fre->ip_off) + return -EFAULT; + + if (fre->ip_off > ip_off) + break; + + prev_fre =3D fre; + } + + if (!prev_fre) + return -EINVAL; + fre =3D prev_fre; + + frame->cfa_off =3D fre->cfa_off; + frame->ra_off =3D fre->ra_off; + frame->fp_off =3D fre->fp_off; + frame->use_fp =3D SFRAME_FRE_CFA_BASE_REG_ID(fre->info) =3D=3D SFRAME_BA= SE_REG_FP; + + return 0; +} + +int sframe_find(unsigned long ip, struct unwind_user_frame *frame) +{ + struct mm_struct *mm =3D current->mm; + struct sframe_section *sec; + struct sframe_fde fde; + int ret; + + if (!mm) + return -EINVAL; + + guard(srcu)(&sframe_srcu); + + sec =3D mtree_load(&mm->sframe_mt, ip); + if (!sec) + return -EINVAL; + + if (!user_read_access_begin((void __user *)sec->sframe_start, + sec->sframe_end - sec->sframe_start)) + return -EFAULT; + + ret =3D __find_fde(sec, ip, &fde); + if (ret) + goto end; + + ret =3D __find_fre(sec, &fde, ip, frame); +end: + user_read_access_end(); + return ret; +} =20 static void free_section(struct sframe_section *sec) { @@ -119,8 +397,10 @@ int sframe_add_section(unsigned long sframe_start, uns= igned long sframe_end, sec->text_end =3D text_end; =20 ret =3D sframe_read_header(sec); - if (ret) + if (ret) { + dbg_print_header(sec); goto err_free; + } =20 ret =3D mtree_insert_range(sframe_mt, sec->text_start, sec->text_end, sec= , GFP_KERNEL); if (ret) { @@ -136,6 +416,13 @@ int sframe_add_section(unsigned long sframe_start, uns= igned long sframe_end, return ret; } =20 +static void sframe_free_srcu(struct rcu_head *rcu) +{ + struct sframe_section *sec =3D container_of(rcu, struct sframe_section, r= cu); + + free_section(sec); +} + static int __sframe_remove_section(struct mm_struct *mm, struct sframe_section *sec) { @@ -144,7 +431,7 @@ static int __sframe_remove_section(struct mm_struct *mm, return -EINVAL; } 
=20 - free_section(sec); + call_srcu(&sframe_srcu, &sec->rcu, sframe_free_srcu); =20 return 0; } diff --git a/kernel/unwind/sframe_debug.h b/kernel/unwind/sframe_debug.h new file mode 100644 index 000000000000..055c8c8fae24 --- /dev/null +++ b/kernel/unwind/sframe_debug.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _SFRAME_DEBUG_H +#define _SFRAME_DEBUG_H + +#include +#include "sframe.h" + +#ifdef CONFIG_DYNAMIC_DEBUG + +#define dbg(fmt, ...) \ + pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__) + +static __always_inline void dbg_print_header(struct sframe_section *sec) +{ + unsigned long fdes_end; + + fdes_end =3D sec->fdes_start + (sec->num_fdes * sizeof(struct sframe_fde)= ); + + dbg("SEC: sframe:0x%lx-0x%lx text:0x%lx-0x%lx " + "fdes:0x%lx-0x%lx fres:0x%lx-0x%lx " + "ra_off:%d fp_off:%d\n", + sec->sframe_start, sec->sframe_end, sec->text_start, sec->text_end, + sec->fdes_start, fdes_end, sec->fres_start, sec->fres_end, + sec->ra_off, sec->fp_off); +} + +#else /* !CONFIG_DYNAMIC_DEBUG */ + +#define dbg(args...) 
no_printk(args) + +static inline void dbg_print_header(struct sframe_section *sec) {} + +#endif /* !CONFIG_DYNAMIC_DEBUG */ + +#endif /* _SFRAME_DEBUG_H */ --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 22AB61C5F39; Wed, 22 Jan 2025 02:32:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513133; cv=none; b=MHLe6SXuv6O2qOLRCczFJv6OuE4cJ7KRSYrI6JY+ZxA7hcG7OMMF2WSGGtwrSUVsSkWhwn7cUluXb3ddI6gqMnb2pZLu/THFHkXJ1ZBn19azz38Q2HY0Se5vBDXaNsTWRpovHAsrVSviY50BneO/W0wshk5o8++RmJflj1BD470= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513133; c=relaxed/simple; bh=0lbd2qu1rhFADP15o4ZX88OlHxQN+oJb9mzbgZWD9R0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=RVgJm1y2YLzWpW7TRgz9KVIi6T03zWixSLOalh1rPZ+3oz/Yp1Z4E7lsX2iNF9jIO1c6d3xd3/gy1VvvtagdW+nXTbBEFbdvZj+r1ttPClKZ0tR9AGgL/Msl2wVsVZrqx5pIXV1DsQbWJYRMkU3w93iD6s0+Dd8xK7QzkHkH4Y4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=h7yFrwy5; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="h7yFrwy5" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2A4F8C4CEE8; Wed, 22 Jan 2025 02:32:12 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513133; bh=0lbd2qu1rhFADP15o4ZX88OlHxQN+oJb9mzbgZWD9R0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=h7yFrwy5IU+2pveZa193HkI+Q65/KCeBe0Eot9OcmGVsWCL0P4hNoGRX667uTCEUP 
VxBxVG0UI7lWdra4csT6HClAtawbjtJTX3wXe6TiyVoS4qdWj8nFy4SBL5VfWZrT1K pJVlQWyITUamM3Lsnv0nBXQwWrWlw0IVQdxONrHwjvkUChqrxonPZ6NT/Tu/enNyAR cyIBu5SX5Hq4C+71rbwh1eeeBXDd31ArXFEGbeq/hGgLJQKTDfruBlg++UmgFnIz1J a5HcS+UTP1m8e6UM+WXahPi1jD4M+O2U7iP1fP9xapOnL2e/9Bfc2z6R6QKkv5EaBQ duMygIL1T/1jw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 20/39] unwind_user/sframe: Detect .sframe sections in executables Date: Tue, 21 Jan 2025 18:31:12 -0800 Message-ID: <3d4d6fc48a766666b82f415ebb4aa8cc435f30ae.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When loading an ELF executable, automatically detect an .sframe section and associate it with the mm_struct. 
Signed-off-by: Josh Poimboeuf --- fs/binfmt_elf.c | 49 +++++++++++++++++++++++++++++++++++++--- include/uapi/linux/elf.h | 1 + 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 106f0e8af177..90cd745e5bd6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include =20 @@ -629,6 +630,21 @@ static inline int make_prot(u32 p_flags, struct arch_e= lf_state *arch_state, return arch_elf_adjust_prot(prot, arch_state, has_interp, is_interp); } =20 +static void elf_add_sframe(struct elf_phdr *text, struct elf_phdr *sframe, + unsigned long base_addr) +{ + unsigned long sframe_start, sframe_end, text_start, text_end; + + sframe_start =3D base_addr + sframe->p_vaddr; + sframe_end =3D sframe_start + sframe->p_memsz; + + text_start =3D base_addr + text->p_vaddr; + text_end =3D text_start + text->p_memsz; + + /* Ignore return value, sframe section isn't critical */ + sframe_add_section(sframe_start, sframe_end, text_start, text_end); +} + /* This is much more generalized than the library routine read function, so we keep this separate. 
Technically the library read function is only provided so that we can read a.out libraries that have @@ -639,7 +655,7 @@ static unsigned long load_elf_interp(struct elfhdr *int= erp_elf_ex, unsigned long no_base, struct elf_phdr *interp_elf_phdata, struct arch_elf_state *arch_state) { - struct elf_phdr *eppnt; + struct elf_phdr *eppnt, *sframe_phdr =3D NULL; unsigned long load_addr =3D 0; int load_addr_set =3D 0; unsigned long error =3D ~0UL; @@ -665,7 +681,8 @@ static unsigned long load_elf_interp(struct elfhdr *int= erp_elf_ex, =20 eppnt =3D interp_elf_phdata; for (i =3D 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { - if (eppnt->p_type =3D=3D PT_LOAD) { + switch (eppnt->p_type) { + case PT_LOAD: { int elf_type =3D MAP_PRIVATE; int elf_prot =3D make_prot(eppnt->p_flags, arch_state, true, true); @@ -704,6 +721,20 @@ static unsigned long load_elf_interp(struct elfhdr *in= terp_elf_ex, error =3D -ENOMEM; goto out; } + break; + } + case PT_GNU_SFRAME: + sframe_phdr =3D eppnt; + break; + } + } + + if (sframe_phdr) { + eppnt =3D interp_elf_phdata; + for (i =3D 0; i < interp_elf_ex->e_phnum; i++, eppnt++) { + if (eppnt->p_flags & PF_X) { + elf_add_sframe(eppnt, sframe_phdr, load_addr); + } } } =20 @@ -829,7 +860,7 @@ static int load_elf_binary(struct linux_binprm *bprm) int first_pt_load =3D 1; unsigned long error; struct elf_phdr *elf_ppnt, *elf_phdata, *interp_elf_phdata =3D NULL; - struct elf_phdr *elf_property_phdata =3D NULL; + struct elf_phdr *elf_property_phdata =3D NULL, *sframe_phdr =3D NULL; unsigned long elf_brk; int retval, i; unsigned long elf_entry; @@ -937,6 +968,10 @@ static int load_elf_binary(struct linux_binprm *bprm) executable_stack =3D EXSTACK_DISABLE_X; break; =20 + case PT_GNU_SFRAME: + sframe_phdr =3D elf_ppnt; + break; + case PT_LOPROC ... 
PT_HIPROC: retval =3D arch_elf_pt_proc(elf_ex, elf_ppnt, bprm->file, false, @@ -1227,6 +1262,14 @@ static int load_elf_binary(struct linux_binprm *bprm) elf_brk =3D k; } =20 + if (sframe_phdr) { + for (i =3D 0, elf_ppnt =3D elf_phdata; + i < elf_ex->e_phnum; i++, elf_ppnt++) { + if ((elf_ppnt->p_flags & PF_X)) + elf_add_sframe(elf_ppnt, sframe_phdr, load_bias); + } + } + e_entry =3D elf_ex->e_entry + load_bias; phdr_addr +=3D load_bias; elf_brk +=3D load_bias; diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h index b44069d29cec..026978cddc2e 100644 --- a/include/uapi/linux/elf.h +++ b/include/uapi/linux/elf.h @@ -39,6 +39,7 @@ typedef __s64 Elf64_Sxword; #define PT_GNU_STACK (PT_LOOS + 0x474e551) #define PT_GNU_RELRO (PT_LOOS + 0x474e552) #define PT_GNU_PROPERTY (PT_LOOS + 0x474e553) +#define PT_GNU_SFRAME (PT_LOOS + 0x474e554) =20 =20 /* ARM MTE memory tag segment type */ --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9340A1CAA6B; Wed, 22 Jan 2025 02:32:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513134; cv=none; b=aX06OKEGRyhLi9cFoVRTrezxH7hK8PdEnnNn152OftZ4qCEhVrLI0/EeONC6k8lQ8OBDTnVy248qz9RAbb0fog0Zu9EroWzXeq7CNqoHlq+CNsaooPvWo0z/Ms+Cdo1wML9wurhwV0e4r5NM3bPlszOjZvNq5Zaro0UtntlC/dY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513134; c=relaxed/simple; bh=W29l9qey3ChAqB78fMRVvzdYJy8KJI4PhdoLFfE4Oos=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=fSv22ql4+1xaZhq6joK3JuEs9sNNF/RPE3sx4zs8f8YnNJO2QjOytIkfkNT1ImOPwVSftznTtwA/Zg8hFau74/Thw1AbKC//6FHD+55Bzl3ndO0/kMdxX7O7fbocwd8By5dzpZ7KSTtXlXM081SlCAly2j5zWaSdDOJcevs0iog= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=h0TwhpOT; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="h0TwhpOT" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2C23EC4CEE2; Wed, 22 Jan 2025 02:32:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513134; bh=W29l9qey3ChAqB78fMRVvzdYJy8KJI4PhdoLFfE4Oos=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=h0TwhpOTDa5ZT+nHd5+YGB9ajqEgjRA+nFIME4pcWlshoVRAPQvBH6LGtKWyAGqSN OpjpMLpjy58vSe+1LbYbflPql0DXz8oUnLaM5D1+zR49Wo2ZaTdGgQtgRO3LXSBObI HQlgpC2HLoKJlZgOKW5/EDlqmkcRPY4R1UP6qgxzaTzM6FbaDZb4BgkGIU2z+RR0e7 UmmY03L1+L3y+cfkkE6JuQJ3n5acPzDHez0fq/X6JmCxYP/wXGmdMCQgrk+Ha/ZWfv qGsdqE4BWlTnRE40lLlaF1wquuQvkAvEud72Ucm/MfdECobfcKW/4sVdU/95TwSWby mQbeitzUf7t7g== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 21/39] unwind_user/sframe: Add prctl() interface for registering .sframe sections Date: Tue, 21 Jan 2025 18:31:13 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 
Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The kernel doesn't have direct visibility to the ELF contents of shared libraries. Add some prctl() interfaces which allow glibc to tell the kernel where to find .sframe sections. Signed-off-by: Josh Poimboeuf --- include/uapi/linux/prctl.h | 5 ++++- kernel/sys.c | 9 +++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 5c6080680cb2..4a52e3f9ccc9 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -351,6 +351,9 @@ struct prctl_mm_map { * configuration. All bits may be locked via this call, including * undefined bits. */ -#define PR_LOCK_SHADOW_STACK_STATUS 76 +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +#define PR_ADD_SFRAME 77 +#define PR_REMOVE_SFRAME 78 =20 #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index c4c701c6f0b4..414dfd6ee9fa 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -64,6 +64,7 @@ #include #include #include +#include =20 #include =20 @@ -2809,6 +2810,14 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, a= rg2, unsigned long, arg3, return -EINVAL; error =3D arch_lock_shadow_stack_status(me, arg2); break; + case PR_ADD_SFRAME: + error =3D sframe_add_section(arg2, arg3, arg4, arg5); + break; + case PR_REMOVE_SFRAME: + if (arg3 || arg4 || arg5) + return -EINVAL; + error =3D sframe_remove_section(arg2); + break; default: error =3D -EINVAL; break; --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 948041CCB40; Wed, 22 Jan 2025 02:32:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; 
s=arc-20240116; t=1737513136; cv=none; b=NOBdX1DG0lMJWGoU0RPeg7rThSFemHnzX9SApLqRuujXmRHV717VUlejWUg8p5Wj1jjToSAIQnWbGSciPooHyFO/VJk34A2ybGVurTkPGASdORxXlMc9yF7DZ5S//yyASoqUfyggCRonZBEEJ+N2jrTHsGnE+agbg7BZ+PsclxU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513136; c=relaxed/simple; bh=3l4otAURi7kB3h4hFwmhDwPOJru/SqxpPAZlyFNo3VI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=AFkdF/enMgPmf5KP3tu2TBvmG+hG4yysZ/gRB2CNu8JVeguWReqTun6vQl+acd8ugnNSY6x/8QwKwhpojRRpxTOZtTG0s/OtWnrpg/q75SiDpc89tKlG9bPccWnS7eKfZK8Hoj0O9FnoqLbxrr3ltow2i+RH3UGoefNnxzz40o8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=d68pAy6U; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="d68pAy6U" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3866BC4CEDF; Wed, 22 Jan 2025 02:32:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513135; bh=3l4otAURi7kB3h4hFwmhDwPOJru/SqxpPAZlyFNo3VI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=d68pAy6UiYIsXp8LITx076zVTZTyrm1OXBRC5OuHcsdfD+Q7V30hrc+0QVV1+7nd5 c1S+S6nAjhyDe7YuVxMTNiIcXi0jsQQgPoFSGK1H193Uk+YUYj8EZo5fcqE+P+jEdX No5ap9GE61FQ6YpTt1ytrYTu2nSoEImZrIosqJ4HibfkrWtXbqIFSTlaqDJCBx3N5y DqU2AXa1GHeSPJHNo/foKQ90cJK67s+4lzT5oHUkPOHLg5mmzdkZ5YJ2sSAJMhgtqP p5oYApcj3+fTMRsuyi//ZZ+xmLThJZnzhX0X5dTPxI6cP0Q4iUWpFjiLlcqY9ccxxV u9AWNC5nAMqFA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, 
Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 22/39] unwind_user/sframe: Wire up unwind_user to sframe Date: Tue, 21 Jan 2025 18:31:14 -0800 Message-ID: <25c75930c310cb4e9d3878276f00fe7e6523f4e1.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Now that the sframe infrastructure is fully in place, make it work by hooking it up to the unwind_user interface. Signed-off-by: Josh Poimboeuf --- arch/Kconfig | 1 + include/linux/unwind_user_types.h | 1 + kernel/unwind/user.c | 22 +++++++++++++++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 23edd0e4e16a..12a3b73cbe66 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -448,6 +448,7 @@ config HAVE_UNWIND_USER_COMPAT_FP =20 config HAVE_UNWIND_USER_SFRAME bool + select UNWIND_USER =20 config AS_SFRAME def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc) diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_= types.h index 3ec4a097a3dd..5558558948b7 100644 --- a/include/linux/unwind_user_types.h +++ b/include/linux/unwind_user_types.h @@ -9,6 +9,7 @@ enum unwind_user_type { UNWIND_USER_TYPE_NONE, UNWIND_USER_TYPE_FP, UNWIND_USER_TYPE_COMPAT_FP, + UNWIND_USER_TYPE_SFRAME, }; =20 struct unwind_stacktrace { diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c index 92963f129c6a..fc0c75da81f6 100644 --- a/kernel/unwind/user.c +++ b/kernel/unwind/user.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include =20 @@ -29,6 +30,12 @@ static inline bool compat_state(struct unwind_user_state= *state) state->type =3D=3D UNWIND_USER_TYPE_COMPAT_FP; } =20 +static inline bool 
sframe_state(struct unwind_user_state *state) +{ + return IS_ENABLED(CONFIG_HAVE_UNWIND_USER_SFRAME) && + state->type =3D=3D UNWIND_USER_TYPE_SFRAME; +} + #define UNWIND_GET_USER_LONG(to, from, state) \ ({ \ int __ret; \ @@ -48,12 +55,19 @@ int unwind_user_next(struct unwind_user_state *state) if (state->done) return -EINVAL; =20 - if (compat_state(state)) + if (compat_state(state)) { frame =3D &compat_fp_frame; - else if (fp_state(state)) + } else if (sframe_state(state)) { + if (sframe_find(state->ip, frame)) { + if (!IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP)) + goto the_end; + frame =3D &fp_frame; + } + } else if (fp_state(state)) { frame =3D &fp_frame; - else + } else { goto the_end; + } =20 cfa =3D (frame->use_fp ? state->fp : state->sp) + frame->cfa_off; =20 @@ -94,6 +108,8 @@ int unwind_user_start(struct unwind_user_state *state) =20 if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_COMPAT_FP) && in_compat_mode(regs)) state->type =3D UNWIND_USER_TYPE_COMPAT_FP; + else if (current_has_sframe()) + state->type =3D UNWIND_USER_TYPE_SFRAME; else if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP)) state->type =3D UNWIND_USER_TYPE_FP; else --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 333F41CB337; Wed, 22 Jan 2025 02:32:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513136; cv=none; b=cbAj10MURfCQbfX5Tn87CFVn75Xok0xJ/QxFaReF4FfpKXD0xKKzKlGoUFPsdK5OuuPdgGLazOuMjdWiedQLho1dN9iAJw+rQxvLqqZ7PQ/wCGolRHPuIfN1gQtII4ah43sUA9XucdAacRA5gIEzvuaGICVIPfaYtGyi8eZpVW8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513136; c=relaxed/simple; 
bh=SJtVtbvkLcEvN5WibexRkaVRWjXt1N5t9/+LsSeWysQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=FkGFM0dBuOdr44lJl3RQygsMYP2htd/+mQR62vc0ed3WrzzLJ/ryQWf8CXQBo4uPbzCkqY9YqfNfvts+MR8W6tJFHLO72I6v93zH3WvNp3PT9LfKzarYBlst0mNGALGAbuRzdFjtOD/RtaFH0pDolVy8IaPw4676AdGJ1VpbemM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=KITmzbbo; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="KITmzbbo" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3C342C4CEE5; Wed, 22 Jan 2025 02:32:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513136; bh=SJtVtbvkLcEvN5WibexRkaVRWjXt1N5t9/+LsSeWysQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=KITmzbboQkeH902l5uema+VjX6uIubfMNZW56mKn1L5OG0Fy3rzwFjatLLRvL/npn sAsSAyPqa/9fhaOJJ/vBgq08p7jDcZyb8GitAc8El5z6WsJTlRBHj3UqzSgWph9Niw GOshUPpWklvCTTulnQUAHYdCUYMIkeaviND/4Mx6GYl8rN+W215KKG8SHF8hT2iasY L8HUVtha1/NP0YhiyocDV+2mLCRdRu9iRhd5F69fzzN3WUnuvqY/zTgyBT0x3M79bz 6uFYRkdzVc9mfLZCq2PZVB4Tlb12cmpYgIo9uPXo2/BZmAh5cPOC0gBpnoDL4fL4/i FTB4trh0yulXA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 23/39] unwind_user/sframe/x86: Enable sframe unwinding on x86 Date: Tue, 21 Jan 2025 18:31:15 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk 
X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The x86 sframe 2.0 implementation works fairly well, starting with binutils 2.41 (though some bugs are getting fixed in later versions). Enable it. Signed-off-by: Josh Poimboeuf --- arch/x86/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 08c44db0fefb..1016f8f80447 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -293,6 +293,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_UNWIND_USER_COMPAT_FP if IA32_EMULATION select HAVE_UNWIND_USER_FP if X86_64 + select HAVE_UNWIND_USER_SFRAME if X86_64 select HAVE_USER_RETURN_NOTIFIER select HAVE_GENERIC_VDSO select VDSO_GETRANDOM if X86_64 --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9AC471D014E; Wed, 22 Jan 2025 02:32:17 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513138; cv=none; b=hYKF28yfnghU9Lz6Ay7zA1Aw76AD2k8p4avW4EY3E7Ym+AGr7jp+vSLeNv6DuqKw/JKAjIDO8or+iPTcOo3jf0pI6qHBjMAXLJ3jI4ViupCWmM8xpPinb9ABXiXtbWJM8SDP3Z7KtsuZzqZ//VlIY2s2YIVxwRicsnAgyclPRCI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513138; c=relaxed/simple; bh=u+eqCpNqrTS/L8RyCIiLzVLvZ3FuAzNIAJa3OUbMdgU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=PCWAWl73VBmWuOAlI5Gn1s2pmfAuq0Ou4Ljw/GVVcXxV1xx7zgYRTI1eUNnGnFDvPFlU7FQuzfZHGu3L1ALNn0bLCFMVGmSMiK2AQPX45EY+9oUHnVoNS6Owbq2qHU9xglEdxsnmajLfCy4eLPD4TvyEuv1ypnDqit85v9yJHk4= ARC-Authentication-Results: 
i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=mQCVzyB3; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="mQCVzyB3" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3E89FC4AF0E; Wed, 22 Jan 2025 02:32:16 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513137; bh=u+eqCpNqrTS/L8RyCIiLzVLvZ3FuAzNIAJa3OUbMdgU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=mQCVzyB3yJ5ZiMZgnDQHZwolfL1MvQvbDYFL/JtQ/yHkNxTIClS4zbY2zhm1llIqw Dem6EFyOqO5meqdy/TKrO52/LqnGaJw7rrG34g3XwVx9c3FGxc/e20ac3/TP125Z4D JwIrx8eFj9ju3ZUFIkJ14C+uZYqablwbLgUlOgHwb5ntWIfZ1V7X84hZFzzdcQsCf5 m5mBTL1MFSebghzb8jD3qBr5QzysHDngvn9xq8kzSFfpUZz5SnQyrUAn+s+msn0DhL dVDaaVpaBl96R7jPZ6cNBBisEiHhakHI8gmUBP3B3noUy3fIxlLHlwN/u75zYQoigs jaoj8MUK7hxsQ== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 24/39] unwind_user/sframe: Remove .sframe section on detected corruption Date: Tue, 21 Jan 2025 18:31:16 -0800 Message-ID: <970bf7b27e7fa3d05a90d5762cc29cf9d2260cfc.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" To avoid continued attempted use of a bad .sframe 
section, remove it on demand when the first sign of corruption is detected. Signed-off-by: Josh Poimboeuf --- kernel/unwind/sframe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 1a35615a361e..66b920441692 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -294,6 +294,10 @@ int sframe_find(unsigned long ip, struct unwind_user_f= rame *frame) ret =3D __find_fre(sec, &fde, ip, frame); end: user_read_access_end(); + + if (ret =3D=3D -EFAULT) + WARN_ON_ONCE(sframe_remove_section(sec->sframe_start)); + return ret; } =20 --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3F0741CEAD8; Wed, 22 Jan 2025 02:32:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513138; cv=none; b=RVQIyHzxecaIkmvCQM+UFF4wZERnirdorHQ+QDxzCnOqvamy5tWkOfkt0y1MQsL8fp8MzR8yZO697jgSQWZi4MGSvfxmF9r46PVCwAi3SzWJ6oh3/Y6eiR1FiTJtXPfSvdUb9ZuvgTnmCIVmmeBoZiF9DGcmQJIG45gOSSy30xo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513138; c=relaxed/simple; bh=EszPaQitQ6vk4pAWHbHTmWoDPqRPjbKYb1AQBUCFYns=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=ClTTT+g3Nqq4m2p1nPeWvZR070du5YwxFmHwjJ+CxoJCX2jCdaZxStCvrwG9AStxbT3XByT2fQQIpZXU57Y+N596jNZG5wPxkdW8Sc9ww4OtKFktUvIyDZpw+izZbN78VDpuJ8i5LfsSWAnRvbhKrMTyFPrH5S75uC4uzzFk4/Q= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=LatzYg7q; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org 
header.i=@kernel.org header.b="LatzYg7q" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3A6FEC4CEE8; Wed, 22 Jan 2025 02:32:17 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513138; bh=EszPaQitQ6vk4pAWHbHTmWoDPqRPjbKYb1AQBUCFYns=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LatzYg7q5LodYyDEjCscsl+C7yxyjLsyY1jOcvt+hpM4fDi5ISm8xSFVSeO5yw9QX AqqHPiwwRazGhHK1h6G0gu6N4tC1i3HhEXDMhK+rxX6+xrma4DPP2hiioSvtHpn1lV FH7CDCWTbh3f009eEIuNs4ITlOKvVb/9q37tHFVCwAEOrFsv12b2QNRWWam1O27f6f 0N9fAYsVbnr8ZILgkvWJy5tQ6RLPRm92yaX0PLBsfBsf9ZibilDdPXcVtdMdE0Mz8u IsDuSpEHJKuKakYlKx4prR2rKG3Pqx1l1lLEdGhXyBk00eqgWnF++xavv/oUGMptEZ r/b/Iy+ktszsw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 25/39] unwind_user/sframe: Show file name in debug output Date: Tue, 21 Jan 2025 18:31:17 -0800 Message-ID: <7bfb7ffad5fe384fe3dbe6c8ca985f35b98b48bb.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" When debugging sframe issues, the error messages aren't all that helpful without knowing what file a corresponding .sframe section belongs to. Prefix debug output strings with the file name. 
Signed-off-by: Josh Poimboeuf --- include/linux/sframe.h | 4 +++- kernel/unwind/sframe.c | 23 ++++++++++++-------- kernel/unwind/sframe_debug.h | 41 ++++++++++++++++++++++++++++++------ 3 files changed, 52 insertions(+), 16 deletions(-) diff --git a/include/linux/sframe.h b/include/linux/sframe.h index 2e70085a1e89..18a04d574090 100644 --- a/include/linux/sframe.h +++ b/include/linux/sframe.h @@ -10,7 +10,9 @@ =20 struct sframe_section { struct rcu_head rcu; - +#ifdef CONFIG_DYNAMIC_DEBUG + const char *filename; +#endif unsigned long sframe_start; unsigned long sframe_end; unsigned long text_start; diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index 66b920441692..f463123f9afe 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -295,14 +295,17 @@ int sframe_find(unsigned long ip, struct unwind_user_= frame *frame) end: user_read_access_end(); =20 - if (ret =3D=3D -EFAULT) + if (ret =3D=3D -EFAULT) { + dbg_sec("removing bad .sframe section\n"); WARN_ON_ONCE(sframe_remove_section(sec->sframe_start)); + } =20 return ret; } =20 static void free_section(struct sframe_section *sec) { + dbg_free(sec); kfree(sec); } =20 @@ -313,7 +316,7 @@ static int sframe_read_header(struct sframe_section *se= c) unsigned int num_fdes; =20 if (copy_from_user(&shdr, (void __user *)sec->sframe_start, sizeof(shdr))= ) { - dbg("header usercopy failed\n"); + dbg_sec("header usercopy failed\n"); return -EFAULT; } =20 @@ -321,18 +324,18 @@ static int sframe_read_header(struct sframe_section *= sec) shdr.preamble.version !=3D SFRAME_VERSION_2 || !(shdr.preamble.flags & SFRAME_F_FDE_SORTED) || shdr.auxhdr_len) { - dbg("bad/unsupported sframe header\n"); + dbg_sec("bad/unsupported sframe header\n"); return -EINVAL; } =20 if (!shdr.num_fdes || !shdr.num_fres) { - dbg("no fde/fre entries\n"); + dbg_sec("no fde/fre entries\n"); return -EINVAL; } =20 header_end =3D sec->sframe_start + SFRAME_HEADER_SIZE(shdr); if (header_end >=3D sec->sframe_end) { - dbg("header doesn't 
fit in section\n"); + dbg_sec("header doesn't fit in section\n"); return -EINVAL; } =20 @@ -344,7 +347,7 @@ static int sframe_read_header(struct sframe_section *se= c) fres_end =3D fres_start + shdr.fre_len; =20 if (fres_start < fdes_end || fres_end > sec->sframe_end) { - dbg("inconsistent fde/fre offsets\n"); + dbg_sec("inconsistent fde/fre offsets\n"); return -EINVAL; } =20 @@ -400,6 +403,8 @@ int sframe_add_section(unsigned long sframe_start, unsi= gned long sframe_end, sec->text_start =3D text_start; sec->text_end =3D text_end; =20 + dbg_init(sec); + ret =3D sframe_read_header(sec); if (ret) { dbg_print_header(sec); @@ -408,8 +413,8 @@ int sframe_add_section(unsigned long sframe_start, unsi= gned long sframe_end, =20 ret =3D mtree_insert_range(sframe_mt, sec->text_start, sec->text_end, sec= , GFP_KERNEL); if (ret) { - dbg("mtree_insert_range failed: text=3D%lx-%lx\n", - sec->text_start, sec->text_end); + dbg_sec("mtree_insert_range failed: text=3D%lx-%lx\n", + sec->text_start, sec->text_end); goto err_free; } =20 @@ -431,7 +436,7 @@ static int __sframe_remove_section(struct mm_struct *mm, struct sframe_section *sec) { if (!mtree_erase(&mm->sframe_mt, sec->text_start)) { - dbg("mtree_erase failed: text=3D%lx\n", sec->text_start); + dbg_sec("mtree_erase failed: text=3D%lx\n", sec->text_start); return -EINVAL; } =20 diff --git a/kernel/unwind/sframe_debug.h b/kernel/unwind/sframe_debug.h index 055c8c8fae24..4d121cdbb760 100644 --- a/kernel/unwind/sframe_debug.h +++ b/kernel/unwind/sframe_debug.h @@ -10,26 +10,55 @@ #define dbg(fmt, ...) \ pr_debug("%s (%d): " fmt, current->comm, current->pid, ##__VA_ARGS__) =20 +#define dbg_sec(fmt, ...) 
\ + dbg("%s: " fmt, sec->filename, ##__VA_ARGS__) + static __always_inline void dbg_print_header(struct sframe_section *sec) { unsigned long fdes_end; =20 fdes_end =3D sec->fdes_start + (sec->num_fdes * sizeof(struct sframe_fde)= ); =20 - dbg("SEC: sframe:0x%lx-0x%lx text:0x%lx-0x%lx " - "fdes:0x%lx-0x%lx fres:0x%lx-0x%lx " - "ra_off:%d fp_off:%d\n", - sec->sframe_start, sec->sframe_end, sec->text_start, sec->text_end, - sec->fdes_start, fdes_end, sec->fres_start, sec->fres_end, - sec->ra_off, sec->fp_off); + dbg_sec("SEC: sframe:0x%lx-0x%lx text:0x%lx-0x%lx " + "fdes:0x%lx-0x%lx fres:0x%lx-0x%lx " + "ra_off:%d fp_off:%d\n", + sec->sframe_start, sec->sframe_end, sec->text_start, sec->text_end, + sec->fdes_start, fdes_end, sec->fres_start, sec->fres_end, + sec->ra_off, sec->fp_off); +} + +static inline void dbg_init(struct sframe_section *sec) +{ + struct mm_struct *mm =3D current->mm; + struct vm_area_struct *vma; + + guard(mmap_read_lock)(mm); + vma =3D vma_lookup(mm, sec->sframe_start); + if (!vma) + sec->filename =3D kstrdup("(vma gone???)", GFP_KERNEL); + else if (vma->vm_file) + sec->filename =3D kstrdup_quotable_file(vma->vm_file, GFP_KERNEL); + else if (!vma->vm_mm) + sec->filename =3D kstrdup("(vdso)", GFP_KERNEL); + else + sec->filename =3D kstrdup("(anonymous)", GFP_KERNEL); +} + +static inline void dbg_free(struct sframe_section *sec) +{ + kfree(sec->filename); } =20 #else /* !CONFIG_DYNAMIC_DEBUG */ =20 #define dbg(args...) no_printk(args) +#define dbg_sec(args... 
) no_printk(args) =20 static inline void dbg_print_header(struct sframe_section *sec) {} =20 +static inline void dbg_init(struct sframe_section *sec) {} +static inline void dbg_free(struct sframe_section *sec) {} + #endif /* !CONFIG_DYNAMIC_DEBUG */ =20 #endif /* _SFRAME_DEBUG_H */ --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A293A1D47AE; Wed, 22 Jan 2025 02:32:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513139; cv=none; b=Q6jlOZSi7/GZC3ntlk1EHv1jC5WvwYXfZ7IzcXFWaKJzqcG6gbo/iDZcifbbeyd2tcb5MGI/Z1B0V4SwWnv2hSNT4YqgSzVUH+sQHRbk5y4Bi0TsY/FNflPP7v1YJMf4uo2oj++MlB+MZROozw6flRRtuaTb5EpxaNjdwF4pOQQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513139; c=relaxed/simple; bh=B3rzqWK+3dbuAMW6Yd/wHbo0urS3X1S4CoqxAu7Q/kk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=iU/Hjkzhqkxy3Rg3isCtRc2X6jsX9j9CgjvMkUwE4pqPYzcHLoj/1unhF2tuG0B17+ze+PLL1OsThSnqlWJRiY2Rln1YNfbr9i8RGNwh9Q4cp1S84sQIMpeRr6G5e7Z29p9CXD7Yh1QW4YmvFQgeghiGCoXRUr+T5/rnnsfYFzI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=jOs7iLC+; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="jOs7iLC+" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 482D1C4CEE2; Wed, 22 Jan 2025 02:32:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513139; bh=B3rzqWK+3dbuAMW6Yd/wHbo0urS3X1S4CoqxAu7Q/kk=; 
h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=jOs7iLC+5Y5SrGTs+eo1KrPvKXGsuXcr6f96sJ+5cNCVKFqeCSgWZOfXyOSj+RxxN GVMtk7EWG6cvLgFcM5uDToqIKjSMYcN2WGCMOTlx/8smeoo0SFRVHCeaEIBd6KD2bB 3bS5cpt4MN8xVWDNzlc2VldfshL979OlZH2Tr655aqQ35ZTE1YQe71gzifVOUFvl5V Wzr6ve+JcAEpG6dQftuBERuN1RsPlGb/bODQsLn1kSkfTKGR1sX82b4rEMAzWJ8DyA MWnbv/FjR8YnvXWGc0T6y5lyU3QlJD4p6Cn0jElKRpb8Uyy1mf3YnK1q3J4+d21lM+ nI+R1/HzDkmhQ== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 26/39] unwind_user/sframe: Enable debugging in uaccess regions Date: Tue, 21 Jan 2025 18:31:18 -0800 Message-ID: <990b28ae7855b67c5e6d6385b9de78ffa336dd73.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Objtool warns about calling pr_debug() from uaccess-enabled regions, and rightfully so. Add a dbg_sec_uaccess() macro which temporarily disables uaccess before doing the dynamic printk, and use that to add debug messages throughout the uaccess-enabled regions. 
Signed-off-by: Josh Poimboeuf --- kernel/unwind/sframe.c | 59 ++++++++++++++++++++++++++++-------- kernel/unwind/sframe_debug.h | 31 +++++++++++++++++++ 2 files changed, 77 insertions(+), 13 deletions(-) diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index f463123f9afe..a2ca26b952d3 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -53,12 +53,15 @@ static __always_inline int __read_fde(struct sframe_sec= tion *sec, sizeof(struct sframe_fde), Efault); =20 ip =3D sec->sframe_start + fde->start_addr; - if (ip < sec->text_start || ip > sec->text_end) + if (ip < sec->text_start || ip > sec->text_end) { + dbg_sec_uaccess("bad fde num %d\n", fde_num); return -EINVAL; + } =20 return 0; =20 Efault: + dbg_sec_uaccess("fde %d usercopy failed\n", fde_num); return -EFAULT; } =20 @@ -85,16 +88,22 @@ static __always_inline int __find_fde(struct sframe_sec= tion *sec, unsafe_get_user(func_off, (s32 __user *)mid, Efault); =20 if (ip_off >=3D func_off) { - if (func_off < func_off_low) + if (func_off < func_off_low) { + dbg_sec_uaccess("fde %u not sorted\n", + (unsigned int)(mid - first)); return -EFAULT; + } =20 func_off_low =3D func_off; =20 found =3D mid; low =3D mid + 1; } else { - if (func_off > func_off_high) + if (func_off > func_off_high) { + dbg_sec_uaccess("fde %u not sorted\n", + (unsigned int)(mid - first)); return -EFAULT; + } =20 func_off_high =3D func_off; =20 @@ -140,6 +149,8 @@ static __always_inline int __find_fde(struct sframe_sec= tion *sec, __UNSAFE_GET_USER_INC(to, from, u32, label); \ break; \ default: \ + dbg_sec_uaccess("%d: bad UNSAFE_GET_USER_INC size %u\n",\ + __LINE__, size); \ return -EFAULT; \ } \ }) @@ -158,24 +169,34 @@ static __always_inline int __read_fre(struct sframe_s= ection *sec, u8 info; =20 addr_size =3D fre_type_to_size(fre_type); - if (!addr_size) + if (!addr_size) { + dbg_sec_uaccess("bad addr_size in fde info %u\n", fde->info); return -EFAULT; + } =20 - if (fre_addr + addr_size + 1 > sec->fres_end) + if 
(fre_addr + addr_size + 1 > sec->fres_end) { + dbg_sec_uaccess("fre addr+info goes past end of subsection\n"); return -EFAULT; + } =20 UNSAFE_GET_USER_INC(ip_off, cur, addr_size, Efault); - if (fde_type =3D=3D SFRAME_FDE_TYPE_PCINC && ip_off > fde->func_size) + if (fde_type =3D=3D SFRAME_FDE_TYPE_PCINC && ip_off > fde->func_size) { + dbg_sec_uaccess("fre starts past end of function: ip_off=3D0x%x, func_si= ze=3D0x%x\n", + ip_off, fde->func_size); return -EFAULT; + } =20 UNSAFE_GET_USER_INC(info, cur, 1, Efault); offset_count =3D SFRAME_FRE_OFFSET_COUNT(info); offset_size =3D offset_size_enum_to_size(SFRAME_FRE_OFFSET_SIZE(info)); - if (!offset_count || !offset_size) + if (!offset_count || !offset_size) { + dbg_sec_uaccess("zero offset_count or size in fre info %u\n",info); return -EFAULT; - - if (cur + (offset_count * offset_size) > sec->fres_end) + } + if (cur + (offset_count * offset_size) > sec->fres_end) { + dbg_sec_uaccess("fre goes past end of subsection\n"); return -EFAULT; + } =20 fre->size =3D addr_size + 1 + (offset_count * offset_size); =20 @@ -184,8 +205,10 @@ static __always_inline int __read_fre(struct sframe_se= ction *sec, =20 ra_off =3D sec->ra_off; if (!ra_off) { - if (!offset_count--) + if (!offset_count--) { + dbg_sec_uaccess("zero offset_count, can't find ra_off\n"); return -EFAULT; + } =20 UNSAFE_GET_USER_INC(ra_off, cur, offset_size, Efault); } @@ -196,8 +219,10 @@ static __always_inline int __read_fre(struct sframe_se= ction *sec, UNSAFE_GET_USER_INC(fp_off, cur, offset_size, Efault); } =20 - if (offset_count) + if (offset_count) { + dbg_sec_uaccess("non-zero offset_count after reading fre\n"); return -EFAULT; + } =20 fre->ip_off =3D ip_off; fre->cfa_off =3D cfa_off; @@ -208,6 +233,7 @@ static __always_inline int __read_fre(struct sframe_sec= tion *sec, return 0; =20 Efault: + dbg_sec_uaccess("fre usercopy failed\n"); return -EFAULT; } =20 @@ -241,13 +267,20 @@ static __always_inline int __find_fre(struct sframe_s= ection *sec, which =3D 
!which; =20 ret =3D __read_fre(sec, fde, fre_addr, fre); - if (ret) + if (ret) { + dbg_sec_uaccess("fde addr 0x%x: __read_fre(%u) failed\n", + fde->start_addr, i); + dbg_print_fde_uaccess(sec, fde); return ret; + } =20 fre_addr +=3D fre->size; =20 - if (prev_fre && fre->ip_off <=3D prev_fre->ip_off) + if (prev_fre && fre->ip_off <=3D prev_fre->ip_off) { + dbg_sec_uaccess("fde addr 0x%x: fre %u not sorted\n", + fde->start_addr, i); return -EFAULT; + } =20 if (fre->ip_off > ip_off) break; diff --git a/kernel/unwind/sframe_debug.h b/kernel/unwind/sframe_debug.h index 4d121cdbb760..3bb3c5574aee 100644 --- a/kernel/unwind/sframe_debug.h +++ b/kernel/unwind/sframe_debug.h @@ -13,6 +13,26 @@ #define dbg_sec(fmt, ...) \ dbg("%s: " fmt, sec->filename, ##__VA_ARGS__) =20 +#define __dbg_sec_descriptor(fmt, ...) \ + __dynamic_pr_debug(&descriptor, "sframe: %s: " fmt, \ + sec->filename, ##__VA_ARGS__) + +/* + * To avoid breaking uaccess rules, temporarily disable uaccess + * before calling printk. + */ +#define dbg_sec_uaccess(fmt, ...) 
\ +({ \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (DYNAMIC_DEBUG_BRANCH(descriptor)) { \ + user_read_access_end(); \ + __dbg_sec_descriptor(fmt, ##__VA_ARGS__); \ + BUG_ON(!user_read_access_begin( \ + (void __user *)sec->sframe_start, \ + sec->sframe_end - sec->sframe_start)); \ + } \ +}) + static __always_inline void dbg_print_header(struct sframe_section *sec) { unsigned long fdes_end; @@ -27,6 +47,15 @@ static __always_inline void dbg_print_header(struct sfra= me_section *sec) sec->ra_off, sec->fp_off); } =20 +static __always_inline void dbg_print_fde_uaccess(struct sframe_section *s= ec, + struct sframe_fde *fde) +{ + dbg_sec_uaccess("FDE: start_addr:0x%x func_size:0x%x " + "fres_off:0x%x fres_num:%d info:%u rep_size:%u\n", + fde->start_addr, fde->func_size, + fde->fres_off, fde->fres_num, fde->info, fde->rep_size); +} + static inline void dbg_init(struct sframe_section *sec) { struct mm_struct *mm =3D current->mm; @@ -53,8 +82,10 @@ static inline void dbg_free(struct sframe_section *sec) =20 #define dbg(args...) no_printk(args) #define dbg_sec(args... ) no_printk(args) +#define dbg_sec_uaccess(args...) 
no_printk(args) =20 static inline void dbg_print_header(struct sframe_section *sec) {} +static inline void dbg_print_fde_uaccess(struct sframe_section *sec, struc= t sframe_fde *fde) {} =20 static inline void dbg_init(struct sframe_section *sec) {} static inline void dbg_free(struct sframe_section *sec) {} --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B24391D5CCC; Wed, 22 Jan 2025 02:32:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513140; cv=none; b=FptLNt/NPCO2ooSlKnvmMg/TiN6r6oLX+4ekK3vwo14EbtjDyYf/P3WoApBvo12b/zrvePC5+UZ9wFTBuWAV/Hnj92c7EDDmHd9wGuFJnHLuNfBqbB5lrqt+VFqggp/jtcuJnwSf0NlytXrEwsg+LOnSok6UD/r/GOrINHgQl1o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513140; c=relaxed/simple; bh=YdokVxORLyw0FxMyJUC9g39qf45hCSMsY+ha4iDY2z4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=bfPmgXN64vMZrcKFWuQRQIdBWH1aasyHT+b+4ytz68KHWoxDvFZpyHjJQi6GEJYjSoK3r/pBRnbN/eAyddCPnleF8wp/sjhA/y0aIB/Oe/63hlKUjwfq3o4p3OUZwVnJTtUM4ItgTf5c/Uo8FdVZdkFRgUk74GpALkOfT+VZrwQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ONBj9H+M; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ONBj9H+M" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 4C119C4CEDF; Wed, 22 Jan 2025 02:32:19 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513140; bh=YdokVxORLyw0FxMyJUC9g39qf45hCSMsY+ha4iDY2z4=; 
h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ONBj9H+M9B5Xi79oMeluW0MFsqv4B12Wm9Kgs0AtvwcMN91PavPN4dw38XCl0hfQd ymjxZG+WXlLYfq15A0AR1cBwMEKvuENcTnmsU7rD6wqawyQONeBREoEsgLUOcu23gs BhQ4XENgRZRoqa0+uXYxh5rJ7uV9E38JSLzQwx3clDr4RvbI937eXR4Rpbe1Nzz6b0 COXlynXlGTfKZtqi6k3/zm/mWS+ZjKUhfbrkcqMUCRKM6rZsRS/UkoY6UEUqmgDR+X +V1Yi5qZpO1DNTQgFKZ8qCk/OADJ+bvjqdYBJQrsOxD94MFcsQLJmKtQbuh5+8GnC1 TJx+QYOUDo5Nw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 27/39] unwind_user/sframe: Add .sframe validation option Date: Tue, 21 Jan 2025 18:31:19 -0800 Message-ID: <3c7be3db0795ba8d660ee21ef5980e8ab7780c12.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add a debug feature to validate all .sframe sections when first loading the file rather than on demand. 
Signed-off-by: Josh Poimboeuf --- arch/Kconfig | 19 ++++++++++ kernel/unwind/sframe.c | 81 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 12a3b73cbe66..b3676605bab6 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -453,6 +453,25 @@ config HAVE_UNWIND_USER_SFRAME config AS_SFRAME def_bool $(as-instr,.cfi_sections .sframe\n.cfi_startproc\n.cfi_endproc) =20 +config SFRAME_VALIDATION + bool "Enable .sframe section debugging" + depends on HAVE_UNWIND_USER_SFRAME + depends on DYNAMIC_DEBUG + help + When adding an .sframe section for a task, validate the entire + section immediately rather than on demand. + + This is a debug feature which is helpful for rooting out .sframe + section issues. If the .sframe section is corrupt, it will fail to + load immediately, with more information provided in dynamic printks. + + This has a significant page cache footprint due to its reading of the + entire .sframe section for every loaded executable and shared + library. Also, it's done for all processes, even those which don't + get stack traced by the kernel. Not recommended for general use. + + If unsure, say N. 
+ config HAVE_PERF_REGS bool help diff --git a/kernel/unwind/sframe.c b/kernel/unwind/sframe.c index a2ca26b952d3..bba14c5fe0f5 100644 --- a/kernel/unwind/sframe.c +++ b/kernel/unwind/sframe.c @@ -336,6 +336,83 @@ int sframe_find(unsigned long ip, struct unwind_user_f= rame *frame) return ret; } =20 +#ifdef CONFIG_SFRAME_VALIDATION + +static __always_inline int __sframe_validate_section(struct sframe_section= *sec) +{ + unsigned long prev_ip =3D 0; + unsigned int i; + + for (i =3D 0; i < sec->num_fdes; i++) { + struct sframe_fre *fre, *prev_fre =3D NULL; + unsigned long ip, fre_addr; + struct sframe_fde fde; + struct sframe_fre fres[2]; + bool which =3D false; + unsigned int j; + int ret; + + ret =3D __read_fde(sec, i, &fde); + if (ret) + return ret; + + ip =3D sec->sframe_start + fde.start_addr; + if (ip <=3D prev_ip) { + dbg_sec_uaccess("fde %u not sorted\n", i); + return -EFAULT; + } + prev_ip =3D ip; + + fre_addr =3D sec->fres_start + fde.fres_off; + for (j =3D 0; j < fde.fres_num; j++) { + int ret; + + fre =3D which ? 
fres : fres + 1; + which =3D !which; + + ret =3D __read_fre(sec, &fde, fre_addr, fre); + if (ret) { + dbg_sec_uaccess("fde %u: __read_fre(%u) failed\n", i, j); + dbg_print_fde_uaccess(sec, &fde); + return ret; + } + + fre_addr +=3D fre->size; + + if (prev_fre && fre->ip_off <=3D prev_fre->ip_off) { + dbg_sec_uaccess("fde %u: fre %u not sorted\n", i, j); + return -EFAULT; + } + + prev_fre =3D fre; + } + } + + return 0; +} + +static int sframe_validate_section(struct sframe_section *sec) +{ + int ret; + + if (!user_read_access_begin((void __user *)sec->sframe_start, + sec->sframe_end - sec->sframe_start)) { + dbg_sec("section usercopy failed\n"); + return -EFAULT; + } + + ret =3D __sframe_validate_section(sec); + user_read_access_end(); + return ret; +} + +#else /* !CONFIG_SFRAME_VALIDATION */ + +static int sframe_validate_section(struct sframe_section *sec) { return 0;= } + +#endif /* !CONFIG_SFRAME_VALIDATION */ + + static void free_section(struct sframe_section *sec) { dbg_free(sec); @@ -444,6 +521,10 @@ int sframe_add_section(unsigned long sframe_start, uns= igned long sframe_end, goto err_free; } =20 + ret =3D sframe_validate_section(sec); + if (ret) + goto err_free; + ret =3D mtree_insert_range(sframe_mt, sec->text_start, sec->text_end, sec= , GFP_KERNEL); if (ret) { dbg_sec("mtree_insert_range failed: text=3D%lx-%lx\n", --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B9A4C1DE4D5; Wed, 22 Jan 2025 02:32:21 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513141; cv=none; 
b=lty2fzsEL5GEm90jUynB4T+1XkhOZ5mkrTckH/gTSZxFDsnzpt0UetQ7+Q+hPyiyodmBwf32NDQ13fgdStSFvpAA81eT/YclS2H034es2dxM77XQpyHnSnZu9fMVnqH5XBMOnlJxpfK8U/HYq2DKuV/4rzr8PzZn0CtVdgHHZc8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513141; c=relaxed/simple; bh=IdqqeJrVDkIMyiK2xCT50O3cV1WwFa2oFCLdnnAcnr4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=h9da9ehVTjUqDaePmi8PFV8OyO/9egtEb94uD6P3cXbLaQ9uNAyRYD2JliQ+g2980q2C+yjVSvcStlbmxmcrl91y20alj8xff2XaP8flzwsjrgNx8DbdrfmTxPSHjixiHkqQuOkaneBszOStYSqCSvxAV7FxGfLBslyACnIfjbI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=SJecxHLL; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="SJecxHLL" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5C05BC4CEE0; Wed, 22 Jan 2025 02:32:20 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513141; bh=IdqqeJrVDkIMyiK2xCT50O3cV1WwFa2oFCLdnnAcnr4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=SJecxHLLqyCc442LgwaEF22hAJru4gqbgBVERPhwbk+1B3V62Rys6U0SwpPfEtKMg cgmdh33OQZlp8i9M+ViIOHdcDKKTrLE+dkBXsruKoJfN0E2U4+zepTWdVKRLhloH76 QAeuuzTGG1WkUm2QIq5Dx2yaa+104IgdEAToiWAeVRFxBdlt2XMD0r4NUEmsdQD/3J HtXCBGzqSMC+KoDfQKAjPnYQVtbjkTavOdsKrP0jWpKQFOmHHyRwrbUacjP2LcSBqE VUoT5v20D2KBM76rWtxbd6WsKXl6Dip75EXZC3TdhAhCNsO0Wq6g2X+2BdSKgaz4Ny 8hYfgPvBWt5hg== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu 
Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 28/39] unwind_user/deferred: Add deferred unwinding interface Date: Tue, 21 Jan 2025 18:31:20 -0800 Message-ID: <6052e8487746603bdb29b65f4033e739092d9925.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Add an interface for scheduling task work to unwind the user space stack before returning to user space. This solves several problems for its callers: - Ensure the unwind happens in task context even if the caller may be running in NMI or interrupt context. - Avoid duplicate unwinds, whether called multiple times by the same caller or by different callers. - Create a "context cookie" which allows trace post-processing to correlate kernel unwinds/traces with the user unwind. 
Signed-off-by: Josh Poimboeuf --- include/linux/entry-common.h | 2 + include/linux/sched.h | 5 + include/linux/unwind_deferred.h | 46 +++++++ include/linux/unwind_deferred_types.h | 10 ++ kernel/fork.c | 4 + kernel/unwind/Makefile | 2 +- kernel/unwind/deferred.c | 178 ++++++++++++++++++++++++++ 7 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 include/linux/unwind_deferred.h create mode 100644 include/linux/unwind_deferred_types.h create mode 100644 kernel/unwind/deferred.c diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fc61d0205c97..fb2b27154fee 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -12,6 +12,7 @@ #include #include #include +#include =20 #include =20 @@ -111,6 +112,7 @@ static __always_inline void enter_from_user_mode(struct= pt_regs *regs) =20 CT_WARN_ON(__ct_state() !=3D CT_STATE_USER); user_exit_irqoff(); + unwind_enter_from_user_mode(); =20 instrumentation_begin(); kmsan_unpoison_entry_regs(regs); diff --git a/include/linux/sched.h b/include/linux/sched.h index 64934e0830af..042a95f4f6e6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -46,6 +46,7 @@ #include #include #include +#include #include =20 /* task_struct member predeclarations (sorted alphabetically): */ @@ -1603,6 +1604,10 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif =20 +#ifdef CONFIG_UNWIND_USER + struct unwind_task_info unwind_info; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. 
diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferre= d.h new file mode 100644 index 000000000000..741f409f0d1f --- /dev/null +++ b/include/linux/unwind_deferred.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_UNWIND_USER_DEFERRED_H +#define _LINUX_UNWIND_USER_DEFERRED_H + +#include +#include +#include + +struct unwind_work; + +typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_= stacktrace *trace, u64 cookie); + +struct unwind_work { + struct callback_head work; + unwind_callback_t func; + int pending; +}; + +#ifdef CONFIG_UNWIND_USER + +void unwind_task_init(struct task_struct *task); +void unwind_task_free(struct task_struct *task); + +void unwind_deferred_init(struct unwind_work *work, unwind_callback_t func= ); +int unwind_deferred_request(struct unwind_work *work, u64 *cookie); +bool unwind_deferred_cancel(struct task_struct *task, struct unwind_work *= work); + +static __always_inline void unwind_enter_from_user_mode(void) +{ + current->unwind_info.cookie =3D 0; +} + +#else /* !CONFIG_UNWIND_USER */ + +static inline void unwind_task_init(struct task_struct *task) {} +static inline void unwind_task_free(struct task_struct *task) {} + +static inline void unwind_deferred_init(struct unwind_work *work, unwind_c= allback_t func) {} +static inline int unwind_deferred_request(struct task_struct *task, struct= unwind_work *work, u64 *cookie) { return -ENOSYS; } +static inline bool unwind_deferred_cancel(struct task_struct *task, struct= unwind_work *work) { return false; } + +static inline void unwind_enter_from_user_mode(void) {} + +#endif /* !CONFIG_UNWIND_USER */ + +#endif /* _LINUX_UNWIND_USER_DEFERRED_H */ diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_d= eferred_types.h new file mode 100644 index 000000000000..9749824aea09 --- /dev/null +++ b/include/linux/unwind_deferred_types.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef 
_LINUX_UNWIND_USER_DEFERRED_TYPES_H +#define _LINUX_UNWIND_USER_DEFERRED_TYPES_H + +struct unwind_task_info { + unsigned long *entries; + u64 cookie; +}; + +#endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 88753f8bbdd3..c9a954af72a1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,7 @@ #include #include #include +#include =20 #include #include @@ -973,6 +974,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(refcount_read(&tsk->usage)); WARN_ON(tsk =3D=3D current); =20 + unwind_task_free(tsk); sched_ext_free(tsk); io_uring_free(tsk); cgroup_free(tsk); @@ -2370,6 +2372,8 @@ __latent_entropy struct task_struct *copy_process( p->bpf_ctx =3D NULL; #endif =20 + unwind_task_init(p); + /* Perform scheduler related setup. Assign this task to a CPU. */ retval =3D sched_fork(clone_flags, p); if (retval) diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile index f70380d7a6a6..146038165865 100644 --- a/kernel/unwind/Makefile +++ b/kernel/unwind/Makefile @@ -1,2 +1,2 @@ - obj-$(CONFIG_UNWIND_USER) +=3D user.o + obj-$(CONFIG_UNWIND_USER) +=3D user.o deferred.o obj-$(CONFIG_HAVE_UNWIND_USER_SFRAME) +=3D sframe.o diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c new file mode 100644 index 000000000000..f0dbe4069247 --- /dev/null +++ b/kernel/unwind/deferred.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* +* Deferred user space unwinding +*/ +#include +#include +#include +#include +#include +#include +#include +#include + +#define UNWIND_MAX_ENTRIES 512 + +/* entry-from-user counter */ +static DEFINE_PER_CPU(u64, unwind_ctx_ctr); + +/* + * The context cookie is a unique identifier which allows post-processing = to + * correlate kernel trace(s) with user unwinds. The high 12 bits are the = CPU + * id; the lower 48 bits are a per-CPU entry counter. 
+ */ +static u64 ctx_to_cookie(u64 cpu, u64 ctx) +{ + BUILD_BUG_ON(NR_CPUS > 65535); + return (ctx & ((1UL << 48) - 1)) | (cpu << 48); +} + +/* + * Read the task context cookie, first initializing it if this is the first + * call to get_cookie() since the most recent entry from user. + */ +static u64 get_cookie(struct unwind_task_info *info) +{ + u64 ctx_ctr; + u64 cookie; + u64 cpu; + + guard(irqsave)(); + + cookie =3D info->cookie; + if (cookie) + return cookie; + + + cpu =3D raw_smp_processor_id(); + ctx_ctr =3D __this_cpu_inc_return(unwind_ctx_ctr); + info->cookie =3D ctx_to_cookie(cpu, ctx_ctr); + + return cookie; + +} + +static void unwind_deferred_task_work(struct callback_head *head) +{ + struct unwind_work *work =3D container_of(head, struct unwind_work, work); + struct unwind_task_info *info =3D ¤t->unwind_info; + struct unwind_stacktrace trace; + u64 cookie; + + if (WARN_ON_ONCE(!work->pending)) + return; + + /* + * From here on out, the callback must always be called, even if it's + * just an empty trace. + */ + + cookie =3D get_cookie(info); + + /* Check for task exit path. */ + if (!current->mm) + goto do_callback; + + if (!info->entries) { + info->entries =3D kmalloc(UNWIND_MAX_ENTRIES * sizeof(long), + GFP_KERNEL); + if (!info->entries) + goto do_callback; + } + + trace.entries =3D info->entries; + trace.nr =3D 0; + unwind_user(&trace, UNWIND_MAX_ENTRIES); + +do_callback: + work->func(work, &trace, cookie); + work->pending =3D 0; +} + +/* + * Schedule a user space unwind to be done in task work before exiting the + * kernel. + * + * The returned cookie output is a unique identifer for the current task e= ntry + * context. Its value will also be passed to the callback function. It c= an be + * used to stitch kernel and user stack traces together in post-processing. + * + * It's valid to call this function multiple times for the same @work with= in + * the same task entry context. Each call will return the same cookie. 
I= f the + * callback is already pending, an error will be returned along with the + * cookie. If the callback is not pending because it has already been + * previously called for the same entry context, it will be called again w= ith + * the same stack trace and cookie. + * + * Thus are three possible return scenarios: + * + * * return !=3D 0, *cookie =3D=3D 0: the operation failed, no pending c= allback. + * + * * return !=3D 0, *cookie !=3D 0: the callback is already pending. The= cookie + * can still be used to correlate with the pending callback. + * + * * return =3D=3D 0, *cookie !=3D 0: the callback queued successfully. = The + * callback is guaranteed to be called with the given cookie. + */ +int unwind_deferred_request(struct unwind_work *work, u64 *cookie) +{ + struct unwind_task_info *info =3D ¤t->unwind_info; + int ret; + + *cookie =3D 0; + + if (WARN_ON_ONCE(in_nmi())) + return -EINVAL; + + if (!current->mm || !user_mode(task_pt_regs(current))) + return -EINVAL; + + guard(irqsave)(); + + *cookie =3D get_cookie(info); + + /* callback already pending? 
*/ + if (work->pending) + return -EEXIST; + + ret =3D task_work_add(current, &work->work, TWA_RESUME); + if (WARN_ON_ONCE(ret)) + return ret; + + work->pending =3D 1; + + return 0; +} + +bool unwind_deferred_cancel(struct task_struct *task, struct unwind_work *= work) +{ + bool ret; + + ret =3D task_work_cancel(task, &work->work); + if (ret) + work->pending =3D 0; + + return ret; +} + +void unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) +{ + memset(work, 0, sizeof(*work)); + + init_task_work(&work->work, unwind_deferred_task_work); + work->func =3D func; +} + +void unwind_task_init(struct task_struct *task) +{ + struct unwind_task_info *info =3D &task->unwind_info; + + memset(info, 0, sizeof(*info)); +} + +void unwind_task_free(struct task_struct *task) +{ + struct unwind_task_info *info =3D &task->unwind_info; + + kfree(info->entries); +} --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 56B9E1EEA3C; Wed, 22 Jan 2025 02:32:22 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513142; cv=none; b=VSeqn8A0clec+LDHFQHVeyydCNknn9cch/uQGwuAUb6b6/YkRmye5Esczu7SNoo7u6ALZHy3PMDK5jbRS6kYipT+VRenYE60ywaYGGdFgzkjynrudYBlFHp7yN0fijcqNYlGJvityGqkfR16YV3SxgUi1ALODczxW3IzlnC/x9E= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513142; c=relaxed/simple; bh=GD6lqqKJhzejsQBKB5JdzkZAwfNOtyH5yqbgfZgTnVg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=RWlAhiOGvVB+4Yq67pgU1emMMxhV66JKkyKBWozUN8EIdFqjmVEwz02nYmfA9FahdQCSPl8ScCaCJp+Zc68hLCc/GMpyQsrr2kigt2OtqNe0EgxQPenOKgtQ8P5M0MKCZ8M0PKZDATuOWGmPK1qR9cYAVfwuZ755lhxRlTeECCc= 
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=byQelMJ8; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="byQelMJ8" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5DE48C4CEE5; Wed, 22 Jan 2025 02:32:21 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513142; bh=GD6lqqKJhzejsQBKB5JdzkZAwfNOtyH5yqbgfZgTnVg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=byQelMJ8J6c1cMq41ntcn63Eir36brW/eyI4TAb+TjIH6oLbgnflsZTs/60GXulKs 5yarE7zAP/sHT8J/MjdtdTFyrIR1Vw5/BusCBQE3RV6ePLTbkhd6TVDF6wUMNR2JkA 4R2xn/0/vyxpY09EJfy1I7Cj8kPYjsaCRbHiBTnGUIj6cJHMWjC2az2SDrZv0xa7zI bKCbwjMqMGgqUMpRVuhtH7rp/vSOBvcAhgltuR3WhdSea042vvJGHN31AyrAWBGBFd +NiXA+RjK+tSlxHBIx/eN+UfjkKjVpBZblHYfAYQGWPhPsbKT/fAW/aV7w7P/xsZMG 9X3/grvMFDIBg== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 29/39] unwind_user/deferred: Add unwind cache Date: Tue, 21 Jan 2025 18:31:21 -0800 Message-ID: <51855c0902486060cd6e1ccc6b22fd092a2e676d.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Cache the results of the unwind to ensure the unwind 
is only performed once, even when called by multiple tracers. Signed-off-by: Josh Poimboeuf --- include/linux/unwind_deferred_types.h | 8 +++++++- kernel/unwind/deferred.c | 26 ++++++++++++++++++++------ 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_d= eferred_types.h index 9749824aea09..6f71a06329fb 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -2,8 +2,14 @@ #ifndef _LINUX_UNWIND_USER_DEFERRED_TYPES_H #define _LINUX_UNWIND_USER_DEFERRED_TYPES_H =20 -struct unwind_task_info { +struct unwind_cache { unsigned long *entries; + unsigned int nr_entries; + u64 cookie; +}; + +struct unwind_task_info { + struct unwind_cache cache; u64 cookie; }; =20 diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index f0dbe4069247..2f38055cce48 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -56,6 +56,7 @@ static void unwind_deferred_task_work(struct callback_hea= d *head) { struct unwind_work *work =3D container_of(head, struct unwind_work, work); struct unwind_task_info *info =3D ¤t->unwind_info; + struct unwind_cache *cache =3D &info->cache; struct unwind_stacktrace trace; u64 cookie; =20 @@ -73,17 +74,30 @@ static void unwind_deferred_task_work(struct callback_h= ead *head) if (!current->mm) goto do_callback; =20 - if (!info->entries) { - info->entries =3D kmalloc(UNWIND_MAX_ENTRIES * sizeof(long), - GFP_KERNEL); - if (!info->entries) + if (!cache->entries) { + cache->entries =3D kmalloc(UNWIND_MAX_ENTRIES * sizeof(long), + GFP_KERNEL); + if (!cache->entries) goto do_callback; } =20 - trace.entries =3D info->entries; + trace.entries =3D cache->entries; + + if (cookie =3D=3D cache->cookie) { + /* + * The user stack has already been previously unwound in this + * entry context. Skip the unwind and use the cache. 
+ */ + trace.nr =3D cache->nr_entries; + goto do_callback; + } + trace.nr =3D 0; unwind_user(&trace, UNWIND_MAX_ENTRIES); =20 + cache->cookie =3D cookie; + cache->nr_entries =3D trace.nr; + do_callback: work->func(work, &trace, cookie); work->pending =3D 0; @@ -174,5 +188,5 @@ void unwind_task_free(struct task_struct *task) { struct unwind_task_info *info =3D &task->unwind_info; =20 - kfree(info->entries); + kfree(info->cache.entries); } --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B33601F7577; Wed, 22 Jan 2025 02:32:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513143; cv=none; b=n5AOIss93CXtXWpywT8BeoSitBZ0wtVEuz5Oniu/LOapukc7KfayahXvAj+LlL3nW5axKBwePtSKFYsNVApUGoW1apbUKGiPi8HI755olC+ALWZWAKep6AxiqrpyHLst6OhlJ+Pb0BtKANCk75xRW/p3M/7LyOsR+kG1oTPihR4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513143; c=relaxed/simple; bh=PQu+wKfIIEYE7EgISh+KBFyzxaRDx69Xi4f1BqfJyYk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=MGlbU/o0Xl5W2ahSXODMrx2MzL/UWWHJwh/GV27aSY0gssnx2g5HkvilZXKYRPYQmQXOeIjBdtExUzE22MuWggZ3UOktsRrq3BsRGDlHt3TZHahfriu2tgc9cgALdQyzYuUdpNCWw/ysjQJsXaPXBrwgBlMr+TcZ/jg4RUQNOF8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=r+qatnsd; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="r+qatnsd" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5ADA7C4CEE4; Wed, 22 Jan 2025 02:32:22 +0000 (UTC) 
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513143; bh=PQu+wKfIIEYE7EgISh+KBFyzxaRDx69Xi4f1BqfJyYk=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=r+qatnsddBSQqDToUlQUSWzBO5yXfBN4UPBs0l83zSKm8UiMw8FCVD2bflCzUfddX m97qMEY4elziNFmcU2EVxxBdWGoKX3ibzY8ST4Ks4cMiPPjROFRTsdsVmCckZaHlCd P70iRg2PukQ7Lp1WmzSfKed789yb0WEXDqau0Hs9qYekgsSmUQVWFbHsqdGKao4gyO p4vgabqdo+LwPnOxKm6ExA4nN72/v8rWwR0QTae/c9PG+8dGqInxoINnytUPASad6c Hy3krac//d1ynoMLmDzhhBc84fHdiWlIptq9QVH7l6cZttM5JFx7B59KG/lyxSREQJ bBLNd8MLaTUAg== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 30/39] unwind_user/deferred: Make unwind deferral requests NMI-safe Date: Tue, 21 Jan 2025 18:31:22 -0800 Message-ID: <4ea47a9238cb726614f36a0aad2a545816442e57.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Make unwind_deferred_request() NMI-safe so tracers in NMI context can call it to get the cookie immediately rather than have to do the fragile "schedule irq work and then call unwind_deferred_request()" dance. 
Signed-off-by: Josh Poimboeuf --- include/linux/entry-common.h | 1 + include/linux/unwind_deferred.h | 6 ++ include/linux/unwind_deferred_types.h | 1 + kernel/unwind/deferred.c | 106 ++++++++++++++++++++++---- 4 files changed, 98 insertions(+), 16 deletions(-) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fb2b27154fee..e9b8c145f480 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -363,6 +363,7 @@ static __always_inline void exit_to_user_mode(void) lockdep_hardirqs_on_prepare(); instrumentation_end(); =20 + unwind_exit_to_user_mode(); user_enter_irqoff(); arch_exit_to_user_mode(); lockdep_hardirqs_on(CALLER_ADDR0); diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferre= d.h index 741f409f0d1f..22269f4d2392 100644 --- a/include/linux/unwind_deferred.h +++ b/include/linux/unwind_deferred.h @@ -30,6 +30,11 @@ static __always_inline void unwind_enter_from_user_mode(= void) current->unwind_info.cookie =3D 0; } =20 +static __always_inline void unwind_exit_to_user_mode(void) +{ + current->unwind_info.cookie =3D 0; +} + #else /* !CONFIG_UNWIND_USER */ =20 static inline void unwind_task_init(struct task_struct *task) {} @@ -40,6 +45,7 @@ static inline int unwind_deferred_request(struct task_str= uct *task, struct unwin static inline bool unwind_deferred_cancel(struct task_struct *task, struct= unwind_work *work) { return false; } =20 static inline void unwind_enter_from_user_mode(void) {} +static inline void unwind_exit_to_user_mode(void) {} =20 #endif /* !CONFIG_UNWIND_USER */ =20 diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_d= eferred_types.h index 6f71a06329fb..c535cca6534b 100644 --- a/include/linux/unwind_deferred_types.h +++ b/include/linux/unwind_deferred_types.h @@ -11,6 +11,7 @@ struct unwind_cache { struct unwind_task_info { struct unwind_cache cache; u64 cookie; + u64 nmi_cookie; }; =20 #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */ diff --git 
a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c index 2f38055cce48..939c94abaa50 100644 --- a/kernel/unwind/deferred.c +++ b/kernel/unwind/deferred.c @@ -29,27 +29,49 @@ static u64 ctx_to_cookie(u64 cpu, u64 ctx) =20 /* * Read the task context cookie, first initializing it if this is the first - * call to get_cookie() since the most recent entry from user. + * call to get_cookie() since the most recent entry from user. This has t= o be + * done carefully to coordinate with unwind_deferred_request_nmi(). */ static u64 get_cookie(struct unwind_task_info *info) { u64 ctx_ctr; u64 cookie; - u64 cpu; =20 guard(irqsave)(); =20 - cookie =3D info->cookie; + cookie =3D READ_ONCE(info->cookie); if (cookie) return cookie; =20 + ctx_ctr =3D __this_cpu_read(unwind_ctx_ctr); =20 - cpu =3D raw_smp_processor_id(); - ctx_ctr =3D __this_cpu_inc_return(unwind_ctx_ctr); - info->cookie =3D ctx_to_cookie(cpu, ctx_ctr); + /* Read ctx_ctr before info->nmi_cookie */ + barrier(); + + cookie =3D READ_ONCE(info->nmi_cookie); + if (cookie) { + /* + * This is the first call to get_cookie() since an NMI handler + * first wrote it to info->nmi_cookie. Sync it. + */ + WRITE_ONCE(info->cookie, cookie); + WRITE_ONCE(info->nmi_cookie, 0); + return cookie; + } + + /* + * Write info->cookie. It's ok to race with an NMI here. The value of + * the cookie is based on ctx_ctr from before the NMI could have + * incremented it. The result will be the same even if cookie or + * ctx_ctr end up getting written twice. 
+ */ + cookie =3D ctx_to_cookie(raw_smp_processor_id(), ctx_ctr + 1); + WRITE_ONCE(info->cookie, cookie); + WRITE_ONCE(info->nmi_cookie, 0); + barrier(); + __this_cpu_write(unwind_ctx_ctr, ctx_ctr + 1); =20 return cookie; - } =20 static void unwind_deferred_task_work(struct callback_head *head) @@ -100,7 +122,52 @@ static void unwind_deferred_task_work(struct callback_= head *head) =20 do_callback: work->func(work, &trace, cookie); - work->pending =3D 0; + WRITE_ONCE(work->pending, 0); +} + +static int unwind_deferred_request_nmi(struct unwind_work *work, u64 *cook= ie) +{ + struct unwind_task_info *info =3D ¤t->unwind_info; + bool inited_cookie =3D false; + int ret; + + *cookie =3D info->cookie; + if (!*cookie) { + /* + * This is the first unwind request since the most recent entry + * from user. Initialize the task cookie. + * + * Don't write to info->cookie directly, otherwise it may get + * cleared if the NMI occurred in the kernel during early entry + * or late exit before the task work gets to run. Instead, use + * info->nmi_cookie which gets synced later by get_cookie(). 
+ */ + if (!info->nmi_cookie) { + u64 cpu =3D raw_smp_processor_id(); + u64 ctx_ctr; + + ctx_ctr =3D __this_cpu_inc_return(unwind_ctx_ctr); + info->nmi_cookie =3D ctx_to_cookie(cpu, ctx_ctr); + + inited_cookie =3D true; + } + + *cookie =3D info->nmi_cookie; + + } else if (work->pending) { + return -EEXIST; + } + + ret =3D task_work_add(current, &work->work, TWA_NMI_CURRENT); + if (ret) { + if (inited_cookie) + info->nmi_cookie =3D 0; + return ret; + } + + work->pending =3D 1; + + return 0; } =20 /* @@ -131,29 +198,36 @@ static void unwind_deferred_task_work(struct callback= _head *head) int unwind_deferred_request(struct unwind_work *work, u64 *cookie) { struct unwind_task_info *info =3D ¤t->unwind_info; + int pending; int ret; =20 *cookie =3D 0; =20 - if (WARN_ON_ONCE(in_nmi())) - return -EINVAL; - if (!current->mm || !user_mode(task_pt_regs(current))) return -EINVAL; =20 + if (in_nmi()) + return unwind_deferred_request_nmi(work, cookie); + guard(irqsave)(); =20 *cookie =3D get_cookie(info); =20 /* callback already pending? */ - if (work->pending) + pending =3D READ_ONCE(work->pending); + if (pending) return -EEXIST; =20 - ret =3D task_work_add(current, &work->work, TWA_RESUME); - if (WARN_ON_ONCE(ret)) - return ret; + /* Claim the work unless an NMI just now swooped in to do so. */ + if (!try_cmpxchg(&work->pending, &pending, 1)) + return -EEXIST; =20 - work->pending =3D 1; + /* The work has been claimed, now schedule it. 
*/ + ret =3D task_work_add(current, &work->work, TWA_RESUME); + if (WARN_ON_ONCE(ret)) { + WRITE_ONCE(work->pending, 0); + return ret; + } =20 return 0; } --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CD8E41527B1; Wed, 22 Jan 2025 02:32:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513144; cv=none; b=KmBwlig0UnXlxL49BSLawRwRkRyEDXdn1fQqQwFVlbcPSRtM6Akj9c94JBAWWHdt4QBOKJ3PEGw1EqNK29OFJAccc06LTpxz8olIQIz5xjAL367M+fWCayVXsWHPJ/oKqIK+ZVdWL/3WVAo15XX4dVnDhmLgJiWfuA+pLb2GggU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513144; c=relaxed/simple; bh=E4iJebAiqcVeBPwO3RXhBAX5K97YidI6HnoAz3makSU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=qE4JPbH7yLMnHdgFXI8EUTU2LxNkWi/d1w1OdpUOxdUDMIW1PMI+1HhQxyngWPJi9ox++/TolKEYet0Uvrxu2Qw1/5nongkePU/n5Kvo2EDxNY1sxSb+T+iVbOoCkMI4YvxetTblpsNwXSS1Rcerp9QLgvj4q6lhUW37txOqAsA= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=m3MWH5Bc; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="m3MWH5Bc" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5EE0AC4CEE2; Wed, 22 Jan 2025 02:32:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513144; bh=E4iJebAiqcVeBPwO3RXhBAX5K97YidI6HnoAz3makSU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=m3MWH5BcjiGSfeRJnKK4ggYBfdOG+piRDqcNxx2vymvi7YIZL/fClZ/XmDEpHh8bH 
aOw2LFKfpLYqBO7JaollcD3IkvIJptmJTbICIAjbrpjg0nsYf/mMllSZhNVEMwhDgG C18DAizRbhTWkNlpqp0o+Iz7s18wy7sFkPqanix7vItGX6VvsGiRIa05MnIDHSb6B3 cZbfeGKIXHVgXkU6Gi3ZoMjfapNj/4Sb8Gwmhv0ZPu600QFjGGCnL0TMFmgaRJaTCq RoX1HLz66wmrsZ7xOZ6Ex2s8BqIg9ZoHXYt8u/hpJDQYUmtkx0HsfqrqQviNO57yx1 3TevNR+Nqjxyg== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu , Namhyung Kim Subject: [PATCH v4 31/39] perf: Remove get_perf_callchain() 'init_nr' argument Date: Tue, 21 Jan 2025 18:31:23 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" The 'init_nr' argument has double duty: it's used to initialize both the number of contexts and the number of stack entries. That's confusing and the callers always pass zero anyway. Hard code the zero. 
Acked-by: Namhyung Kim Signed-off-by: Josh Poimboeuf --- include/linux/perf_event.h | 2 +- kernel/bpf/stackmap.c | 4 ++-- kernel/events/callchain.c | 12 ++++++------ kernel/events/core.c | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cb99ec8c9e96..4c8ff7258c6a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1589,7 +1589,7 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_cal= lchain_entry); extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, st= ruct pt_regs *regs); extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, = struct pt_regs *regs); extern struct perf_callchain_entry * -get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool us= er, +get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 3615c06b7dfa..ec3a57a5fba1 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -314,7 +314,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, str= uct bpf_map *, map, if (max_depth > sysctl_perf_event_max_stack) max_depth =3D sysctl_perf_event_max_stack; =20 - trace =3D get_perf_callchain(regs, 0, kernel, user, max_depth, + trace =3D get_perf_callchain(regs, kernel, user, max_depth, false, false); =20 if (unlikely(!trace)) @@ -451,7 +451,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struc= t task_struct *task, else if (kernel && task) trace =3D get_callchain_entry_for_task(task, max_depth); else - trace =3D get_perf_callchain(regs, 0, kernel, user, max_depth, + trace =3D get_perf_callchain(regs, kernel, user, max_depth, crosstask, false); =20 if (unlikely(!trace) || trace->nr < skip) { diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 
8a47e52a454f..83834203e144 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -216,7 +216,7 @@ static void fixup_uretprobe_trampoline_entries(struct p= erf_callchain_entry *entr } =20 struct perf_callchain_entry * -get_perf_callchain(struct pt_regs *regs, u32 init_nr, bool kernel, bool us= er, +get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, u32 max_stack, bool crosstask, bool add_mark) { struct perf_callchain_entry *entry; @@ -227,11 +227,11 @@ get_perf_callchain(struct pt_regs *regs, u32 init_nr,= bool kernel, bool user, if (!entry) return NULL; =20 - ctx.entry =3D entry; - ctx.max_stack =3D max_stack; - ctx.nr =3D entry->nr =3D init_nr; - ctx.contexts =3D 0; - ctx.contexts_maxed =3D false; + ctx.entry =3D entry; + ctx.max_stack =3D max_stack; + ctx.nr =3D entry->nr =3D 0; + ctx.contexts =3D 0; + ctx.contexts_maxed =3D false; =20 if (kernel && !user_mode(regs)) { if (add_mark) diff --git a/kernel/events/core.c b/kernel/events/core.c index 065f9188b44a..ebe457bacf96 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7801,7 +7801,7 @@ perf_callchain(struct perf_event *event, struct pt_re= gs *regs) if (!kernel && !user) return &__empty_callchain; =20 - callchain =3D get_perf_callchain(regs, 0, kernel, user, + callchain =3D get_perf_callchain(regs, kernel, user, max_stack, crosstask, true); return callchain ?: &__empty_callchain; } --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D20771F8F0C; Wed, 22 Jan 2025 02:32:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513145; cv=none; 
b=EGJ2nUX3yGKtXwr7hRoQiHBFYo5yG7TstT+vOL5v2vcra2YlibFoOdo+Acs58esIC8Yvztk6oKMIB5Vu9qcx9P4elqSgXR9Z8jgp7CewpvPzpF3knfiVSHYwTM5GRqNxz/iZ5MThVwtnL9sKcODwGPDT5YiqiAStOcowwivid8E= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513145; c=relaxed/simple; bh=yg549yt5k36N+w4cLjQJtSf1SxCGDHbIvRGsG4AqAok=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Eisuy0MhRsQvUu6Qdxf9lzJkz6LEa/a6tYq/Qvvy2Ckxjz4ymfF0AEj+HEkl5yf9KObD4ASpz78inioCoi5F4WPx1dKs6UC/mUOASq7mQoKCJlG2w4cjzozgAOyeMN7lZFxUIJpP+y+gaYzbYc3MSIf+lvRTaPbp6tBLcYIqI/E= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=QrDnuMIp; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="QrDnuMIp" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 76A31C4CEE0; Wed, 22 Jan 2025 02:32:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513145; bh=yg549yt5k36N+w4cLjQJtSf1SxCGDHbIvRGsG4AqAok=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=QrDnuMIpnHWBgHkJax/ZjLEvkV0m4TP05glKUz2wP+MU1BewEH8+EgtBSZWWPurM3 OKCQahkExfR+CRZNtQPaGfrYk0yTlX1wKsYQOyiHzjDEMt+XRBquBrfCwhDbdtfEGJ o/6VnS0w4UKPw3m5I0YaaK3dViEAT5V0Hp4lNO+ElJVGxxlSCaqldxwBuACou42CQo fI3yAnOVsPHhvmK9yUkE5oIlr+91fmhizUM5GkNPUzqGbBOIcim1HgNpadiggRchxH vJyFjEFGcdduFfOXKjbPPucjbVzHV/5ZRJF3OH53Wph7BWwgjp5w8CtbqvAZeJMR4+ +0yAiehp/KwrQ== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu 
Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 32/39] perf: Remove get_perf_callchain() 'crosstask' argument Date: Tue, 21 Jan 2025 18:31:24 -0800 Message-ID: <73229250c0279667f617c3d6da121c6621164e4d.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" get_perf_callchain() doesn't support cross-task unwinding, so it doesn't make much sense to have 'crosstask' as an argument. Acked-by: Namhyung Kim Signed-off-by: Josh Poimboeuf --- include/linux/perf_event.h | 2 +- kernel/bpf/stackmap.c | 12 ++++-------- kernel/events/callchain.c | 6 +----- kernel/events/core.c | 9 +++++---- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4c8ff7258c6a..1563dc2cd979 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1590,7 +1590,7 @@ extern void perf_callchain_user(struct perf_callchain= _entry_ctx *entry, struct p extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, = struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, - u32 max_stack, bool crosstask, bool add_mark); + u32 max_stack, bool add_mark); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); extern struct perf_callchain_entry *get_callchain_entry(int *rctx); diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index ec3a57a5fba1..ee9701337912 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -314,8 +314,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, str= uct bpf_map *, map, if (max_depth > sysctl_perf_event_max_stack) max_depth =3D sysctl_perf_event_max_stack; =20 - 
trace =3D get_perf_callchain(regs, kernel, user, max_depth, - false, false); + trace =3D get_perf_callchain(regs, kernel, user, max_depth, false); =20 if (unlikely(!trace)) /* couldn't fetch the stack trace */ @@ -430,10 +429,8 @@ static long __bpf_get_stack(struct pt_regs *regs, stru= ct task_struct *task, if (task && user && !user_mode(regs)) goto err_fault; =20 - /* get_perf_callchain does not support crosstask user stack walking - * but returns an empty stack instead of NULL. - */ - if (crosstask && user) { + /* get_perf_callchain() does not support crosstask stack walking */ + if (crosstask) { err =3D -EOPNOTSUPP; goto clear; } @@ -451,8 +448,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struc= t task_struct *task, else if (kernel && task) trace =3D get_callchain_entry_for_task(task, max_depth); else - trace =3D get_perf_callchain(regs, kernel, user, max_depth, - crosstask, false); + trace =3D get_perf_callchain(regs, kernel, user, max_depth,false); =20 if (unlikely(!trace) || trace->nr < skip) { if (may_fault) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 83834203e144..655fb25a725b 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -217,7 +217,7 @@ static void fixup_uretprobe_trampoline_entries(struct p= erf_callchain_entry *entr =20 struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, - u32 max_stack, bool crosstask, bool add_mark) + u32 max_stack, bool add_mark) { struct perf_callchain_entry *entry; struct perf_callchain_entry_ctx ctx; @@ -248,9 +248,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, b= ool user, } =20 if (regs) { - if (crosstask) - goto exit_put; - if (add_mark) perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); =20 @@ -260,7 +257,6 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, b= ool user, } } =20 -exit_put: put_callchain_entry(rctx); =20 return entry; diff --git a/kernel/events/core.c b/kernel/events/core.c index 
ebe457bacf96..99f0f28feeb5 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7793,16 +7793,17 @@ perf_callchain(struct perf_event *event, struct pt_= regs *regs) { bool kernel =3D !event->attr.exclude_callchain_kernel; bool user =3D !event->attr.exclude_callchain_user; - /* Disallow cross-task user callchains. */ - bool crosstask =3D event->ctx->task && event->ctx->task !=3D current; const u32 max_stack =3D event->attr.sample_max_stack; struct perf_callchain_entry *callchain; =20 if (!kernel && !user) return &__empty_callchain; =20 - callchain =3D get_perf_callchain(regs, kernel, user, - max_stack, crosstask, true); + /* Disallow cross-task callchains. */ + if (event->ctx->task && event->ctx->task !=3D current) + return &__empty_callchain; + + callchain =3D get_perf_callchain(regs, kernel, user, max_stack, true); return callchain ?: &__empty_callchain; } =20 --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D57D01F91E7; Wed, 22 Jan 2025 02:32:26 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513146; cv=none; b=oCbGtxrqBvFNZ+veNSrshdbDjyK6l3caCcrHwjwOZJ7AVfCqHv+L1S/F2+wKbG68mFTP6oazNhDAXcVX7Qc9b152tSnMcDYqv86TfWc3y9d0WmNPH4339Xuqn7rjUx2WuEACT0gkE64HK9fe3XxlUX8t0oFRbwhzTyimitCJX1w= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513146; c=relaxed/simple; bh=Gs8v1zYZGA9WgVzi0szwyjCyauktTY2A6iruS7+ZdvM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=mNjJMZHVfS/vV6Z8+Zk8G4zj2VmVjnXhi3jPX2zYEJixgQhW4qHnp6QEsCcdQpvxBrFlEOtlvYUtpE1Byek3BJG5SPYFeGmTengT4wFrXjLVwvLvvFyHIdMppcFXw51HQvNt17Pasnaaut6r7rm2oHM8re750onCaU0l+8sDiCk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=pea5ZGrZ; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="pea5ZGrZ" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 752D4C4CEE7; Wed, 22 Jan 2025 02:32:25 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513146; bh=Gs8v1zYZGA9WgVzi0szwyjCyauktTY2A6iruS7+ZdvM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=pea5ZGrZ+8aCRbO2PsnB27cGnmW8xsIBCneWjVJRzo5ONnk15IRza1fW74lvNQl+x V+7o20Bn61lMrc87illvKD8CiLyEKB7wi8uxw/ERGHSIlVet5Zcn2zAReAJyom7A4D ZfxVKQ9sB0Dt+3s21v7I9UF41DwWXwCe2cQobJFyQX+A8ftvLOb3yuqnwgNYvfrkgY e4KkyG66YrkkC6xZe55B9VJ+IH5KOvYq3VpuVtqVGIsnKBA6bMhN4SglBMXiGXfs2x CLX4s9oz0UVE/vmpNO6gXNhe2Md8K38oRkzxGwbcCGZclpzADDUkA3zfMMSPILvTiM eEmkyqbEG7b3w== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 33/39] perf: Simplify get_perf_callchain() user logic Date: Tue, 21 Jan 2025 18:31:25 -0800 Message-ID: <074d22524c53f976a716facfb67150ba6d825071.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: 
List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Simplify the get_perf_callchain() user logic a bit. task_pt_regs() should never be NULL. Acked-by: Namhyung Kim Signed-off-by: Josh Poimboeuf --- kernel/events/callchain.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 655fb25a725b..2278402b7ac9 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -241,22 +241,20 @@ get_perf_callchain(struct pt_regs *regs, bool kernel,= bool user, =20 if (user) { if (!user_mode(regs)) { - if (current->mm) - regs =3D task_pt_regs(current); - else - regs =3D NULL; + if (!current->mm) + goto exit_put; + regs =3D task_pt_regs(current); } =20 - if (regs) { - if (add_mark) - perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); + if (add_mark) + perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); =20 - start_entry_idx =3D entry->nr; - perf_callchain_user(&ctx, regs); - fixup_uretprobe_trampoline_entries(entry, start_entry_idx); - } + start_entry_idx =3D entry->nr; + perf_callchain_user(&ctx, regs); + fixup_uretprobe_trampoline_entries(entry, start_entry_idx); } =20 +exit_put: put_callchain_entry(rctx); =20 return entry; --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 86E871F9428; Wed, 22 Jan 2025 02:32:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513147; cv=none; 
b=HXRTA15rQSK7g4j4OALytWXU+mTd2Q5ub2XRLUIk9MylJ3yO2lZfr6Dua9CCjkxtd+vpCTlKdVbi1lau+Brh2KEuiNF2B0iKpFaS9aT7fxvFkLHDSXNzoe3DadY6s4yqFzqptJN3Z60dRzC0VAQkaeIN+okzrPPUW9yEJqbl5dc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513147; c=relaxed/simple; bh=ofsDiLXX3EH+qN+Y5WqqTscSbexhgOnm4ZEbWMlJa4Y=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=s2KW7w1EVT8jyMU1nZ8JusFPFyOQUC9j43utNPkpwRFl9o0Mi2OghOdkFNkcP/xFnk7FewxX+ZLynb9qpHfLzAnmrsXvaRyimQVVjzN6ZjA8tSeDULb0Y8dYSr/20+8qZNxasb4bbHsP24bmxjggoBek23NiLVXwjKoBWT7sNwc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=gRANL4zK; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="gRANL4zK" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 846CFC4CEDF; Wed, 22 Jan 2025 02:32:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513147; bh=ofsDiLXX3EH+qN+Y5WqqTscSbexhgOnm4ZEbWMlJa4Y=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=gRANL4zKLW7VuPjnLJ1ogZT2uIgpyFZfW0Nx1FXUkuNU0thERSmvguSoKw/sdjOyn u1Ux2ezjLAODbyqCwfAWiaYfApkJhsdWMSEjiQlu2fLF46eVy2pQKCE3k69WcfUhjC /eqYKFWJAli8QMEz+uVHK4TmGKjEEOBxi8wY+W5bjeZ9YQi928wvwnK6dmBmIzVhge b5VovCWrLEw/XuacKV74OXzJE3W1tfxwyFBVcq5IBgxUzognTNyaa4dpsOaPMFULoG 9zGDF3B6m4mygTnFW9NLCbYTC0FRkq0ipzEDyh9OV2bj1PnRC7XBiXgWv/RKHqXxxC flOy18Envivcw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu 
Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 34/39] perf: Skip user unwind if !current->mm Date: Tue, 21 Jan 2025 18:31:26 -0800 Message-ID: <24cde1381495cd637f0d5663ab7629572bc0eca6.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" If the task doesn't have any memory, there's no stack to unwind. Signed-off-by: Josh Poimboeuf --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 99f0f28feeb5..a886bb83f4d0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7792,7 +7792,7 @@ struct perf_callchain_entry * perf_callchain(struct perf_event *event, struct pt_regs *regs) { bool kernel =3D !event->attr.exclude_callchain_kernel; - bool user =3D !event->attr.exclude_callchain_user; + bool user =3D !event->attr.exclude_callchain_user && current->mm; const u32 max_stack =3D event->attr.sample_max_stack; struct perf_callchain_entry *callchain; =20 --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0AD601F9ABC; Wed, 22 Jan 2025 02:32:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513149; cv=none; b=OrjwMebuzpjfCpF/wHSKF+xSZdF7OvBx71uthBcEgQ4/bAypwRGzVP8y1e8DGQ0jFi7+N9+0la1hOlQll2fDBLGuTGChWD2hPnwC0A2nKbA8arSwdh2X66dQSYcLMCWVQ3GZjmrEIubvlDDG+IL3NDVkD7vf1UUYb0mPnrPt3rg= ARC-Message-Signature: i=1; a=rsa-sha256; 
d=subspace.kernel.org; s=arc-20240116; t=1737513149; c=relaxed/simple; bh=XpQOuYi9nm9uhjXgWVCWnmhCnpsXTHRms5GB5/calVk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=vGnUn1muuthG0b01NEJR2/Z5svzMTAZy1mt1YESTC1qZ8SNeKf8PaE5kF9ytHNENvIUUq0DjOND6kWhwAYEElWTZZ36CJyyDVdRhbAlEGbcI6T/twAjMfZ6Z4adQcRZeDqyQR29+wa80iK+0UQisF+jwaEXSZu3PaQ16AMlfScc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=GNMFkKj8; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="GNMFkKj8" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9226BC4CEE8; Wed, 22 Jan 2025 02:32:27 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513148; bh=XpQOuYi9nm9uhjXgWVCWnmhCnpsXTHRms5GB5/calVk=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=GNMFkKj8gsl5/5CzHyjloKCc0zP/QY3VoLBINjvFCQYJFYepcKlk0pojN8NvsbDGe yQQF7Mp3WL8HbfL24pvlqq8eOb4uwECyRkEjx5/tLWqsiTjOw4r/u7v8G6ANzlfYJb zuGbJcxp48uE8E3esi6H+iBUwmowzpUszPEMcYgLeMbQSGS3X2fJ0jQPMNFcmXvTZI gYYBVYBZ7O2jKT5Sh3lapZMOJhzx7HpM9+4cpRadEOeOeOxgVzI9ha1C8tQgWlqDBS lu67pLZSwqpjCcAcTqgENGY73Vz7ntqgyH83JQPS7LO8ATH5jVdxNCpxLVWnwqiV4S l8jzaASp2qvKg== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 35/39] perf: Support deferred user callchains Date: Tue, 21 Jan 2025 18:31:27 -0800 Message-ID: 
<2e54e6f1c914b219b889fbb47bc33d4749c3ad87.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Use the new unwind_deferred_request() interface (if available) to defer unwinds to task context. This allows the use of .sframe (if available) and also prevents duplicate userspace unwinds. Suggested-by: Steven Rostedt Suggested-by: Peter Zijlstra Signed-off-by: Josh Poimboeuf --- arch/Kconfig | 3 + include/linux/perf_event.h | 13 +++- include/uapi/linux/perf_event.h | 19 ++++- kernel/bpf/stackmap.c | 6 +- kernel/events/callchain.c | 11 ++- kernel/events/core.c | 103 +++++++++++++++++++++++++- tools/include/uapi/linux/perf_event.h | 19 ++++- 7 files changed, 166 insertions(+), 8 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index b3676605bab6..83ab94af46ca 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -472,6 +472,9 @@ config SFRAME_VALIDATION =20 If unsure, say N. 
=20 +config HAVE_PERF_CALLCHAIN_DEFERRED + bool + config HAVE_PERF_REGS bool help diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1563dc2cd979..7fd54e4d2084 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -62,6 +62,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include =20 struct perf_callchain_entry { @@ -833,6 +834,10 @@ struct perf_event { unsigned int pending_work; struct rcuwait pending_work_wait; =20 + struct unwind_work pending_unwind_work; + struct rcuwait pending_unwind_wait; + unsigned int pending_unwind_callback; + atomic_t event_limit; =20 /* address range filters */ @@ -1590,12 +1595,18 @@ extern void perf_callchain_user(struct perf_callcha= in_entry_ctx *entry, struct p extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, = struct pt_regs *regs); extern struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, - u32 max_stack, bool add_mark); + u32 max_stack, bool add_mark, bool defer_user); extern int get_callchain_buffers(int max_stack); extern void put_callchain_buffers(void); extern struct perf_callchain_entry *get_callchain_entry(int *rctx); extern void put_callchain_entry(int rctx); =20 +#ifdef CONFIG_HAVE_PERF_CALLCHAIN_DEFERRED +extern void perf_callchain_user_deferred(struct perf_callchain_entry_ctx *= entry, struct pt_regs *regs); +#else +static inline void perf_callchain_user_deferred(struct perf_callchain_entr= y_ctx *entry, struct pt_regs *regs) {} +#endif + extern int sysctl_perf_event_max_stack; extern int sysctl_perf_event_max_contexts_per_stack; =20 diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_even= t.h index 0524d541d4e3..16307be57de9 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -460,7 +460,8 @@ struct perf_event_attr { inherit_thread : 1, /* children only inherit if cloned with CLONE_THR= EAD */ remove_on_exec : 1, /* event 
is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ - __reserved_1 : 26; + defer_callchain: 1, /* generate PERF_RECORD_CALLCHAIN_DEFERRED record= s */ + __reserved_1 : 25; =20 union { __u32 wakeup_events; /* wakeup every n events */ @@ -1226,6 +1227,21 @@ enum perf_event_type { */ PERF_RECORD_AUX_OUTPUT_HW_ID =3D 21, =20 + /* + * This user callchain capture was deferred until shortly before + * returning to user space. Previous samples would have kernel + * callchains only and they need to be stitched with this to make full + * callchains. + * + * struct { + * struct perf_event_header header; + * u64 nr; + * u64 ips[nr]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CALLCHAIN_DEFERRED =3D 22, + PERF_RECORD_MAX, /* non-ABI */ }; =20 @@ -1256,6 +1272,7 @@ enum perf_callchain_context { PERF_CONTEXT_HV =3D (__u64)-32, PERF_CONTEXT_KERNEL =3D (__u64)-128, PERF_CONTEXT_USER =3D (__u64)-512, + PERF_CONTEXT_USER_DEFERRED =3D (__u64)-640, =20 PERF_CONTEXT_GUEST =3D (__u64)-2048, PERF_CONTEXT_GUEST_KERNEL =3D (__u64)-2176, diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index ee9701337912..f073ebaf9c30 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -314,8 +314,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, str= uct bpf_map *, map, if (max_depth > sysctl_perf_event_max_stack) max_depth =3D sysctl_perf_event_max_stack; =20 - trace =3D get_perf_callchain(regs, kernel, user, max_depth, false); - + trace =3D get_perf_callchain(regs, kernel, user, max_depth, false, false); if (unlikely(!trace)) /* couldn't fetch the stack trace */ return -EFAULT; @@ -448,7 +447,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struc= t task_struct *task, else if (kernel && task) trace =3D get_callchain_entry_for_task(task, max_depth); else - trace =3D get_perf_callchain(regs, kernel, user, max_depth,false); + trace =3D get_perf_callchain(regs, kernel, user, max_depth, + false, false); =20 if (unlikely(!trace) 
|| trace->nr < skip) { if (may_fault) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 2278402b7ac9..eeb15ba0137f 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -217,7 +217,7 @@ static void fixup_uretprobe_trampoline_entries(struct p= erf_callchain_entry *entr =20 struct perf_callchain_entry * get_perf_callchain(struct pt_regs *regs, bool kernel, bool user, - u32 max_stack, bool add_mark) + u32 max_stack, bool add_mark, bool defer_user) { struct perf_callchain_entry *entry; struct perf_callchain_entry_ctx ctx; @@ -246,6 +246,15 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, = bool user, regs =3D task_pt_regs(current); } =20 + if (defer_user) { + /* + * Foretell the coming of PERF_RECORD_CALLCHAIN_DEFERRED + * which can be stitched to this one. + */ + perf_callchain_store_context(&ctx, PERF_CONTEXT_USER_DEFERRED); + goto exit_put; + } + if (add_mark) perf_callchain_store_context(&ctx, PERF_CONTEXT_USER); =20 diff --git a/kernel/events/core.c b/kernel/events/core.c index a886bb83f4d0..32603bbd797d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -55,6 +55,7 @@ #include #include #include +#include =20 #include "internal.h" =20 @@ -5312,11 +5313,37 @@ static void perf_pending_task_sync(struct perf_even= t *event) rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_= UNINTERRUPTIBLE); } =20 +static void perf_pending_unwind_sync(struct perf_event *event) +{ + might_sleep(); + + if (!event->pending_unwind_callback) + return; + + /* + * If the task is queued to the current task's queue, we + * obviously can't wait for it to complete. Simply cancel it. + */ + if (unwind_deferred_cancel(current, &event->pending_unwind_work)) { + event->pending_unwind_callback =3D 0; + local_dec(&event->ctx->nr_no_switch_fast); + return; + } + + /* + * All accesses related to the event are within the same RCU section in + * perf_event_callchain_deferred(). 
The RCU grace period before the + * event is freed will make sure all those accesses are complete by then. + */ + rcuwait_wait_event(&event->pending_unwind_wait, !event->pending_unwind_ca= llback, TASK_UNINTERRUPTIBLE); +} + static void _free_event(struct perf_event *event) { irq_work_sync(&event->pending_irq); irq_work_sync(&event->pending_disable_irq); perf_pending_task_sync(event); + perf_pending_unwind_sync(event); =20 unaccount_event(event); =20 @@ -6933,6 +6960,61 @@ static void perf_pending_irq(struct irq_work *entry) perf_swevent_put_recursion_context(rctx); } =20 + +struct perf_callchain_deferred_event { + struct perf_event_header header; + u64 nr; + u64 ips[]; +}; + +static void perf_event_callchain_deferred(struct unwind_work *work, struct= unwind_stacktrace *trace, u64 cookie) +{ + struct perf_event *event =3D container_of(work, struct perf_event, pendin= g_unwind_work); + struct perf_callchain_deferred_event deferred_event; + u64 callchain_context =3D PERF_CONTEXT_USER; + struct perf_output_handle handle; + struct perf_sample_data data; + u64 nr =3D trace->nr +1 ; /* +1 =3D=3D callchain_context */ + + if (WARN_ON_ONCE(!event->pending_unwind_callback)) + return; + + /* + * All accesses to the event must belong to the same implicit RCU + * read-side critical section as the ->pending_unwind_callback reset. + * See comment in perf_pending_unwind_sync(). 
+ */ + rcu_read_lock(); + + if (!current->mm) + goto out; + + deferred_event.header.type =3D PERF_RECORD_CALLCHAIN_DEFERRED; + deferred_event.header.misc =3D PERF_RECORD_MISC_USER; + deferred_event.header.size =3D sizeof(deferred_event) + (nr * sizeof(u64)= ); + + deferred_event.nr =3D nr; + + perf_event_header__init_id(&deferred_event.header, &data, event); + + if (perf_output_begin(&handle, &data, event, deferred_event.header.size)) + goto out; + + perf_output_put(&handle, deferred_event); + perf_output_put(&handle, callchain_context); + perf_output_copy(&handle, trace->entries, trace->nr * sizeof(u64)); + perf_event__output_id_sample(event, &handle, &data); + + perf_output_end(&handle); + +out: + event->pending_unwind_callback =3D 0; + local_dec(&event->ctx->nr_no_switch_fast); + rcuwait_wake_up(&event->pending_unwind_wait); + + rcu_read_unlock(); +} + static void perf_pending_task(struct callback_head *head) { struct perf_event *event =3D container_of(head, struct perf_event, pendin= g_task); @@ -7795,6 +7877,8 @@ perf_callchain(struct perf_event *event, struct pt_re= gs *regs) bool user =3D !event->attr.exclude_callchain_user && current->mm; const u32 max_stack =3D event->attr.sample_max_stack; struct perf_callchain_entry *callchain; + bool defer_user =3D IS_ENABLED(CONFIG_UNWIND_USER) && user && + event->attr.defer_callchain; =20 if (!kernel && !user) return &__empty_callchain; @@ -7803,7 +7887,21 @@ perf_callchain(struct perf_event *event, struct pt_r= egs *regs) if (event->ctx->task && event->ctx->task !=3D current) return &__empty_callchain; =20 - callchain =3D get_perf_callchain(regs, kernel, user, max_stack, true); + if (defer_user && !event->pending_unwind_callback) { + u64 cookie; + + if (!unwind_deferred_request(&event->pending_unwind_work, &cookie)) { + event->pending_unwind_callback =3D 1; + local_inc(&event->ctx->nr_no_switch_fast); + } + + if (!cookie) + defer_user =3D false; + } + + callchain =3D get_perf_callchain(regs, kernel, user, max_stack, 
true, + defer_user); + return callchain ?: &__empty_callchain; } =20 @@ -12225,6 +12323,9 @@ perf_event_alloc(struct perf_event_attr *attr, int = cpu, init_task_work(&event->pending_task, perf_pending_task); rcuwait_init(&event->pending_work_wait); =20 + unwind_deferred_init(&event->pending_unwind_work, perf_event_callchain_de= ferred); + rcuwait_init(&event->pending_unwind_wait); + mutex_init(&event->mmap_mutex); raw_spin_lock_init(&event->addr_filters.lock); =20 diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/lin= ux/perf_event.h index 0524d541d4e3..16307be57de9 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -460,7 +460,8 @@ struct perf_event_attr { inherit_thread : 1, /* children only inherit if cloned with CLONE_THR= EAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ - __reserved_1 : 26; + defer_callchain: 1, /* generate PERF_RECORD_CALLCHAIN_DEFERRED record= s */ + __reserved_1 : 25; =20 union { __u32 wakeup_events; /* wakeup every n events */ @@ -1226,6 +1227,21 @@ enum perf_event_type { */ PERF_RECORD_AUX_OUTPUT_HW_ID =3D 21, =20 + /* + * This user callchain capture was deferred until shortly before + * returning to user space. Previous samples would have kernel + * callchains only and they need to be stitched with this to make full + * callchains. 
+ * + * struct { + * struct perf_event_header header; + * u64 nr; + * u64 ips[nr]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_CALLCHAIN_DEFERRED =3D 22, + PERF_RECORD_MAX, /* non-ABI */ }; =20 @@ -1256,6 +1272,7 @@ enum perf_callchain_context { PERF_CONTEXT_HV =3D (__u64)-32, PERF_CONTEXT_KERNEL =3D (__u64)-128, PERF_CONTEXT_USER =3D (__u64)-512, + PERF_CONTEXT_USER_DEFERRED =3D (__u64)-640, =20 PERF_CONTEXT_GUEST =3D (__u64)-2048, PERF_CONTEXT_GUEST_KERNEL =3D (__u64)-2176, --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D48381F9F4D; Wed, 22 Jan 2025 02:32:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513150; cv=none; b=h1wkpfh1N60wPMEUK33pD/VqzroVKahUH2ZNkfnUFr8DeZYoTK0XWFTR7mpSGDMQ9w+vP69+kkkoVkirO0bTY7hEmQ8D/y0E9lyKo3m3/AqyM3ZaZpxMU1zm1pqsERWmzMZq54fopnUyp//aPDTYSQZoK2pPK23f2tV4SLgwu4Y= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513150; c=relaxed/simple; bh=/R6DJ6Sn/gOUZAok3dE7uw1a2zSKJM80ZuUXm5hUM2Y=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=s659/stg0mgMc8luiz5rUKtNZdOB1siOxmRX1TH1LJeUA9RlLH/JlxCZUfZe1+++08woH9clCq9z1GeF7EWhqathdzfJr0gTLsZ1nQ8g8aHsyI2dNrgDzOOS4X8Sr+JMX9jWGo5f929+mZlKuL0YT3f1MySJyHU5omTGiy6A5XI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=eBjdOBsB; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="eBjdOBsB" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 971AAC4CEE2; 
Wed, 22 Jan 2025 02:32:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513150; bh=/R6DJ6Sn/gOUZAok3dE7uw1a2zSKJM80ZuUXm5hUM2Y=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=eBjdOBsBB3RAe5IdmfMCc/Fndg4k2xtiiXVSbxI+YktywVq7n8PAbinrvp7phBj3R Lh/o6NLUPK7zyFTEQehTp+8/QhZs5n47LiyT9QMIuH2yM8/XPhyWYj9DkbfkwJAtlt 7WlorgWnH5G4mfIiw81nmYOVHlbqiTw6QpjsWWZKhD2aJhMRRVpJlB7VfcSWXvvnIg E3L4cm1Jjjbr2uwrr+T2oWEs48wUfAfeFUHH69Dn25bA52P2d8bfFSlowrXRPujfNf FGaMNC+ElE1B7+SOTCjC4n4ZU4fv6LeAo3Ee14vDt/WPy0LUZ4bTsQ8GNGM6oQRyn3 Zqnhj2emsHNaQ== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 36/39] perf tools: Minimal CALLCHAIN_DEFERRED support Date: Tue, 21 Jan 2025 18:31:28 -0800 Message-ID: <416f583efd25cfab1c3e6be7fa823c516303df85.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Namhyung Kim Add a new event type for deferred callchains and a new callback for the struct perf_tool. For now it doesn't actually handle the deferred callchains but it just marks the sample if it has the PERF_CONTEXT_USER_DEFERRED in the callchain array. At least, perf report can dump the raw data with this change. 
Actually this requires the next commit to enable attr.defer_callchain, but if you already have a data file, it'll show the following result. $ perf report -D ... 0x5fe0@perf.data [0x40]: event: 22 . . ... raw event: size 64 bytes . 0000: 16 00 00 00 02 00 40 00 02 00 00 00 00 00 00 00 ......@.......= .. . 0010: 00 fe ff ff ff ff ff ff 4b d3 3f 25 45 7f 00 00 ........K.?%E.= .. . 0020: 21 03 00 00 21 03 00 00 43 02 12 ab 05 00 00 00 !...!...C.....= .. . 0030: 00 00 00 00 00 00 00 00 09 00 00 00 00 00 00 00 ..............= .. 0 24344920643 0x5fe0 [0x40]: PERF_RECORD_CALLCHAIN_DEFERRED(IP, 0x2): 801= /801: 0 ... FP chain: nr:2 ..... 0: fffffffffffffe00 ..... 1: 00007f45253fd34b : unhandled! Signed-off-by: Namhyung Kim Signed-off-by: Josh Poimboeuf --- tools/lib/perf/include/perf/event.h | 7 +++++++ tools/perf/util/event.c | 1 + tools/perf/util/evsel.c | 15 +++++++++++++++ tools/perf/util/machine.c | 1 + tools/perf/util/perf_event_attr_fprintf.c | 1 + tools/perf/util/sample.h | 3 ++- tools/perf/util/session.c | 17 +++++++++++++++++ tools/perf/util/tool.c | 1 + tools/perf/util/tool.h | 3 ++- 9 files changed, 47 insertions(+), 2 deletions(-) diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/p= erf/event.h index 37bb7771d914..f643a6a2b9fc 100644 --- a/tools/lib/perf/include/perf/event.h +++ b/tools/lib/perf/include/perf/event.h @@ -151,6 +151,12 @@ struct perf_record_switch { __u32 next_prev_tid; }; =20 +struct perf_record_callchain_deferred { + struct perf_event_header header; + __u64 nr; + __u64 ips[]; +}; + struct perf_record_header_attr { struct perf_event_header header; struct perf_event_attr attr; @@ -494,6 +500,7 @@ union perf_event { struct perf_record_read read; struct perf_record_throttle throttle; struct perf_record_sample sample; + struct perf_record_callchain_deferred callchain_deferred; struct perf_record_bpf_event bpf; struct perf_record_ksymbol ksymbol; struct perf_record_text_poke_event text_poke; diff --git 
a/tools/perf/util/event.c b/tools/perf/util/event.c index aac96d5d1917..8cdec373db44 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -58,6 +58,7 @@ static const char *perf_event__names[] =3D { [PERF_RECORD_CGROUP] =3D "CGROUP", [PERF_RECORD_TEXT_POKE] =3D "TEXT_POKE", [PERF_RECORD_AUX_OUTPUT_HW_ID] =3D "AUX_OUTPUT_HW_ID", + [PERF_RECORD_CALLCHAIN_DEFERRED] =3D "CALLCHAIN_DEFERRED", [PERF_RECORD_HEADER_ATTR] =3D "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] =3D "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] =3D "TRACING_DATA", diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d22c5df1701e..09b9735f2fb1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2817,6 +2817,18 @@ int evsel__parse_sample(struct evsel *evsel, union p= erf_event *event, data->data_src =3D PERF_MEM_DATA_SRC_NONE; data->vcpu =3D -1; =20 + if (event->header.type =3D=3D PERF_RECORD_CALLCHAIN_DEFERRED) { + const u64 max_callchain_nr =3D UINT64_MAX / sizeof(u64); + + data->callchain =3D (struct ip_callchain *)&event->callchain_deferred.nr; + if (data->callchain->nr > max_callchain_nr) + return -EFAULT; + + if (evsel->core.attr.sample_id_all) + perf_evsel__parse_id_sample(evsel, event, data); + return 0; + } + if (event->header.type !=3D PERF_RECORD_SAMPLE) { if (!evsel->core.attr.sample_id_all) return 0; @@ -2947,6 +2959,9 @@ int evsel__parse_sample(struct evsel *evsel, union pe= rf_event *event, if (data->callchain->nr > max_callchain_nr) return -EFAULT; sz =3D data->callchain->nr * sizeof(u64); + if (evsel->core.attr.defer_callchain && data->callchain->nr >=3D 1 && + data->callchain->ips[data->callchain->nr - 1] =3D=3D PERF_CONTEXT_US= ER_DEFERRED) + data->deferred_callchain =3D true; OVERFLOW_CHECK(array, sz, max_size); array =3D (void *)array + sz; } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 27d5345d2b30..9da467886bc6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2087,6 +2087,7 @@ 
static int add_callchain_ip(struct thread *thread, *cpumode =3D PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: + case PERF_CONTEXT_USER_DEFERRED: *cpumode =3D PERF_RECORD_MISC_USER; break; default: diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/pe= rf_event_attr_fprintf.c index 59fbbba79697..113845b35110 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -321,6 +321,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_even= t_attr *attr, PRINT_ATTRf(inherit_thread, p_unsigned); PRINT_ATTRf(remove_on_exec, p_unsigned); PRINT_ATTRf(sigtrap, p_unsigned); + PRINT_ATTRf(defer_callchain, p_unsigned); =20 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsig= ned, false); PRINT_ATTRf(bp_type, p_unsigned); diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h index 70b2c3135555..010659dc80f8 100644 --- a/tools/perf/util/sample.h +++ b/tools/perf/util/sample.h @@ -108,7 +108,8 @@ struct perf_sample { u16 p_stage_cyc; u16 retire_lat; }; - bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool no_hw_idx; /* No hw_idx collected in branch_stack */ + bool deferred_callchain; /* Has deferred user callchains */ char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 507e6cba9545..493070180279 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -714,6 +714,7 @@ static perf_event__swap_op perf_event__swap_ops[] =3D { [PERF_RECORD_CGROUP] =3D perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] =3D perf_event__text_poke_swap, [PERF_RECORD_AUX_OUTPUT_HW_ID] =3D perf_event__all64_swap, + [PERF_RECORD_CALLCHAIN_DEFERRED] =3D perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] =3D perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] =3D perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] =3D perf_event__tracing_data_swap, @@ -1107,6 
+1108,19 @@ static void dump_sample(struct evsel *evsel, union p= erf_event *event, sample_read__printf(sample, evsel->core.attr.read_format); } =20 +static void dump_deferred_callchain(struct evsel *evsel, union perf_event = *event, + struct perf_sample *sample) +{ + if (!dump_trace) + return; + + printf("(IP, 0x%x): %d/%d: %#" PRIx64 "\n", + event->header.misc, sample->pid, sample->tid, sample->ip); + + if (evsel__has_callchain(evsel)) + callchain__printf(evsel, sample); +} + static void dump_read(struct evsel *evsel, union perf_event *event) { struct perf_record_read *read_event =3D &event->read; @@ -1337,6 +1351,9 @@ static int machines__deliver_event(struct machines *m= achines, return tool->text_poke(tool, event, sample, machine); case PERF_RECORD_AUX_OUTPUT_HW_ID: return tool->aux_output_hw_id(tool, event, sample, machine); + case PERF_RECORD_CALLCHAIN_DEFERRED: + dump_deferred_callchain(evsel, event, sample); + return tool->callchain_deferred(tool, event, sample, evsel, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 3b7f390f26eb..e78f16de912e 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -259,6 +259,7 @@ void perf_tool__init(struct perf_tool *tool, bool order= ed_events) tool->read =3D process_event_sample_stub; tool->throttle =3D process_event_stub; tool->unthrottle =3D process_event_stub; + tool->callchain_deferred =3D process_event_sample_stub; tool->attr =3D process_event_synth_attr_stub; tool->event_update =3D process_event_synth_event_update_stub; tool->tracing_data =3D process_event_synth_tracing_data_stub; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index db1c7642b0d1..9987bbde6d5e 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -42,7 +42,8 @@ enum show_feature_header { =20 struct perf_tool { event_sample sample, - read; + read, + callchain_deferred; event_op mmap, mmap2, comm, --=20 2.48.1 From nobody Wed 
Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BDA161FA150; Wed, 22 Jan 2025 02:32:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513153; cv=none; b=Q16Jb7mOFSU01hjIwoXbYxWbFP4mu1e0rM3eUGQkumAO89YR28lCHCMWkp5VQMhEQT7vzvtb8nE4GRMeqXhp+PLvvtZUwhoJ8xiu2ZvGC0Rv1w5pD147l4vI2bGXUmWzpJ4p1Y3dbybTRoA30ITCo23PgFmuPNgmZ/yN4V8h/PY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513153; c=relaxed/simple; bh=uQkriTpbv5xsQRFM+z/ztvmuKCsCVHw581yTqgar7aM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=eoW60IEeAzSoJKC6ihVyK93ndJIJDm6eO2uDjJE9HwrkFjDNDaEX/8atsJRDm1c+pn1N1BYfVe/fkIB2WLdHgMZ+22WjRXEit2SaDtK7cV1PW8f8A1nLeHWnq9mBjcZYCCFbG56OY117ksLZgMccnPK/wCp7T//IpWbWkw9o+r4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=r0Dc8Dnd; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="r0Dc8Dnd" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 19006C4CEDF; Wed, 22 Jan 2025 02:32:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513152; bh=uQkriTpbv5xsQRFM+z/ztvmuKCsCVHw581yTqgar7aM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=r0Dc8DndFHN9H320zL9b6Ez3/+/ciLSVzzMWVeNKc9M14Zrdw+qWl7YJgPjjKuOJp rMDFUB0+omkZqhWA9qMnqEI7SQRIA9OsLh7vN5QOb7WhGYofeXcBTg9jXh4qTqYz5d 6pvPkYW+uUMmobaWhiO4Oy+3G2pk9OHJgTEWf9djeznhoVvdwBpuY89SmVAvEkv5QE eIfIesvIfOjfsLDUf/PHDZPbVXZyOJr0kiSME2qr1issMrM5BSigtUYbSDFn+KhCST 
zdbwCYTFK9ZFkuvUkB7tUIXepwOk4RRRf+lFfCXqLog5JV23KIj8MyOREHnGvbwYae K15p471mPnRLw== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 37/39] perf record: Enable defer_callchain for user callchains Date: Tue, 21 Jan 2025 18:31:29 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Namhyung Kim And add the missing feature detection logic to clear the flag on old kernels. $ perf record -g -vv true ... 
------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) size 136 config 0 (PERF_COUNT_HW_CPU_CYCLES) { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|CALLCHAIN|PERIOD read_format ID|LOST disabled 1 inherit 1 mmap 1 comm 1 freq 1 enable_on_exec 1 task 1 sample_id_all 1 mmap2 1 comm_exec 1 ksymbol 1 bpf_event 1 defer_callchain 1 ------------------------------------------------------------ sys_perf_event_open: pid 162755 cpu 0 group_fd -1 flags 0x8 sys_perf_event_open failed, error -22 switching off deferred callchain support Signed-off-by: Namhyung Kim Signed-off-by: Josh Poimboeuf --- tools/perf/util/evsel.c | 24 ++++++++++++++++++++++++ tools/perf/util/evsel.h | 1 + 2 files changed, 25 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 09b9735f2fb1..e3be3cc7632d 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -998,6 +998,14 @@ static void __evsel__config_callchain(struct evsel *ev= sel, struct record_opts *o } } =20 + if (param->record_mode =3D=3D CALLCHAIN_FP && !attr->exclude_callchain_us= er) { + /* + * Enable deferred callchains optimistically. It'll be switched + * off later if the kernel doesn't support it. + */ + attr->defer_callchain =3D 1; + } + if (function) { pr_info("Disabling user space callchains for function trace event.\n"); attr->exclude_callchain_user =3D 1; @@ -2038,6 +2046,8 @@ static int __evsel__prepare_open(struct evsel *evsel,= struct perf_cpu_map *cpus, =20 static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.defer_callchain) + evsel->core.attr.defer_callchain =3D 0; if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit= && (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) evsel->core.attr.inherit =3D 0; @@ -2244,6 +2254,15 @@ static bool evsel__detect_missing_features(struct ev= sel *evsel) =20 /* Please add new feature detection here. 
*/ =20 + attr.defer_callchain =3D true; + attr.sample_type =3D PERF_SAMPLE_CALLCHAIN; + if (has_attr_feature(&attr, /*flags=3D*/0)) + goto found; + perf_missing_features.defer_callchain =3D true; + pr_debug2("switching off deferred callchain support\n"); + attr.defer_callchain =3D false; + attr.sample_type =3D 0; + attr.inherit =3D true; attr.sample_type =3D PERF_SAMPLE_READ; if (has_attr_feature(&attr, /*flags=3D*/0)) @@ -2355,6 +2374,11 @@ static bool evsel__detect_missing_features(struct ev= sel *evsel) errno =3D old_errno; =20 check: + if (evsel->core.attr.defer_callchain && + evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN && + perf_missing_features.defer_callchain) + return true; + if (evsel->core.attr.inherit && (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && perf_missing_features.inherit_sample_read) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 04934a7af174..b90a970f9a30 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -206,6 +206,7 @@ struct perf_missing_features { bool read_lost; bool branch_counters; bool inherit_sample_read; + bool defer_callchain; }; =20 extern struct perf_missing_features perf_missing_features; --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2432C1F9F76; Wed, 22 Jan 2025 02:32:33 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513153; cv=none; b=cDTDSKRMstCGb1PUVrGOoTY6EoaQ+8WZ34exgTa9PW5pU2pKAff4setdfV+mQp3nc2K2vCvmGYfM/alpSrfKzM4eUs+SYPfFzqrZc0hQTwa20ezouAaICWa+Elt+NBRGhzZl7cDCe7cZLWvjzqA43ih49bVQiKDK+VebSkCtGA0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513153; 
c=relaxed/simple; bh=I0FdqbtBSpT9jRzDPFUVBvJkq51JCnN3HnuP0QPYBbY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=G5kt+oacmbb1nSrw8O61de3qKeLIk9r4ADpXbA79Ob0ejgpyP0NQGBpU/TzazzqYZK52TnF7lrjE81uYZLGDgsK5J62TrQM1Z3sEsBWOXgcULJLm8KII3J2eIZ+jc+MC+GyChd+assfAddKDu2a7/ABSn5PYYCOHL68ltPqRMS4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=cwLor0qK; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="cwLor0qK" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2EB4EC4CEE8; Wed, 22 Jan 2025 02:32:32 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513153; bh=I0FdqbtBSpT9jRzDPFUVBvJkq51JCnN3HnuP0QPYBbY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=cwLor0qKCbqZByKRkHfxGMr2S5Pm2GgaRh4h7pu5lI4IChXXwI5B8uBwX+ZCxtuEp SY5pbD4wa2VGqX7Evs5mUn7gpM0F0wnnjelA7XAgY/sRDuxYGl4Q7J3JsmDI1IXRg9 yEAcnfnkNyKstKJFpqL1+XdOWUNtFuogj87l2W+ANEc3DTjsctNf1TCvvEhcJeupmg NTKBaalBYlgkjok1dBM1+wk5zZxCKMf6MUHDgOjuwVGHvu2EZKQiva3UG72EPbWPBj xMvWi7gP0Cpcq6JoeQRObnEvgyPrQ9uz5znbwf9yVcNjUTXmjkjm6W4Lwjsnfs8TzO 8KaheduT9VlHQ== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 38/39] perf script: Display PERF_RECORD_CALLCHAIN_DEFERRED Date: Tue, 21 Jan 2025 18:31:30 -0800 Message-ID: 
<6685fa04c9c4361d0fd394489d8010e219788cca.1737511963.git.jpoimboe@kernel.org> X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Namhyung Kim Handle the deferred callchains in the script output. $ perf script perf 801 [000] 18.031793: 1 cycles:P: ffffffff91a14c36 __intel_pmu_enable_all.isra.0+0x56 ([kernel.kall= syms]) ffffffff91d373e9 perf_ctx_enable+0x39 ([kernel.kallsyms]) ffffffff91d36af7 event_function+0xd7 ([kernel.kallsyms]) ffffffff91d34222 remote_function+0x42 ([kernel.kallsyms]) ffffffff91c1ebe1 generic_exec_single+0x61 ([kernel.kallsyms]) ffffffff91c1edac smp_call_function_single+0xec ([kernel.kallsyms]) ffffffff91d37a9d event_function_call+0x10d ([kernel.kallsyms]) ffffffff91d33557 perf_event_for_each_child+0x37 ([kernel.kallsyms= ]) ffffffff91d47324 _perf_ioctl+0x204 ([kernel.kallsyms]) ffffffff91d47c43 perf_ioctl+0x33 ([kernel.kallsyms]) ffffffff91e2f216 __x64_sys_ioctl+0x96 ([kernel.kallsyms]) ffffffff9265f1ae do_syscall_64+0x9e ([kernel.kallsyms]) ffffffff92800130 entry_SYSCALL_64+0xb0 ([kernel.kallsyms]) perf 801 [000] 18.031814: DEFERRED CALLCHAIN 7fb5fc22034b __GI___ioctl+0x3b (/usr/lib/x86_64-linux-gnu/lib= c.so.6) Signed-off-by: Namhyung Kim Signed-off-by: Josh Poimboeuf --- tools/perf/builtin-script.c | 89 +++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9e47905f75a6..2b9085fa18bd 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2541,6 +2541,93 @@ static int process_sample_event(const struct perf_to= ol *tool, return ret; } =20 +static int process_deferred_sample_event(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, + struct machine 
*machine) +{ + struct perf_script *scr =3D container_of(tool, struct perf_script, tool); + struct perf_event_attr *attr =3D &evsel->core.attr; + struct evsel_script *es =3D evsel->priv; + unsigned int type =3D output_type(attr->type); + struct addr_location al; + FILE *fp =3D es->fp; + int ret =3D 0; + + if (output[type].fields =3D=3D 0) + return 0; + + /* Set thread to NULL to indicate addr_al and al are not initialized */ + addr_location__init(&al); + + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, + sample->time)) { + goto out_put; + } + + if (debug_mode) { + if (sample->time < last_timestamp) { + pr_err("Samples misordered, previous: %" PRIu64 + " this: %" PRIu64 "\n", last_timestamp, + sample->time); + nr_unordered++; + } + last_timestamp =3D sample->time; + goto out_put; + } + + if (filter_cpu(sample)) + goto out_put; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_err("problem processing %d event, skipping it.\n", + event->header.type); + ret =3D -1; + goto out_put; + } + + if (al.filtered) + goto out_put; + + if (!show_event(sample, evsel, al.thread, &al, NULL)) + goto out_put; + + if (evswitch__discard(&scr->evswitch, evsel)) + goto out_put; + + perf_sample__fprintf_start(scr, sample, al.thread, evsel, + PERF_RECORD_CALLCHAIN_DEFERRED, fp); + fprintf(fp, "DEFERRED CALLCHAIN"); + + if (PRINT_FIELD(IP)) { + struct callchain_cursor *cursor =3D NULL; + + if (symbol_conf.use_callchain && sample->callchain) { + cursor =3D get_tls_callchain_cursor(); + if (thread__resolve_callchain(al.thread, cursor, evsel, + sample, NULL, NULL, + scripting_max_stack)) { + pr_info("cannot resolve deferred callchains\n"); + cursor =3D NULL; + } + } + + fputc(cursor ? 
'\n' : ' ', fp); + sample__fprintf_sym(sample, &al, 0, output[type].print_ip_opts, + cursor, symbol_conf.bt_stop_list, fp); + } + + fprintf(fp, "\n"); + + if (verbose > 0) + fflush(fp); + +out_put: + addr_location__exit(&al); + return ret; +} + // Used when scr->per_event_dump is not set static struct evsel_script es_stdout; =20 @@ -4326,6 +4413,7 @@ int cmd_script(int argc, const char **argv) =20 perf_tool__init(&script.tool, !unsorted_dump); script.tool.sample =3D process_sample_event; + script.tool.callchain_deferred =3D process_deferred_sample_event; script.tool.mmap =3D perf_event__process_mmap; script.tool.mmap2 =3D perf_event__process_mmap2; script.tool.comm =3D perf_event__process_comm; @@ -4352,6 +4440,7 @@ int cmd_script(int argc, const char **argv) script.tool.throttle =3D process_throttle_event; script.tool.unthrottle =3D process_throttle_event; script.tool.ordering_requires_timestamps =3D true; + script.tool.merge_deferred_callchains =3D false; session =3D perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); --=20 2.48.1 From nobody Wed Jan 22 08:57:26 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8BADE1FA176; Wed, 22 Jan 2025 02:32:34 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513154; cv=none; b=Ocjh2S361MtBWKihRNHYeYPif6pOF4mMScL06hOOCndKagf3F62Lx0rC1MSBZvX5UdHboNhL5RKpS7pGp9JtSxswKvMIup/BVygOCt4+WtaKACSLYNOSQw9/CJqzV7f8BLIryYwaAeXp/C/Q5QwGohV7gfx2Vouzzd+ChxntMeo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1737513154; c=relaxed/simple; bh=JQKXYhOuSy/+p5/9b2zzzkONdT4iGz9x57qeeTXfhFs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: 
MIME-Version; b=UpFtYJDGuKt5rIJDXNUdvIPxOaCiUHzEGsPViESChfXhepZ1Acm9aAGqNA5H+jYxG9HTZESfZkaqHcjG6Wkak874HTwUN4zAbfxSPAEktrzqqCDvyadSbIiFKkEWveo04E8DPUGYw4uD/eNuCVFI1GChcexsSIdMbC3mESd34vk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=SEiWo7fX; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="SEiWo7fX" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2A573C4CEE4; Wed, 22 Jan 2025 02:32:33 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1737513154; bh=JQKXYhOuSy/+p5/9b2zzzkONdT4iGz9x57qeeTXfhFs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=SEiWo7fXVZytGaLCZqNyJI6E8igL0xdGLM6KPTB6V9eVTJY+M5Qm5zaoTIHv0bA6c HT2pWx+S3GyBX02ZioiCYGWClhtWhmqQqMCMEvUA5KlGtlNzfJiVhU94FWKWY3Losn IEItb80tB20lJc18mg0Md3KkmDtj8EFJPDZPRREwxRyhTIG6fsi0s9Yr1LH9TYhsMT 5gosoGOdFScuCMfeg28nlQrwtjfWrBc0oqn3DBPkKQ9Mm4vBUF2O93yl6urs1DfrR2 O+Yf26Uuu6ezEje/R1du+7F0Y7WPxB/S30PpqLjwh4ITZd5N0klFEMlN83t3Q34sic WwrJFC076SnrA== From: Josh Poimboeuf To: x86@kernel.org Cc: Peter Zijlstra , Steven Rostedt , Ingo Molnar , Arnaldo Carvalho de Melo , linux-kernel@vger.kernel.org, Indu Bhagat , Mark Rutland , Alexander Shishkin , Jiri Olsa , Namhyung Kim , Ian Rogers , Adrian Hunter , linux-perf-users@vger.kernel.org, Mark Brown , linux-toolchains@vger.kernel.org, Jordan Rome , Sam James , linux-trace-kernel@vger.kernel.org, Andrii Nakryiko , Jens Remus , Mathieu Desnoyers , Florian Weimer , Andy Lutomirski , Masami Hiramatsu , Weinan Liu Subject: [PATCH v4 39/39] perf tools: Merge deferred user callchains Date: Tue, 21 Jan 2025 18:31:31 -0800 Message-ID: X-Mailer: git-send-email 2.48.1 In-Reply-To: References: Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 
Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Namhyung Kim Save samples with deferred callchains in a separate list and deliver them after merging the user callchains. If users don't want to merge they can set tool->merge_deferred_callchains to false to prevent the behavior. With previous result, now perf script will show the merged callchains. $ perf script perf 801 [000] 18.031793: 1 cycles:P: ffffffff91a14c36 __intel_pmu_enable_all.isra.0+0x56 ([kernel.kall= syms]) ffffffff91d373e9 perf_ctx_enable+0x39 ([kernel.kallsyms]) ffffffff91d36af7 event_function+0xd7 ([kernel.kallsyms]) ffffffff91d34222 remote_function+0x42 ([kernel.kallsyms]) ffffffff91c1ebe1 generic_exec_single+0x61 ([kernel.kallsyms]) ffffffff91c1edac smp_call_function_single+0xec ([kernel.kallsyms]) ffffffff91d37a9d event_function_call+0x10d ([kernel.kallsyms]) ffffffff91d33557 perf_event_for_each_child+0x37 ([kernel.kallsyms= ]) ffffffff91d47324 _perf_ioctl+0x204 ([kernel.kallsyms]) ffffffff91d47c43 perf_ioctl+0x33 ([kernel.kallsyms]) ffffffff91e2f216 __x64_sys_ioctl+0x96 ([kernel.kallsyms]) ffffffff9265f1ae do_syscall_64+0x9e ([kernel.kallsyms]) ffffffff92800130 entry_SYSCALL_64+0xb0 ([kernel.kallsyms]) 7fb5fc22034b __GI___ioctl+0x3b (/usr/lib/x86_64-linux-gnu/lib= c.so.6) ... The old output can be get using --no-merge-callchain option. Also perf report can get the user callchain entry at the end. 
$ perf report --no-children --percent-limit=3D0 --stdio -q -S __intel_pmu= _enable_all.isra.0 # symbol: __intel_pmu_enable_all.isra.0 0.00% perf [kernel.kallsyms] | ---__intel_pmu_enable_all.isra.0 perf_ctx_enable event_function remote_function generic_exec_single smp_call_function_single event_function_call perf_event_for_each_child _perf_ioctl perf_ioctl __x64_sys_ioctl do_syscall_64 entry_SYSCALL_64 __GI___ioctl Signed-off-by: Namhyung Kim Signed-off-by: Josh Poimboeuf --- tools/perf/Documentation/perf-script.txt | 5 ++ tools/perf/builtin-script.c | 5 +- tools/perf/util/callchain.c | 24 +++++++++ tools/perf/util/callchain.h | 3 ++ tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 1 + tools/perf/util/session.c | 63 +++++++++++++++++++++++- tools/perf/util/tool.c | 1 + tools/perf/util/tool.h | 1 + 9 files changed, 102 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Document= ation/perf-script.txt index b72866ef270b..69f018b3d199 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -518,6 +518,11 @@ include::itrace.txt[] The known limitations include exception handing such as setjmp/longjmp will have calls/returns not match. =20 +--merge-callchains:: + Enable merging deferred user callchains if available. This is the + default behavior. If you want to see separate CALLCHAIN_DEFERRED + records for some reason, use --no-merge-callchains explicitly. 
+ :GMEXAMPLECMD: script :GMEXAMPLESUBCMD: include::guest-files.txt[] diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2b9085fa18bd..d18ada14a83a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -4032,6 +4032,7 @@ int cmd_script(int argc, const char **argv) bool header_only =3D false; bool script_started =3D false; bool unsorted_dump =3D false; + bool merge_deferred_callchains =3D true; char *rec_script_path =3D NULL; char *rep_script_path =3D NULL; struct perf_session *session; @@ -4185,6 +4186,8 @@ int cmd_script(int argc, const char **argv) "Guest code can be found in hypervisor process"), OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr, "Enable LBR callgraph stitching approach"), + OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains, + "Enable merge deferred user callchains"), OPTS_EVSWITCH(&script.evswitch), OPT_END() }; @@ -4440,7 +4443,7 @@ int cmd_script(int argc, const char **argv) script.tool.throttle =3D process_throttle_event; script.tool.unthrottle =3D process_throttle_event; script.tool.ordering_requires_timestamps =3D true; - script.tool.merge_deferred_callchains =3D false; + script.tool.merge_deferred_callchains =3D merge_deferred_callchains; session =3D perf_session__new(&data, &script.tool); if (IS_ERR(session)) return PTR_ERR(session); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 0c7564747a14..d1114491c3da 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1832,3 +1832,27 @@ int sample__for_each_callchain_node(struct thread *t= hread, struct evsel *evsel, } return 0; } + +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain) +{ + u64 nr_orig =3D sample_orig->callchain->nr - 1; + u64 nr_deferred =3D sample_callchain->callchain->nr; + struct ip_callchain *callchain; + + callchain =3D calloc(1 + nr_orig + nr_deferred, sizeof(u64)); + if (callchain =3D=3D NULL) { + 
sample_orig->deferred_callchain =3D false; + return -ENOMEM; + } + + callchain->nr =3D nr_orig + nr_deferred; + /* copy except for the last PERF_CONTEXT_USER_DEFERRED */ + memcpy(callchain->ips, sample_orig->callchain->ips, nr_orig * sizeof(u64)= ); + /* copy deferred use callchains */ + memcpy(&callchain->ips[nr_orig], sample_callchain->callchain->ips, + nr_deferred * sizeof(u64)); + + sample_orig->callchain =3D callchain; + return 0; +} diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 86ed9e4d04f9..89785125ed25 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -317,4 +317,7 @@ int sample__for_each_callchain_node(struct thread *thre= ad, struct evsel *evsel, struct perf_sample *sample, int max_stack, bool symbols, callchain_iter_fn cb, void *data); =20 +int sample__merge_deferred_callchain(struct perf_sample *sample_orig, + struct perf_sample *sample_callchain); + #endif /* __PERF_CALLCHAIN_H */ diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f0dd174e2deb..39a43980f6aa 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -82,6 +82,7 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_= map *cpus, evlist->ctl_fd.ack =3D -1; evlist->ctl_fd.pos =3D -1; evlist->nr_br_cntr =3D -1; + INIT_LIST_HEAD(&evlist->deferred_samples); } =20 struct evlist *evlist__new(void) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index adddb1db1ad2..f78275af1553 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -84,6 +84,7 @@ struct evlist { int pos; /* index at evlist core object to check signals */ } ctl_fd; struct event_enable_timer *eet; + struct list_head deferred_samples; }; =20 struct evsel_str_handler { diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 493070180279..e02e69ce2f77 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1266,6 +1266,56 @@ static int evlist__deliver_sample(struct 
evlist *evl= ist, const struct perf_tool per_thread); } =20 +struct deferred_event { + struct list_head list; + union perf_event *event; +}; + +static int evlist__deliver_deferred_samples(struct evlist *evlist, + const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct deferred_event *de, *tmp; + struct evsel *evsel; + int ret =3D 0; + + if (!tool->merge_deferred_callchains) { + evsel =3D evlist__id2evsel(evlist, sample->id); + return tool->callchain_deferred(tool, event, sample, + evsel, machine); + } + + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) { + struct perf_sample orig_sample; + + ret =3D evlist__parse_sample(evlist, de->event, &orig_sample); + if (ret < 0) { + pr_err("failed to parse original sample\n"); + break; + } + + if (sample->tid !=3D orig_sample.tid) + continue; + + evsel =3D evlist__id2evsel(evlist, orig_sample.id); + sample__merge_deferred_callchain(&orig_sample, sample); + ret =3D evlist__deliver_sample(evlist, tool, de->event, + &orig_sample, evsel, machine); + + if (orig_sample.deferred_callchain) + free(orig_sample.callchain); + + list_del(&de->list); + free(de); + + if (ret) + break; + } + return ret; +} + static int machines__deliver_event(struct machines *machines, struct evlist *evlist, union perf_event *event, @@ -1294,6 +1344,16 @@ static int machines__deliver_event(struct machines *= machines, return 0; } dump_sample(evsel, event, sample, perf_env__arch(machine->env)); + if (sample->deferred_callchain && tool->merge_deferred_callchains) { + struct deferred_event *de =3D malloc(sizeof(*de)); + + if (de =3D=3D NULL) + return -ENOMEM; + + de->event =3D event; + list_add_tail(&de->list, &evlist->deferred_samples); + return 0; + } return evlist__deliver_sample(evlist, tool, event, sample, evsel, machin= e); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -1353,7 +1413,8 @@ static int machines__deliver_event(struct machines 
*m= achines, return tool->aux_output_hw_id(tool, event, sample, machine); case PERF_RECORD_CALLCHAIN_DEFERRED: dump_deferred_callchain(evsel, event, sample); - return tool->callchain_deferred(tool, event, sample, evsel, machine); + return evlist__deliver_deferred_samples(evlist, tool, event, + sample, machine); default: ++evlist->stats.nr_unknown_events; return -1; diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index e78f16de912e..385043e06627 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -238,6 +238,7 @@ void perf_tool__init(struct perf_tool *tool, bool order= ed_events) tool->cgroup_events =3D false; tool->no_warn =3D false; tool->show_feat_hdr =3D SHOW_FEAT_NO_HEADER; + tool->merge_deferred_callchains =3D true; =20 tool->sample =3D process_event_sample_stub; tool->mmap =3D process_event_stub; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 9987bbde6d5e..d06580478ab1 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -87,6 +87,7 @@ struct perf_tool { bool cgroup_events; bool no_warn; bool dont_split_sample_group; + bool merge_deferred_callchains; enum show_feature_header show_feat_hdr; }; =20 --=20 2.48.1