From: Josh Poimboeuf <jpoimboe@kernel.org>
Add optional support for user space frame pointer unwinding. If
supported, the arch needs to enable CONFIG_HAVE_UNWIND_USER_FP and
define ARCH_INIT_USER_FP_FRAME.
By encoding the frame offsets in struct unwind_user_frame, much of this
code can also be reused for future unwinder implementations like sframe.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Co-developed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
arch/Kconfig | 4 +++
include/asm-generic/Kbuild | 1 +
include/asm-generic/unwind_user.h | 5 +++
include/linux/unwind_user.h | 5 +++
include/linux/unwind_user_types.h | 1 +
kernel/unwind/user.c | 51 +++++++++++++++++++++++++++++--
6 files changed, 65 insertions(+), 2 deletions(-)
create mode 100644 include/asm-generic/unwind_user.h
diff --git a/arch/Kconfig b/arch/Kconfig
index ea59e5d7cc69..8e3fd723bd74 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -438,6 +438,10 @@ config HAVE_HARDLOCKUP_DETECTOR_ARCH
config UNWIND_USER
bool
+config HAVE_UNWIND_USER_FP
+ bool
+ select UNWIND_USER
+
config HAVE_PERF_REGS
bool
help
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 8675b7b4ad23..295c94a3ccc1 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -59,6 +59,7 @@ mandatory-y += tlbflush.h
mandatory-y += topology.h
mandatory-y += trace_clock.h
mandatory-y += uaccess.h
+mandatory-y += unwind_user.h
mandatory-y += vermagic.h
mandatory-y += vga.h
mandatory-y += video.h
diff --git a/include/asm-generic/unwind_user.h b/include/asm-generic/unwind_user.h
new file mode 100644
index 000000000000..b8882b909944
--- /dev/null
+++ b/include/asm-generic/unwind_user.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_UNWIND_USER_H
+#define _ASM_GENERIC_UNWIND_USER_H
+
+#endif /* _ASM_GENERIC_UNWIND_USER_H */
diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h
index aa7923c1384f..a405111c41b0 100644
--- a/include/linux/unwind_user.h
+++ b/include/linux/unwind_user.h
@@ -3,6 +3,11 @@
#define _LINUX_UNWIND_USER_H
#include <linux/unwind_user_types.h>
+#include <asm/unwind_user.h>
+
+#ifndef ARCH_INIT_USER_FP_FRAME
+ #define ARCH_INIT_USER_FP_FRAME
+#endif
int unwind_user_start(struct unwind_user_state *state);
int unwind_user_next(struct unwind_user_state *state);
diff --git a/include/linux/unwind_user_types.h b/include/linux/unwind_user_types.h
index 6ed1b4ae74e1..65bd070eb6b0 100644
--- a/include/linux/unwind_user_types.h
+++ b/include/linux/unwind_user_types.h
@@ -6,6 +6,7 @@
enum unwind_user_type {
UNWIND_USER_TYPE_NONE,
+ UNWIND_USER_TYPE_FP,
};
struct unwind_stacktrace {
diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c
index d30449328981..1201d655654a 100644
--- a/kernel/unwind/user.c
+++ b/kernel/unwind/user.c
@@ -6,10 +6,54 @@
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/unwind_user.h>
+#include <linux/uaccess.h>
+
+static struct unwind_user_frame fp_frame = {
+ ARCH_INIT_USER_FP_FRAME
+};
+
+static inline bool fp_state(struct unwind_user_state *state)
+{
+ return IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP) &&
+ state->type == UNWIND_USER_TYPE_FP;
+}
int unwind_user_next(struct unwind_user_state *state)
{
- /* no implementation yet */
+ struct unwind_user_frame *frame;
+ unsigned long cfa = 0, fp, ra = 0;
+
+ if (state->done)
+ return -EINVAL;
+
+ if (fp_state(state))
+ frame = &fp_frame;
+ else
+ goto done;
+
+ /* Get the Canonical Frame Address (CFA) */
+ cfa = (frame->use_fp ? state->fp : state->sp) + frame->cfa_off;
+
+ /* stack going in wrong direction? */
+ if (cfa <= state->sp)
+ goto done;
+
+ /* Find the Return Address (RA) */
+ if (get_user(ra, (unsigned long *)(cfa + frame->ra_off)))
+ goto done;
+
+ if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off)))
+ goto done;
+
+ state->ip = ra;
+ state->sp = cfa;
+ if (frame->fp_off)
+ state->fp = fp;
+
+ return 0;
+
+done:
+ state->done = true;
return -EINVAL;
}
@@ -24,7 +68,10 @@ int unwind_user_start(struct unwind_user_state *state)
return -EINVAL;
}
- state->type = UNWIND_USER_TYPE_NONE;
+ if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP))
+ state->type = UNWIND_USER_TYPE_FP;
+ else
+ state->type = UNWIND_USER_TYPE_NONE;
state->ip = instruction_pointer(regs);
state->sp = user_stack_pointer(regs);
--
2.47.2
On Mon, 30 Jun 2025 at 17:54, Steven Rostedt <rostedt@goodmis.org> wrote: > > + /* stack going in wrong direction? */ > + if (cfa <= state->sp) > + goto done; I suspect this should do a lot more testing. > + /* Find the Return Address (RA) */ > + if (get_user(ra, (unsigned long *)(cfa + frame->ra_off))) > + goto done; > + > + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off))) > + goto done; .. and this should check the frame for validity too. At a minimum it should be properly aligned, but things like "it had better be above the current frame" to avoid having some loop would seem to be a good idea. Maybe even check that it's the same vma? Linus
* Linus Torvalds: > On Mon, 30 Jun 2025 at 17:54, Steven Rostedt <rostedt@goodmis.org> wrote: >> >> + /* stack going in wrong direction? */ >> + if (cfa <= state->sp) >> + goto done; > > I suspect this should do a lot more testing. > >> + /* Find the Return Address (RA) */ >> + if (get_user(ra, (unsigned long *)(cfa + frame->ra_off))) >> + goto done; >> + >> + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off))) >> + goto done; > > .. and this should check the frame for validity too. At a minimum it > should be properly aligned, but things like "it had better be above > the current frame" to avoid having some loop would seem to be a good > idea. I don't think SFrame as-is requires stacks to be contiguous. Maybe there could be a per-frame flag that indicates whether a stack switch is expected? Thanks, Florian
On Tue, 01 Jul 2025 06:46:14 +0200 Florian Weimer <fweimer@redhat.com> wrote: > * Linus Torvalds: > > > On Mon, 30 Jun 2025 at 17:54, Steven Rostedt <rostedt@goodmis.org> wrote: > >> > >> + /* stack going in wrong direction? */ > >> + if (cfa <= state->sp) > >> + goto done; > > > > I suspect this should do a lot more testing. > > > >> + /* Find the Return Address (RA) */ > >> + if (get_user(ra, (unsigned long *)(cfa + frame->ra_off))) > >> + goto done; > >> + > >> + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off))) > >> + goto done; > > > > .. and this should check the frame for validity too. At a minimum it > > should be properly aligned, but things like "it had better be above > > the current frame" to avoid having some loop would seem to be a good > > idea. > > I don't think SFrame as-is requires stacks to be contiguous. Maybe > there could be a per-frame flag that indicates whether a stack switch is > expected? Looking at the current code of perf, it appears to only check that the address is valid to read from user space. Perhaps that's the only check needed here too? Now this loop will not go into an infinite loop as the code has: for_each_user_frame(&state) { trace->entries[trace->nr++] = state.ip; if (trace->nr >= max_entries) break; } Where #define for_each_user_frame(state) \ for (unwind_user_start((state)); !(state)->done; unwind_user_next((state))) It will stop at "max_entries" even if the user space tries to make it go forever. max_entries is either 511 (on 64 bit) or 1023 (on 32 bit), as it is defined as: /* Make the cache fit in a 4K page */ #define UNWIND_MAX_ENTRIES \ ((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long)) Now, perhaps we need to verify that the cfa is indeed canonical, but what other test do we have perform? In the future, this code will also be handling JIT and possibly interpreted code. How much do we really want to limit the stack walking due to checks? -- Steve
On Mon, 30 Jun 2025 19:10:09 -0700 Linus Torvalds <torvalds@linux-foundation.org> wrote: > On Mon, 30 Jun 2025 at 17:54, Steven Rostedt <rostedt@goodmis.org> wrote: > > > > + /* stack going in wrong direction? */ > > + if (cfa <= state->sp) > > + goto done; > > I suspect this should do a lot more testing. Sure. > > > + /* Find the Return Address (RA) */ > > + if (get_user(ra, (unsigned long *)(cfa + frame->ra_off))) > > + goto done; > > + > > + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off))) > > + goto done; > > .. and this should check the frame for validity too. At a minimum it > should be properly aligned, but things like "it had better be above > the current frame" to avoid having some loop would seem to be a good > idea. Makes sense. > > Maybe even check that it's the same vma? Hmm, I call on to Jens Remus and ask if s390 can do anything whacky here? Where something that isn't allowed on other architectures may be allowed there? I know s390 has some strange type of stack usage. -- Steve
On 01.07.2025 04:56, Steven Rostedt wrote: > On Mon, 30 Jun 2025 19:10:09 -0700 > Linus Torvalds <torvalds@linux-foundation.org> wrote: >> On Mon, 30 Jun 2025 at 17:54, Steven Rostedt <rostedt@goodmis.org> wrote: >>> >>> + /* stack going in wrong direction? */ >>> + if (cfa <= state->sp) >>> + goto done; >> >> I suspect this should do a lot more testing. > > Sure. The above assumes: curr_frame_sp = state->sp prev_frame_sp = cfa // for arches that define CFA as SP at call site On s390 the prev_frame_sp may be equal to curr_frame_sp for the topmost frame, as long as the topmost function did not allocate any stack. For instance when early in the prologue or when in a leaf function that does not require any stack space. My s390 sframe support patches would therefore currently change above check to: /* stack going in wrong direction? */ if (sp <= state->sp - topmost) goto done; I assume that should be the case for all architectures whose function call instruction does not modify the SP, unlike x86-64's CALL does. >>> + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off))) >>> + goto done; >> >> .. and this should check the frame for validity too. At a minimum it >> should be properly aligned, but things like "it had better be above >> the current frame" to avoid having some loop would seem to be a good >> idea. > > Makes sense. On s390 the FP (register) value does not necessarily need to be above the SP value, as the s390x ELF ABI does only designate a "preferred" FP register, so that the FP register may be used for other purposes, when a FP is not required in a function. So the output FP value cannot be checked on s390. But the input FP value could probably be checked as follows before computing the CFA: if (frame->use_fp && state->fp < state->sp) goto done; /* Get the Canonical Frame Address (CFA) */ cfa = (frame->use_fp ? state->fp : state->sp) + frame->cfa_off; >> Maybe even check that it's the same vma? > > Hmm, I call on to Jens Remus and ask if s390 can do anything whacky here? > Where something that isn't allowed on other architectures may be allowed > there? I know s390 has some strange type of stack usage. On s390, if a FP is required for dynamic stack allocation, it is only initialized as late as possible, that is usually after static stack allocation. Therefore SP == FP is rather seldom the case. Regards, Jens -- Jens Remus Linux on Z Development (D3303) +49-7031-16-1128 Office jremus@de.ibm.com IBM IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Böblingen; Registergericht: Amtsgericht Stuttgart, HRB 243294 IBM Data Privacy Statement: https://www.ibm.com/privacy/
On Tue, 1 Jul 2025 17:36:55 +0200 Jens Remus <jremus@linux.ibm.com> wrote: > On s390 the prev_frame_sp may be equal to curr_frame_sp for the topmost > frame, as long as the topmost function did not allocate any stack. For > instance when early in the prologue or when in a leaf function that does > not require any stack space. My s390 sframe support patches would > therefore currently change above check to: > > /* stack going in wrong direction? */ > if (sp <= state->sp - topmost) > goto done; How do you calculate "topmost" then? Is it another field you add to "state"? -- Steve
On 03.07.2025 01:50, Steven Rostedt wrote: > On Tue, 1 Jul 2025 17:36:55 +0200 > Jens Remus <jremus@linux.ibm.com> wrote: > >> On s390 the prev_frame_sp may be equal to curr_frame_sp for the topmost >> frame, as long as the topmost function did not allocate any stack. For >> instance when early in the prologue or when in a leaf function that does >> not require any stack space. My s390 sframe support patches would >> therefore currently change above check to: >> >> /* stack going in wrong direction? */ >> if (sp <= state->sp - topmost) >> goto done; > > How do you calculate "topmost" then? > > Is it another field you add to "state"? Correct. It is a boolean set to true in unwind_user_start() and set to false in unwind_user_next() when updating the state. I assume most architectures need above change, as their SP at function entry should be equal to the SP at call site (unlike x86-64 due to CALL). s390 also needs this information to allow restoring of FP/RA saved in other registers (instead of on the stack) only for the topmost frame. For any other frame arbitrary register contents would not be available, as user unwind only unwinds SP, FP, and RA. I would post my s390 sframe support patches as RFC once you have provided a merged sframe branch as discussed in: https://lore.kernel.org/all/20250702124737.565934b5@batman.local.home/ Regards, Jens -- Jens Remus Linux on Z Development (D3303) +49-7031-16-1128 Office jremus@de.ibm.com IBM IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Böblingen; Registergericht: Amtsgericht Stuttgart, HRB 243294 IBM Data Privacy Statement: https://www.ibm.com/privacy/
On Thu, 3 Jul 2025 18:21:10 +0200 Jens Remus <jremus@linux.ibm.com> wrote: > >> /* stack going in wrong direction? */ > >> if (sp <= state->sp - topmost) > >> goto done; > > > > How do you calculate "topmost" then? > > > > Is it another field you add to "state"? > > Correct. It is a boolean set to true in unwind_user_start() and set to > false in unwind_user_next() when updating the state. So it's subtracting 1 or zero? So that the topmost can be equal. Well, that would need a bit of commenting. > > I assume most architectures need above change, as their SP at function > entry should be equal to the SP at call site (unlike x86-64 due to CALL). > > s390 also needs this information to allow restoring of FP/RA saved in > other registers (instead of on the stack) only for the topmost frame. > For any other frame arbitrary register contents would not be available, > as user unwind only unwinds SP, FP, and RA. > > I would post my s390 sframe support patches as RFC once you have > provided a merged sframe branch as discussed in: > https://lore.kernel.org/all/20250702124737.565934b5@batman.local.home/ I did have a merge branch on my repo. But I was hoping to see your code so that I can add this to this patch before having to post again. But now I'm posting without this change, as I don't want to screw it up. I think I know what it it looks like, but it would be better to see what you did to make sure what I envision is correct. Oh well. I'll post v13 without it. -- Steve
On Mon, 30 Jun 2025 22:56:03 -0400 Steven Rostedt <rostedt@goodmis.org> wrote: > On Mon, 30 Jun 2025 19:10:09 -0700 > Linus Torvalds <torvalds@linux-foundation.org> wrote: > > > On Mon, 30 Jun 2025 at 17:54, Steven Rostedt <rostedt@goodmis.org> wrote: > > > > > > + /* stack going in wrong direction? */ > > > + if (cfa <= state->sp) > > > + goto done; > > > > I suspect this should do a lot more testing. > > Sure. Adding Kees too. Kees, I'd like to get some security eyes on this code to take a look at it. As it is making decisions on input from user space, I'd like to have more security folks looking at this to make sure that some malicious task can't set up its stack in such a way that it can exploit something here. The parsing of the sframe code (latest version net yet posted) will need a similar audit. Thanks, -- Steve > > > > > > + /* Find the Return Address (RA) */ > > > + if (get_user(ra, (unsigned long *)(cfa + frame->ra_off))) > > > + goto done; > > > + > > > + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off))) > > > + goto done; > > > > .. and this should check the frame for validity too. At a minimum it > > should be properly aligned, but things like "it had better be above > > the current frame" to avoid having some loop would seem to be a good > > idea. > > Makes sense. > > > > > Maybe even check that it's the same vma? > > Hmm, I call on to Jens Remus and ask if s390 can do anything whacky here? > Where something that isn't allowed on other architectures may be allowed > there? I know s390 has some strange type of stack usage. > > -- Steve
© 2016 - 2025 Red Hat, Inc.