Introduce a user space unwinder API which provides a generic way to
unwind user stacks.
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
arch/Kconfig | 7 +++
include/linux/unwind_user.h | 41 +++++++++++++++
kernel/Makefile | 1 +
kernel/unwind/Makefile | 1 +
kernel/unwind/user.c | 99 +++++++++++++++++++++++++++++++++++++
5 files changed, 149 insertions(+)
create mode 100644 include/linux/unwind_user.h
create mode 100644 kernel/unwind/Makefile
create mode 100644 kernel/unwind/user.c
diff --git a/arch/Kconfig b/arch/Kconfig
index 7a95c1052cd5..ee8ec97ea0ef 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -435,6 +435,13 @@ config HAVE_HARDLOCKUP_DETECTOR_ARCH
It uses the same command line parameters, and sysctl interface,
as the generic hardlockup detectors.
+config UNWIND_USER
+ bool
+
+config HAVE_UNWIND_USER_FP
+ bool
+ select UNWIND_USER
+
config HAVE_PERF_REGS
bool
help
diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h
new file mode 100644
index 000000000000..9d28db06f33f
--- /dev/null
+++ b/include/linux/unwind_user.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNWIND_USER_H
+#define _LINUX_UNWIND_USER_H
+
+#include <linux/types.h>
+
+enum unwind_user_type {
+ UNWIND_USER_TYPE_FP,
+};
+
+struct unwind_stacktrace {
+ unsigned int nr;
+ unsigned long *entries;
+};
+
+struct unwind_user_frame {
+ s32 cfa_off;
+ s32 ra_off;
+ s32 fp_off;
+ bool use_fp;
+};
+
+struct unwind_user_state {
+ unsigned long ip;
+ unsigned long sp;
+ unsigned long fp;
+ enum unwind_user_type type;
+ bool done;
+};
+
+/* Synchronous interfaces: */
+
+int unwind_user_start(struct unwind_user_state *state);
+int unwind_user_next(struct unwind_user_state *state);
+
+int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries);
+
+#define for_each_user_frame(state) \
+ for (unwind_user_start((state)); !(state)->done; unwind_user_next((state)))
+
+#endif /* _LINUX_UNWIND_USER_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 87866b037fbe..6cb4b0e02a34 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -50,6 +50,7 @@ obj-y += rcu/
obj-y += livepatch/
obj-y += dma/
obj-y += entry/
+obj-y += unwind/
obj-$(CONFIG_MODULES) += module/
obj-$(CONFIG_KCMP) += kcmp.o
diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile
new file mode 100644
index 000000000000..349ce3677526
--- /dev/null
+++ b/kernel/unwind/Makefile
@@ -0,0 +1 @@
+ obj-$(CONFIG_UNWIND_USER) += user.o
diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c
new file mode 100644
index 000000000000..54b989810a0e
--- /dev/null
+++ b/kernel/unwind/user.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+* Generic interfaces for unwinding user space
+*
+* Copyright (C) 2024 Josh Poimboeuf <jpoimboe@kernel.org>
+*/
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/unwind_user.h>
+#include <linux/uaccess.h>
+#include <asm/unwind_user.h>
+
+static struct unwind_user_frame fp_frame = {
+ ARCH_INIT_USER_FP_FRAME
+};
+
+int unwind_user_next(struct unwind_user_state *state)
+{
+ struct unwind_user_frame _frame;
+ struct unwind_user_frame *frame = &_frame;
+ unsigned long prev_ip, cfa, fp, ra = 0;
+
+ if (state->done)
+ return -EINVAL;
+
+ prev_ip = state->ip;
+
+ switch (state->type) {
+ case UNWIND_USER_TYPE_FP:
+ frame = &fp_frame;
+ break;
+ default:
+ BUG();
+ }
+
+ cfa = (frame->use_fp ? state->fp : state->sp) + frame->cfa_off;
+
+ if (frame->ra_off && get_user(ra, (unsigned long __user *)(cfa + frame->ra_off)))
+ goto the_end;
+
+ if (ra == prev_ip)
+ goto the_end;
+
+ if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off)))
+ goto the_end;
+
+ state->sp = cfa;
+ state->ip = ra;
+ if (frame->fp_off)
+ state->fp = fp;
+
+ return 0;
+
+the_end:
+ state->done = true;
+ return -EINVAL;
+}
+
+int unwind_user_start(struct unwind_user_state *state)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+
+ memset(state, 0, sizeof(*state));
+
+ if (!current->mm) {
+ state->done = true;
+ return -EINVAL;
+ }
+
+ state->type = UNWIND_USER_TYPE_FP;
+
+ state->sp = user_stack_pointer(regs);
+ state->ip = instruction_pointer(regs);
+ state->fp = frame_pointer(regs);
+
+ return 0;
+}
+
+int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries)
+{
+ struct unwind_user_state state;
+
+ trace->nr = 0;
+
+ if (!max_entries)
+ return -EINVAL;
+
+ if (!current->mm)
+ return 0;
+
+ for_each_user_frame(&state) {
+ trace->entries[trace->nr++] = state.ip;
+ if (trace->nr >= max_entries)
+ break;
+ }
+
+ return 0;
+}
--
2.47.0
On 28.10.2024 22:47, Josh Poimboeuf wrote:
> Introduce a user space unwinder API which provides a generic way to
> unwind user stacks.
...
> diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c
...
> +int unwind_user_next(struct unwind_user_state *state)
> +{
> + struct unwind_user_frame _frame;
> + struct unwind_user_frame *frame = &_frame;
> + unsigned long prev_ip, cfa, fp, ra = 0;
> +
> + if (state->done)
> + return -EINVAL;
> +
> + prev_ip = state->ip;
> +
> + switch (state->type) {
> + case UNWIND_USER_TYPE_FP:
> + frame = &fp_frame;
> + break;
> + default:
> + BUG();
> + }
> +
> + cfa = (frame->use_fp ? state->fp : state->sp) + frame->cfa_off;
> +
> + if (frame->ra_off && get_user(ra, (unsigned long __user *)(cfa + frame->ra_off)))
> + goto the_end;
> +
> + if (ra == prev_ip)
> + goto the_end;
This seems too restrictive to me, as it effectively prevents
unwinding from recursive functions, e.g. Glibc internal merge sort
msort_with_tmp():
$ perf record -F 9999 --call-graph fp /usr/bin/objdump -wdWF /usr/bin/objdump
$ perf script
...
objdump 8314 236064.515562: 100010 task-clock:ppp:
100630a compare_symbols+0x2a (/usr/bin/objdump)
3ffb9e58e7c msort_with_tmp.part.0+0x15c (/usr/lib64/libc.so.6)
3ffb9e58d76 msort_with_tmp.part.0+0x56 (/usr/lib64/libc.so.6)
[unwinding unexpectedly stops]
Would it be an option to only stop unwinding if both the IP and SP do
not change?
if (sp == prev_sp && ra == prev_ra)
goto the_end;
> +
> + if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off)))
> + goto the_end;
> +
> + state->sp = cfa;
> + state->ip = ra;
> + if (frame->fp_off)
> + state->fp = fp;
> +
> + return 0;
> +
> +the_end:
> + state->done = true;
> + return -EINVAL;
> +}
...
Thanks and regards,
Jens
--
Jens Remus
Linux on Z Development (D3303) and z/VSE Support
+49-7031-16-1128 Office
jremus@de.ibm.com
IBM
IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Böblingen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/
On Fri, Dec 06, 2024 at 11:29:21AM +0100, Jens Remus wrote:
> On 28.10.2024 22:47, Josh Poimboeuf wrote:
> > + if (ra == prev_ip)
> > + goto the_end;
>
> This seems too restrictive to me, as it effectively prevents
> unwinding from recursive functions, e.g. Glibc internal merge sort
> msort_with_tmp():
>
> $ perf record -F 9999 --call-graph fp /usr/bin/objdump -wdWF /usr/bin/objdump
> $ perf script
> ...
> objdump 8314 236064.515562: 100010 task-clock:ppp:
> 100630a compare_symbols+0x2a (/usr/bin/objdump)
> 3ffb9e58e7c msort_with_tmp.part.0+0x15c (/usr/lib64/libc.so.6)
> 3ffb9e58d76 msort_with_tmp.part.0+0x56 (/usr/lib64/libc.so.6)
> [unwinding unexpectedly stops]
>
> Would it be an option to only stop unwinding if both the IP and SP do
> not change?
>
> if (sp == prev_sp && ra == prev_ra)
> goto the_end;

Good point, I've already fixed that for the next version (not yet
posted). I believe the only thing we really need to check here is that
the unwind is heading in the right direction:

if (cfa <= state->sp)
goto the_end;

--
Josh
On 09.12.2024 21:54, Josh Poimboeuf wrote:
> On Fri, Dec 06, 2024 at 11:29:21AM +0100, Jens Remus wrote:
>> On 28.10.2024 22:47, Josh Poimboeuf wrote:
>>> + if (ra == prev_ip)
>>> + goto the_end;
>>
>> This seems too restrictive to me, as it effectively prevents
>> unwinding from recursive functions, e.g. Glibc internal merge sort
>> msort_with_tmp():
>>
>> $ perf record -F 9999 --call-graph fp /usr/bin/objdump -wdWF /usr/bin/objdump
>> $ perf script
>> ...
>> objdump 8314 236064.515562: 100010 task-clock:ppp:
>> 100630a compare_symbols+0x2a (/usr/bin/objdump)
>> 3ffb9e58e7c msort_with_tmp.part.0+0x15c (/usr/lib64/libc.so.6)
>> 3ffb9e58d76 msort_with_tmp.part.0+0x56 (/usr/lib64/libc.so.6)
>> [unwinding unexpectedly stops]
>>
>> Would it be an option to only stop unwinding if both the IP and SP do
>> not change?
>>
>> if (sp == prev_sp && ra == prev_ra)
>> goto the_end;
>
> Good point, I've already fixed that for the next version (not yet
> posted). I believe the only thing we really need to check here is that
> the unwind is heading in the right direction:
>
> if (cfa <= state->sp)
> goto the_end;

Assuming the x86 definition of the CFA (CFA == SP at call site) this
translates into:

if (sp <= state->sp)
goto the_end;

That won't work for architectures that pass the return address in a
register instead of on the stack, such as s390. At least in the
topmost frame the unwound SP may be unchanged. For instance when in
the function prologue or when in a leaf function.

One of my patches for s390 support introduces a state->first flag,
indicating whether it is the topmost user space frame. Using that
your check could be extended to:

if ((state->first && sp < state->sp) || (!state->first && sp <= state->sp))
goto the_end;

Which could be simplified to:

if (sp <= state->sp - state->first)
goto the_end;

Btw. neither would work for architectures with an upwards-growing
stack, such as hppa. Not sure if that needs to be considered.

Regards,
Jens
--
Jens Remus
Linux on Z Development (D3303) and z/VSE Support
+49-7031-16-1128 Office
jremus@de.ibm.com
IBM
IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Böblingen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/
On Wed, Dec 11, 2024 at 03:53:26PM +0100, Jens Remus wrote:
> On 09.12.2024 21:54, Josh Poimboeuf wrote:
> > if (cfa <= state->sp)
> > goto the_end;
>
> Assuming the x86 definition of the CFA (CFA == SP at call site) this
> translates into:
>
> if (sp <= state->sp)
> goto the_end;
>
> That won't work for architectures that pass the return address in a
> register instead of on the stack, such as s390. At least in the
> topmost frame the unwound SP may be unchanged. For instance when in
> the function prologue or when in a leaf function.
>
> One of my patches for s390 support introduces a state->first flag,
> indicating whether it is the topmost user space frame. Using that
> your check could be extended to:
>
> if ((state->first && sp < state->sp) || (!state->first && sp <= state->sp))
> goto the_end;
>
> Which could be simplified to:
>
> if (sp <= state->sp - state->first)
> goto the_end;

Since my patches are x86-only, how about I leave the "sp <= state->sp"
check and then you add something like that in your patches on top?

> Btw. neither would work for architectures with an upwards-growing
> stack, such as hppa. Not sure if that needs to be considered.

I don't think that's needed until if/when sframe becomes supported for
such an arch.

--
Josh
© 2016 - 2026 Red Hat, Inc.