[PATCH v3 07/19] unwind: Add user space unwinding API

Josh Poimboeuf posted 19 patches 1 year, 3 months ago
There is a newer version of this series
[PATCH v3 07/19] unwind: Add user space unwinding API
Posted by Josh Poimboeuf 1 year, 3 months ago
Introduce a user space unwinder API which provides a generic way to
unwind user stacks.

Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
---
 arch/Kconfig                |  7 +++
 include/linux/unwind_user.h | 41 +++++++++++++++
 kernel/Makefile             |  1 +
 kernel/unwind/Makefile      |  1 +
 kernel/unwind/user.c        | 99 +++++++++++++++++++++++++++++++++++++
 5 files changed, 149 insertions(+)
 create mode 100644 include/linux/unwind_user.h
 create mode 100644 kernel/unwind/Makefile
 create mode 100644 kernel/unwind/user.c

diff --git a/arch/Kconfig b/arch/Kconfig
index 7a95c1052cd5..ee8ec97ea0ef 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -435,6 +435,13 @@ config HAVE_HARDLOCKUP_DETECTOR_ARCH
 	  It uses the same command line parameters, and sysctl interface,
 	  as the generic hardlockup detectors.
 
+config UNWIND_USER
+	bool
+
+config HAVE_UNWIND_USER_FP
+	bool
+	select UNWIND_USER
+
 config HAVE_PERF_REGS
 	bool
 	help
diff --git a/include/linux/unwind_user.h b/include/linux/unwind_user.h
new file mode 100644
index 000000000000..9d28db06f33f
--- /dev/null
+++ b/include/linux/unwind_user.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNWIND_USER_H
+#define _LINUX_UNWIND_USER_H
+
+#include <linux/types.h>
+
+enum unwind_user_type {	/* selects which frame layout unwind_user_next() applies */
+	UNWIND_USER_TYPE_FP,	/* unwind_user_next() uses fp_frame for this type */
+};
+
+struct unwind_stacktrace {	/* output buffer filled by unwind_user() */
+	unsigned int	nr;	/* number of entries stored; unwind_user() resets it to 0 first */
+	unsigned long	*entries;	/* array receiving one IP per frame; not allocated by unwind_user() */
+};
+
+struct unwind_user_frame {	/* offsets unwind_user_next() uses to locate the caller's frame */
+	s32 cfa_off;	/* added to the base register (fp or sp) to form the CFA */
+	s32 ra_off;	/* offset from the CFA of the return address; 0 means it is not read */
+	s32 fp_off;	/* offset from the CFA of the saved fp; 0 means fp is left unchanged */
+	bool use_fp;	/* true: CFA is based on state->fp; false: on state->sp */
+};
+
+struct unwind_user_state {	/* cursor describing the frame currently being visited */
+	unsigned long ip;	/* instruction pointer of the current frame */
+	unsigned long sp;	/* stack pointer; set to the CFA after each successful step */
+	unsigned long fp;	/* frame pointer; updated only when the frame layout has a non-zero fp_off */
+	enum unwind_user_type type;	/* unwind_user_start() always sets UNWIND_USER_TYPE_FP */
+	bool done;	/* set once the unwind cannot continue; ends for_each_user_frame() */
+};
+
+/* Synchronous interfaces: */
+
+int unwind_user_start(struct unwind_user_state *state);
+int unwind_user_next(struct unwind_user_state *state);
+
+int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries);
+
+#define for_each_user_frame(state) \
+	for (unwind_user_start((state)); !(state)->done; unwind_user_next((state))) /* return values are ignored; the loop ends via (state)->done */
+
+#endif /* _LINUX_UNWIND_USER_H */
diff --git a/kernel/Makefile b/kernel/Makefile
index 87866b037fbe..6cb4b0e02a34 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -50,6 +50,7 @@ obj-y += rcu/
 obj-y += livepatch/
 obj-y += dma/
 obj-y += entry/
+obj-y += unwind/
 obj-$(CONFIG_MODULES) += module/
 
 obj-$(CONFIG_KCMP) += kcmp.o
diff --git a/kernel/unwind/Makefile b/kernel/unwind/Makefile
new file mode 100644
index 000000000000..349ce3677526
--- /dev/null
+++ b/kernel/unwind/Makefile
@@ -0,0 +1 @@
+ obj-$(CONFIG_UNWIND_USER) += user.o
diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c
new file mode 100644
index 000000000000..54b989810a0e
--- /dev/null
+++ b/kernel/unwind/user.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic interfaces for unwinding user space
+ *
+ * Copyright (C) 2024 Josh Poimboeuf <jpoimboe@kernel.org>
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/sched/task_stack.h>
+#include <linux/unwind_user.h>
+#include <linux/uaccess.h>
+#include <asm/unwind_user.h>
+
+static struct unwind_user_frame fp_frame = {	/* frame layout used for UNWIND_USER_TYPE_FP */
+	ARCH_INIT_USER_FP_FRAME	/* initializer expected from <asm/unwind_user.h> (not shown here) */
+};
+
+int unwind_user_next(struct unwind_user_state *state)	/* step @state to the caller's frame; returns 0, or -EINVAL when the unwind is over */
+{
+	struct unwind_user_frame _frame;	/* placeholder only: the switch below repoints 'frame' or hits BUG() */
+	struct unwind_user_frame *frame = &_frame;
+	unsigned long prev_ip, cfa, fp, ra = 0;	/* ra stays 0 when the layout has ra_off == 0 */
+
+	if (state->done)	/* a previous step (or unwind_user_start()) already ended the unwind */
+		return -EINVAL;
+
+	prev_ip = state->ip;
+
+	switch (state->type) {
+	case UNWIND_USER_TYPE_FP:
+		frame = &fp_frame;
+		break;
+	default:
+		BUG();	/* no other unwind type is handled */
+	}
+
+	cfa = (frame->use_fp ? state->fp : state->sp) + frame->cfa_off;	/* CFA = base register + layout offset */
+
+	if (frame->ra_off && get_user(ra, (unsigned long __user *)(cfa + frame->ra_off)))
+		goto the_end;	/* reading the return address from user memory failed */
+
+	if (ra == prev_ip)	/* NOTE(review): also true for a recursive call returning to the same address, so the unwind stops early there */
+		goto the_end;
+
+	if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off)))
+		goto the_end;	/* reading the saved fp from user memory failed */
+
+	state->sp = cfa;	/* state is only modified once both reads have succeeded */
+	state->ip = ra;
+	if (frame->fp_off)	/* 'fp' was only read when fp_off is non-zero */
+		state->fp = fp;
+
+	return 0;
+
+the_end:
+	state->done = true;	/* sp/ip/fp keep the values of the last good frame */
+	return -EINVAL;
+}
+
+int unwind_user_start(struct unwind_user_state *state)	/* initialize @state from current's saved registers; returns 0, or -EINVAL if current has no mm */
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	memset(state, 0, sizeof(*state));	/* also clears ->done */
+
+	if (!current->mm) {	/* no mm (presumably a kernel thread): nothing to unwind */
+		state->done = true;
+		return -EINVAL;
+	}
+
+	state->type = UNWIND_USER_TYPE_FP;	/* the only type the enum defines */
+
+	state->sp = user_stack_pointer(regs);	/* sp/ip/fp are seeded from regs */
+	state->ip = instruction_pointer(regs);
+	state->fp = frame_pointer(regs);
+
+	return 0;
+}
+
+int unwind_user(struct unwind_stacktrace *trace, unsigned int max_entries)	/* store up to @max_entries user IPs of current in trace->entries */
+{
+	struct unwind_user_state state;	/* initialized by unwind_user_start() inside for_each_user_frame() */
+
+	trace->nr = 0;	/* reset before any early return */
+
+	if (!max_entries)
+		return -EINVAL;
+
+	if (!current->mm)	/* no mm: empty trace, reported as success */
+		return 0;
+
+	for_each_user_frame(&state) {
+		trace->entries[trace->nr++] = state.ip;	/* first entry is the IP set by unwind_user_start() */
+		if (trace->nr >= max_entries)	/* assumes entries[] has room for max_entries values */
+			break;
+	}
+
+	return 0;	/* a failed step is not reported; trace->nr tells how many frames were recorded */
+}
-- 
2.47.0
Re: [PATCH v3 07/19] unwind: Add user space unwinding API
Posted by Jens Remus 1 year, 2 months ago
On 28.10.2024 22:47, Josh Poimboeuf wrote:
> Introduce a user space unwinder API which provides a generic way to
> unwind user stacks.

...

> diff --git a/kernel/unwind/user.c b/kernel/unwind/user.c

...

> +int unwind_user_next(struct unwind_user_state *state)
> +{
> +	struct unwind_user_frame _frame;
> +	struct unwind_user_frame *frame = &_frame;
> +	unsigned long prev_ip, cfa, fp, ra = 0;
> +
> +	if (state->done)
> +		return -EINVAL;
> +
> +	prev_ip = state->ip;
> +
> +	switch (state->type) {
> +	case UNWIND_USER_TYPE_FP:
> +		frame = &fp_frame;
> +		break;
> +	default:
> +		BUG();
> +	}
> +
> +	cfa = (frame->use_fp ? state->fp : state->sp) + frame->cfa_off;
> +
> +	if (frame->ra_off && get_user(ra, (unsigned long __user *)(cfa + frame->ra_off)))
> +		goto the_end;
> +
> +	if (ra == prev_ip)
> +		goto the_end;

This seems too restrictive to me, as it effectively prevents
unwinding from recursive functions, e.g. Glibc internal merge sort
msort_with_tmp():

$ perf record -F 9999 --call-graph fp /usr/bin/objdump -wdWF /usr/bin/objdump
$ perf script
...
objdump    8314 236064.515562:     100010 task-clock:ppp:
                  100630a compare_symbols+0x2a (/usr/bin/objdump)
              3ffb9e58e7c msort_with_tmp.part.0+0x15c (/usr/lib64/libc.so.6)
              3ffb9e58d76 msort_with_tmp.part.0+0x56 (/usr/lib64/libc.so.6)
[unwinding unexpectedly stops]

Would it be an option to only stop unwinding if both the IP and SP do
not change?

if (sp == prev_sp && ra == prev_ra)
	goto the_end;

> +
> +	if (frame->fp_off && get_user(fp, (unsigned long __user *)(cfa + frame->fp_off)))
> +		goto the_end;
> +
> +	state->sp = cfa;
> +	state->ip = ra;
> +	if (frame->fp_off)
> +		state->fp = fp;
> +
> +	return 0;
> +
> +the_end:
> +	state->done = true;
> +	return -EINVAL;
> +}

...

Thanks and regards,
Jens
-- 
Jens Remus
Linux on Z Development (D3303) and z/VSE Support
+49-7031-16-1128 Office
jremus@de.ibm.com

IBM

IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Böblingen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/

Re: [PATCH v3 07/19] unwind: Add user space unwinding API
Posted by Josh Poimboeuf 1 year, 2 months ago
On Fri, Dec 06, 2024 at 11:29:21AM +0100, Jens Remus wrote:
> On 28.10.2024 22:47, Josh Poimboeuf wrote:
> > +	if (ra == prev_ip)
> > +		goto the_end;
> 
> This seems too restrictive to me, as it effectively prevents
> unwinding from recursive functions, e.g. Glibc internal merge sort
> msort_with_tmp():
> 
> $ perf record -F 9999 --call-graph fp /usr/bin/objdump -wdWF /usr/bin/objdump
> $ perf script
> ...
> objdump    8314 236064.515562:     100010 task-clock:ppp:
>                  100630a compare_symbols+0x2a (/usr/bin/objdump)
>              3ffb9e58e7c msort_with_tmp.part.0+0x15c (/usr/lib64/libc.so.6)
>              3ffb9e58d76 msort_with_tmp.part.0+0x56 (/usr/lib64/libc.so.6)
> [unwinding unexpectedly stops]
> 
> Would it be an option to only stop unwinding if both the IP and SP do
> not change?
> 
> if (sp == prev_sp && ra == prev_ra)
> 	goto the_end;

Good point, I've already fixed that for the next version (not yet
posted).  I believe the only thing we really need to check here is that
the unwind is heading in the right direction:

if (cfa <= state->sp)
	goto the_end;

-- 
Josh
Re: [PATCH v3 07/19] unwind: Add user space unwinding API
Posted by Jens Remus 1 year, 1 month ago
On 09.12.2024 21:54, Josh Poimboeuf wrote:
> On Fri, Dec 06, 2024 at 11:29:21AM +0100, Jens Remus wrote:
>> On 28.10.2024 22:47, Josh Poimboeuf wrote:
>>> +	if (ra == prev_ip)
>>> +		goto the_end;
>>
>> This seems too restrictive to me, as it effectively prevents
>> unwinding from recursive functions, e.g. Glibc internal merge sort
>> msort_with_tmp():
>>
>> $ perf record -F 9999 --call-graph fp /usr/bin/objdump -wdWF /usr/bin/objdump
>> $ perf script
>> ...
>> objdump    8314 236064.515562:     100010 task-clock:ppp:
>>                   100630a compare_symbols+0x2a (/usr/bin/objdump)
>>               3ffb9e58e7c msort_with_tmp.part.0+0x15c (/usr/lib64/libc.so.6)
>>               3ffb9e58d76 msort_with_tmp.part.0+0x56 (/usr/lib64/libc.so.6)
>> [unwinding unexpectedly stops]
>>
>> Would it be an option to only stop unwinding if both the IP and SP do
>> not change?
>>
>> if (sp == prev_sp && ra == prev_ra)
>> 	goto the_end;
> 
> Good point, I've already fixed that for the next version (not yet
> posted).  I believe the only thing we really need to check here is that
> the unwind is heading in the right direction:
> 
> if (cfa <= state->sp)
> 	goto the_end;

Assuming the x86 definition of the CFA (CFA == SP at call site) this
translates into:

if (sp <= state->sp)
	goto the_end;

That won't work for architectures that pass the return address in a
register instead of on the stack, such as s390. At least in the
topmost frame the unwound SP may be unchanged. For instance when in
the function prologue or when in a leaf function.

One of my patches for s390 support introduces a state->first flag,
indicating whether it is the topmost user space frame. Using that
your check could be extended to:

if ((state->first && sp < state->sp) || (!state->first && sp <= state->sp))
	goto the_end;

Which could be simplified to:

if (sp <= state->sp - state->first)
	goto the_end;

Btw. neither would work for architectures with an upwards-growing
stack, such as hppa. Not sure if that needs to be considered.

Regards,
Jens
-- 
Jens Remus
Linux on Z Development (D3303) and z/VSE Support
+49-7031-16-1128 Office
jremus@de.ibm.com

IBM

IBM Deutschland Research & Development GmbH; Vorsitzender des Aufsichtsrats: Wolfgang Wendt; Geschäftsführung: David Faller; Sitz der Gesellschaft: Böblingen; Registergericht: Amtsgericht Stuttgart, HRB 243294
IBM Data Privacy Statement: https://www.ibm.com/privacy/

Re: [PATCH v3 07/19] unwind: Add user space unwinding API
Posted by Josh Poimboeuf 1 year, 1 month ago
On Wed, Dec 11, 2024 at 03:53:26PM +0100, Jens Remus wrote:
> On 09.12.2024 21:54, Josh Poimboeuf wrote:
> > if (cfa <= state->sp)
> > 	goto the_end;
> 
> Assuming the x86 definition of the CFA (CFA == SP at call site) this
> translates into:
> 
> if (sp <= state->sp)
> 	goto the_end;
> 
> That won't work for architectures that pass the return address in a
> register instead of on the stack, such as s390. At least in the
> topmost frame the unwound SP may be unchanged. For instance when in
> the function prologue or when in a leaf function.
> 
> One of my patches for s390 support introduces a state->first flag,
> indicating whether it is the topmost user space frame. Using that
> your check could be extended to:
> 
> if ((state->first && sp < state->sp) || (!state->first && sp <= state->sp))
> 	goto the_end;
> 
> Which could be simplified to:
> 
> if (sp <= state->sp - state->first)
> 	goto the_end;

Since my patches are x86-only, how about I leave the "sp <= state->sp"
check and then you add something like that in your patches on top?

> Btw. neither would work for architectures with an upwards-growing
> stack, such as hppa. Not sure if that needs to be considered.

I don't think that's needed unless and until sframe becomes supported for
such an arch.

-- 
Josh