With proper exception boundary detection, it is possible to implement
arch_stack_walk_reliable without sframe.

Note that arch_stack_walk_reliable does not guarantee getting a reliable
stack trace in all scenarios. Instead, it can reliably detect when the
stack trace is not reliable, which is enough to provide reliable
livepatching.

Signed-off-by: Song Liu <song@kernel.org>
---
 arch/arm64/Kconfig             |  2 +-
 arch/arm64/kernel/stacktrace.c | 66 +++++++++++++++++++++++++---------
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 701d980ea921..31d5e1ee6089 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -276,6 +276,7 @@ config ARM64
         select HAVE_SOFTIRQ_ON_OWN_STACK
         select USER_STACKTRACE_SUPPORT
         select VDSO_GETRANDOM
+        select HAVE_RELIABLE_STACKTRACE
         help
           ARM 64-bit (AArch64) Linux support.

@@ -2500,4 +2501,3 @@ endmenu # "CPU Power Management"
 source "drivers/acpi/Kconfig"

 source "arch/arm64/kvm/Kconfig"
-
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 1d9d51d7627f..7e07911d8694 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -56,6 +56,7 @@ struct kunwind_state {
         enum kunwind_source source;
         union unwind_flags flags;
         struct pt_regs *regs;
+        bool end_on_unreliable;
 };

 static __always_inline void
@@ -230,8 +231,26 @@ kunwind_next_frame_record(struct kunwind_state *state)
         new_fp = READ_ONCE(record->fp);
         new_pc = READ_ONCE(record->lr);

-        if (!new_fp && !new_pc)
-                return kunwind_next_frame_record_meta(state);
+        if (!new_fp && !new_pc) {
+                int ret;
+
+                ret = kunwind_next_frame_record_meta(state);
+                if (ret < 0) {
+                        /*
+                         * This covers two different conditions:
+                         * 1. ret == -ENOENT, unwinding is done.
+                         * 2. ret == -EINVAL, unwinding hit error.
+                         */
+                        return ret;
+                }
+                /*
+                 * Searching across exception boundaries. The stack is now
+                 * unreliable.
+                 */
+                if (state->end_on_unreliable)
+                        return -EINVAL;
+                return 0;
+        }

         unwind_consume_stack(&state->common, info, fp, sizeof(*record));

@@ -277,21 +296,24 @@ kunwind_next(struct kunwind_state *state)

 typedef bool (*kunwind_consume_fn)(const struct kunwind_state *state, void *cookie);

-static __always_inline void
+static __always_inline int
 do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
            void *cookie)
 {
-        if (kunwind_recover_return_address(state))
-                return;
+        int ret;

-        while (1) {
-                int ret;
+        ret = kunwind_recover_return_address(state);
+        if (ret)
+                return ret;

+        while (1) {
                 if (!consume_state(state, cookie))
-                        break;
+                        return -EINVAL;
                 ret = kunwind_next(state);
+                if (ret == -ENOENT)
+                        return 0;
                 if (ret < 0)
-                        break;
+                        return ret;
         }
 }

@@ -324,10 +346,10 @@ do_kunwind(struct kunwind_state *state, kunwind_consume_fn consume_state,
                         : stackinfo_get_unknown();                \
         })

-static __always_inline void
+static __always_inline int
 kunwind_stack_walk(kunwind_consume_fn consume_state,
                    void *cookie, struct task_struct *task,
-                   struct pt_regs *regs)
+                   struct pt_regs *regs, bool end_on_unreliable)
 {
         struct stack_info stacks[] = {
                 stackinfo_get_task(task),
@@ -348,11 +370,12 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
                         .stacks = stacks,
                         .nr_stacks = ARRAY_SIZE(stacks),
                 },
+                .end_on_unreliable = end_on_unreliable,
         };

         if (regs) {
                 if (task != current)
-                        return;
+                        return -EINVAL;
                 kunwind_init_from_regs(&state, regs);
         } else if (task == current) {
                 kunwind_init_from_caller(&state);
@@ -360,7 +383,7 @@ kunwind_stack_walk(kunwind_consume_fn consume_state,
                 kunwind_init_from_task(&state, task);
         }

-        do_kunwind(&state, consume_state, cookie);
+        return do_kunwind(&state, consume_state, cookie);
 }

 struct kunwind_consume_entry_data {
@@ -384,7 +407,18 @@ noinline noinstr void arch_stack_walk(stack_trace_consume_fn consume_entry,
                 .cookie = cookie,
         };

-        kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs);
+        kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, regs, false);
+}
+
+noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+                                              void *cookie, struct task_struct *task)
+{
+        struct kunwind_consume_entry_data data = {
+                .consume_entry = consume_entry,
+                .cookie = cookie,
+        };
+
+        return kunwind_stack_walk(arch_kunwind_consume_entry, &data, task, NULL, true);
 }

 struct bpf_unwind_consume_entry_data {
@@ -409,7 +443,7 @@ noinline noinstr void arch_bpf_stack_walk(bool (*consume_entry)(void *cookie, u6
                 .cookie = cookie,
         };

-        kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL);
+        kunwind_stack_walk(arch_bpf_unwind_consume_entry, &data, current, NULL, false);
 }

 static const char *state_source_string(const struct kunwind_state *state)
@@ -456,7 +490,7 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
                 return;

         printk("%sCall trace:\n", loglvl);
-        kunwind_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs);
+        kunwind_stack_walk(dump_backtrace_entry, (void *)loglvl, tsk, regs, false);

         put_task_stack(tsk);
 }
--
2.47.1
On Thu, Mar 20, 2025 at 10:15:58AM -0700, Song Liu wrote:
> With proper exception boundary detection, it is possible to implement
> arch_stack_walk_reliable without sframe.
>
> Note that arch_stack_walk_reliable does not guarantee getting a reliable
> stack trace in all scenarios. Instead, it can reliably detect when the
> stack trace is not reliable, which is enough to provide reliable
> livepatching.
>
> Signed-off-by: Song Liu <song@kernel.org>
> ---
> [...]
> @@ -230,8 +231,26 @@ kunwind_next_frame_record(struct kunwind_state *state)
>          new_fp = READ_ONCE(record->fp);
>          new_pc = READ_ONCE(record->lr);
>
> -        if (!new_fp && !new_pc)
> -                return kunwind_next_frame_record_meta(state);
> +        if (!new_fp && !new_pc) {
> +                int ret;
> +
> +                ret = kunwind_next_frame_record_meta(state);
> +                if (ret < 0) {
> +                        /*
> +                         * This covers two different conditions:
> +                         * 1. ret == -ENOENT, unwinding is done.
> +                         * 2. ret == -EINVAL, unwinding hit error.
> +                         */
> +                        return ret;
> +                }
> +                /*
> +                 * Searching across exception boundaries. The stack is now
> +                 * unreliable.
> +                 */
> +                if (state->end_on_unreliable)
> +                        return -EINVAL;
> +                return 0;
> +        }
My original expectation for this was that we'd propagate the
errors, and then all the reliability logic would live in a consume_entry
wrapper like we have for BPF, e.g.
| static __always_inline bool
| arch_reliable_kunwind_consume_entry(const struct kunwind_state *state, void *cookie)
| {
| struct kunwind_consume_entry_data *data = cookie;
|
| /*
| * When unwinding across an exception boundary, the PC will be
| * reliable, but we do not know whether the FP is live, and so we
| * cannot perform the *next* unwind reliably.
| *
| * Give up as soon as we hit an exception boundary.
| */
| if (state->source == KUNWIND_SOURCE_REGS_PC)
| return false;
|
| return data->consume_entry(data->cookie, state->common.pc);
| }
|
| noinline noinstr int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
| void *cookie,
| struct task_struct *task)
| {
| int ret;
| struct kunwind_consume_entry_data data = {
| .consume_entry = consume_entry,
| .cookie = cookie,
| };
|
| ret = kunwind_stack_walk(arch_reliable_kunwind_consume_entry, &data,
| task, NULL);
| return ret == -ENOENT ? 0 : ret;
| }
... and then in future we can add anything specific to reliable
stacktrace there.
That aside, this generally looks good to me. The only thing that I note
is that we're lacking a check on the return value of
kretprobe_find_ret_addr(), and we should return -EINVAL when that is
NULL, but that should never happen in normal operation.
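For reference, a minimal sketch of what that check could look like, folded
into the existing kretprobe handling in kunwind_recover_return_address()
(the actual fixup in the branch below may differ):

| #ifdef CONFIG_KRETPROBES
|         if (is_kretprobe_trampoline(state->common.pc)) {
|                 unsigned long orig_pc;
|
|                 orig_pc = kretprobe_find_ret_addr(state->task,
|                                                   (void *)state->common.fp,
|                                                   &state->kr_cur);
|                 /* Unexpected, but don't report a bogus return address. */
|                 if (!orig_pc)
|                         return -EINVAL;
|                 state->common.pc = orig_pc;
|         }
| #endif /* CONFIG_KRETPROBES */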
I've pushed an arm64/stacktrace-updates branch [1] with fixups for those
as two separate commits atop this one. If that looks good to you I
suggest we post that as a series and ask Will and Catalin to take that
as-is.
I'll look at the actual patching bits now.
Mark.
[1] https://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git/ arm64/stacktrace-updates
On Mon, May 19, 2025 at 02:41:06PM +0100, Mark Rutland wrote:
> I've pushed an arm64/stacktrace-updates branch [1] with fixups for those
> as two separate commits atop this one. If that looks good to you I
> suggest we post that as a series and ask Will and Catalin to take that
> as-is.

Yes, please post those to the list for review.

Will
On Tue, May 20, 2025 at 03:28:45PM +0100, Will Deacon wrote:
> On Mon, May 19, 2025 at 02:41:06PM +0100, Mark Rutland wrote:
> > I've pushed an arm64/stacktrace-updates branch [1] with fixups for those
> > as two separate commits atop this one. If that looks good to you I
> > suggest we post that as a series and ask Will and Catalin to take that
> > as-is.
>
> Yes, please post those to the list for review.

Sure; I'm just prepping that now...

Mark.
Hi Mark,

Thanks for your review and the fixups!

On Mon, May 19, 2025 at 6:41 AM Mark Rutland <mark.rutland@arm.com> wrote:
> [...]
>
> ... and then in future we can add anything specific to reliable
> stacktrace there.
>
> That aside, this generally looks good to me. The only thing that I note
> is that we're lacking a check on the return value of
> kretprobe_find_ret_addr(), and we should return -EINVAL when that is
> NULL, but that should never happen in normal operation.
>
> I've pushed an arm64/stacktrace-updates branch [1] with fixups for those
> as two separate commits atop this one. If that looks good to you I
> suggest we post that as a series and ask Will and Catalin to take that
> as-is.
>
> I'll look at the actual patching bits now.
>
> Mark.
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/mark/linux.git/ arm64/stacktrace-updates

For the fixups:

Reviewed-and-tested-by: Song Liu <song@kernel.org>

Tested with 2/2 of this set and samples/livepatch.

Song
Hi Song,
On 10:15 Thu 20 Mar, Song Liu wrote:
> With proper exception boundary detection, it is possible to implement
> arch_stack_walk_reliable without sframe.
>
> Note that arch_stack_walk_reliable does not guarantee getting a reliable
> stack trace in all scenarios. Instead, it can reliably detect when the
> stack trace is not reliable, which is enough to provide reliable
> livepatching.
>
> Signed-off-by: Song Liu <song@kernel.org>
> [...]

Tested-by: Andrea della Porta <andrea.porta@suse.com>
Thanks,
Andrea
On Thu, 20 Mar 2025, Song Liu wrote:

> With proper exception boundary detection, it is possible to implement
> arch_stack_walk_reliable without sframe.
>
> Note that arch_stack_walk_reliable does not guarantee getting a reliable
> stack trace in all scenarios. Instead, it can reliably detect when the
> stack trace is not reliable, which is enough to provide reliable
> livepatching.
>
> Signed-off-by: Song Liu <song@kernel.org>

Looks good to me.

Reviewed-by: Miroslav Benes <mbenes@suse.cz>

M
On Thu, Mar 20, 2025 at 10:15:58AM -0700, Song Liu wrote:
> With proper exception boundary detection, it is possible to implement
> arch_stack_walk_reliable without sframe.
>
> Note that arch_stack_walk_reliable does not guarantee getting a reliable
> stack trace in all scenarios. Instead, it can reliably detect when the
> stack trace is not reliable, which is enough to provide reliable
> livepatching.
>
> Signed-off-by: Song Liu <song@kernel.org>

Reviewed-by: Josh Poimboeuf <jpoimboe@kernel.org>

--
Josh
On Thu, Mar 20, 2025 at 10:16 AM Song Liu <song@kernel.org> wrote:
>
>  static __always_inline void
> @@ -230,8 +231,26 @@ kunwind_next_frame_record(struct kunwind_state *state)
>          new_fp = READ_ONCE(record->fp);
>          new_pc = READ_ONCE(record->lr);
>
> -        if (!new_fp && !new_pc)
> -                return kunwind_next_frame_record_meta(state);
> +        if (!new_fp && !new_pc) {
> +                int ret;
> +
> +                ret = kunwind_next_frame_record_meta(state);
The exception case kunwind_next_regs_pc() will return 0 when the unwind
succeeds. Should we return a different value for the success case of
kunwind_next_regs_pc()?
> +                if (ret < 0) {
> +                        /*
> +                         * This covers two different conditions:
> +                         * 1. ret == -ENOENT, unwinding is done.
> +                         * 2. ret == -EINVAL, unwinding hit error.
> +                         */
> +                        return ret;
> +                }
> +                /*
> +                 * Searching across exception boundaries. The stack is now
> +                 * unreliable.
> +                 */
> +                if (state->end_on_unreliable)
> +                        return -EINVAL;
> +                return 0;
> +        }
On Thu, Mar 20, 2025 at 10:46 AM Weinan Liu <wnliu@google.com> wrote:
>
> On Thu, Mar 20, 2025 at 10:16 AM Song Liu <song@kernel.org> wrote:
> >
> >  static __always_inline void
> > @@ -230,8 +231,26 @@ kunwind_next_frame_record(struct kunwind_state *state)
> >          new_fp = READ_ONCE(record->fp);
> >          new_pc = READ_ONCE(record->lr);
> >
> > -        if (!new_fp && !new_pc)
> > -                return kunwind_next_frame_record_meta(state);
> > +        if (!new_fp && !new_pc) {
> > +                int ret;
> > +
> > +                ret = kunwind_next_frame_record_meta(state);
>
> The exception case kunwind_next_regs_pc() will return 0 when the unwind
> succeeds. Should we return a different value for the success case of
> kunwind_next_regs_pc()?
I am assuming that once the unwinder hits an exception boundary, the stack
is not 100% reliable. This does mean we will return -EINVAL for some stack
walks that are actually reliable, but this is safer and good enough for
livepatch. IIUC, an SFrame-based unwinder should not have this limitation.
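For context, livepatch consumes this roughly as follows (a simplified
sketch along the lines of klp_check_stack() in kernel/livepatch/transition.c,
not the exact code), so a spurious -EINVAL only delays the transition for
that task:

static int klp_check_stack(struct task_struct *task, const char **oldname)
{
        static unsigned long entries[MAX_STACK_ENTRIES];
        int ret;

        /* A negative value means "no reliable stack trace available". */
        ret = stack_trace_save_tsk_reliable(task, entries, ARRAY_SIZE(entries));
        if (ret < 0)
                return -EINVAL; /* the transition is retried for this task later */

        /* ... otherwise check that no function being patched is in entries[] ... */
        return 0;
}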
Thanks,
Song