arch/x86/kernel/dumpstack.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+)
When task A walks task B's stack without suspending it, the continuous
changes in task B's stack (and corresponding KASAN shadow tags) may cause
task A to hit KASAN redzones when accessing obsolete values on the stack,
resulting in false positive reports. [1][2]
The specific issue occurs as follows:
Task A (walk other tasks' stacks) Task B (running)
1. echo t > /proc/sysrq-trigger
show_trace_log_lvl
regs = unwind_get_entry_regs()
show_regs_if_on_stack(regs)
2. The stack data pointed by
`regs` keeps changing, and
so are the tags in its
KASAN shadow region.
__show_regs(regs)
regs->ax, regs->bx, ...
3. hit KASAN redzones, OOB
Fix this by detecting asynchronous stack unwinding scenarios through
`task != current` during unwinding, and disabling KASAN checks when this
scenario occurs.
[1] https://lore.kernel.org/all/000000000000cb8e3a05c4ed84bb@google.com/
[2] KASAN out-of-bounds:
[332706.552324] BUG: KASAN: out-of-bounds in __show_regs+0x4b/0x340
[332706.552433] Read of size 8 at addr ffff88d24999fb20 by task sysrq_t_test.sh/3977032
[332706.552562]
[332706.552652] CPU: 36 PID: 3977032 Comm: sysrq_t_test.sh Kdump: loaded Not tainted 6.6.0+ #20
[332706.552783] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 3.35 10/20/2016
[332706.552906] Call Trace:
[332706.552998] <TASK>
[332706.553089] dump_stack_lvl+0x32/0x50
[332706.553193] print_address_description.constprop.0+0x6b/0x3d0
[332706.553303] print_report+0xbe/0x280
[332706.553409] ? __virt_addr_valid+0xed/0x160
[332706.553512] ? __show_regs+0x4b/0x340
[332706.553612] kasan_report+0xa8/0xe0
[332706.553716] ? __show_regs+0x4b/0x340
[332706.553816] ? asm_exc_page_fault+0x22/0x30
[332706.553919] __show_regs+0x4b/0x340
[332706.554021] ? asm_exc_page_fault+0x22/0x30
[332706.554123] show_trace_log_lvl+0x274/0x3b0
[332706.554229] ? load_elf_binary+0xf6e/0x1610
[332706.554330] ? rep_stos_alternative+0x40/0x80
[332706.554439] sched_show_task+0x211/0x290
[332706.554544] ? __pfx_sched_show_task+0x10/0x10
[332706.554648] ? _find_next_bit+0x6/0xc0
[332706.554749] ? _find_next_bit+0x37/0xc0
[332706.554852] show_state_filter+0x72/0x130
[332706.554956] sysrq_handle_showstate+0x7/0x10
[332706.555062] __handle_sysrq+0x146/0x2d0
[332706.555165] write_sysrq_trigger+0x2f/0x50
[332706.555270] proc_reg_write+0xdd/0x140
[332706.555372] vfs_write+0x1ff/0x5f0
[332706.555474] ? __pfx_vfs_write+0x10/0x10
[332706.555576] ? __pfx___handle_mm_fault+0x10/0x10
[332706.555682] ? __fget_light+0x99/0xf0
[332706.555785] ksys_write+0xb8/0x150
[332706.555887] ? __pfx_ksys_write+0x10/0x10
[332706.555989] ? ktime_get_coarse_real_ts64+0x4e/0x70
[332706.556094] do_syscall_64+0x55/0x100
[332706.556196] entry_SYSCALL_64_after_hwframe+0x78/0xe2
Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack")
Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
---
v3: Address Josh comment, move kasan checks to show_trace_log_lvl, and
controlled by task != current.
v2: https://lore.kernel.org/all/20250829094744.3133324-1-wutengda@huaweicloud.com/
v1: https://lore.kernel.org/all/20250818130715.2904264-1-wutengda@huaweicloud.com/
---
arch/x86/kernel/dumpstack.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 71ee20102a8a..6d1ae0d77ffc 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -189,9 +189,20 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
unsigned long visit_mask = 0;
int graph_idx = 0;
bool partial = false;
+ bool kasan_disabled = false;
printk("%sCall Trace:\n", log_lvl);
+ /*
+ * KASAN should be disabled during walking another task's stacks
+ * to prevent false positives, as values on these stacks may change
+ * concurrently with task execution.
+ */
+ if (task && task != current) {
+ kasan_disable_current();
+ kasan_disabled = true;
+ }
+
unwind_start(&state, task, regs, stack);
stack = stack ?: get_stack_pointer(task, regs);
regs = unwind_get_entry_regs(&state, &partial);
@@ -301,6 +312,9 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
if (stack_name)
printk("%s </%s>\n", log_lvl, stack_name);
}
+
+ if (kasan_disabled)
+ kasan_enable_current();
}
void show_stack(struct task_struct *task, unsigned long *sp,
--
2.34.1
On Sat, Aug 30, 2025 at 09:25:56AM +0000, Tengda Wu wrote: > When task A walks task B's stack without suspending it, the continuous > changes in task B's stack (and corresponding KASAN shadow tags) may cause > task A to hit KASAN redzones when accessing obsolete values on the stack, > resulting in false positive reports. [1][2] What kind of a use case is that even? Why do we care? > The specific issue occurs as follows: > > Task A (walk other tasks' stacks) Task B (running) > 1. echo t > /proc/sysrq-trigger > > show_trace_log_lvl > regs = unwind_get_entry_regs() > show_regs_if_on_stack(regs) > 2. The stack data pointed by > `regs` keeps changing, and > so are the tags in its > KASAN shadow region. > __show_regs(regs) > regs->ax, regs->bx, ... > 3. hit KASAN redzones, OOB > > Fix this by detecting asynchronous stack unwinding scenarios through > `task != current` during unwinding, and disabling KASAN checks when this > scenario occurs. > > [1] https://lore.kernel.org/all/000000000000cb8e3a05c4ed84bb@google.com/ > [2] KASAN out-of-bounds: > [332706.552324] BUG: KASAN: out-of-bounds in __show_regs+0x4b/0x340 > [332706.552433] Read of size 8 at addr ffff88d24999fb20 by task sysrq_t_test.sh/3977032 > [332706.552562] > [332706.552652] CPU: 36 PID: 3977032 Comm: sysrq_t_test.sh Kdump: loaded Not tainted 6.6.0+ #20 > [332706.552783] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 3.35 10/20/2016 > [332706.552906] Call Trace: > [332706.552998] <TASK> > [332706.553089] dump_stack_lvl+0x32/0x50 > [332706.553193] print_address_description.constprop.0+0x6b/0x3d0 > [332706.553303] print_report+0xbe/0x280 > [332706.553409] ? __virt_addr_valid+0xed/0x160 > [332706.553512] ? __show_regs+0x4b/0x340 > [332706.553612] kasan_report+0xa8/0xe0 > [332706.553716] ? __show_regs+0x4b/0x340 > [332706.553816] ? asm_exc_page_fault+0x22/0x30 > [332706.553919] __show_regs+0x4b/0x340 > [332706.554021] ? asm_exc_page_fault+0x22/0x30 > [332706.554123] show_trace_log_lvl+0x274/0x3b0 > [332706.554229] ? load_elf_binary+0xf6e/0x1610 > [332706.554330] ? rep_stos_alternative+0x40/0x80 > [332706.554439] sched_show_task+0x211/0x290 > [332706.554544] ? __pfx_sched_show_task+0x10/0x10 > [332706.554648] ? _find_next_bit+0x6/0xc0 > [332706.554749] ? _find_next_bit+0x37/0xc0 > [332706.554852] show_state_filter+0x72/0x130 > [332706.554956] sysrq_handle_showstate+0x7/0x10 > [332706.555062] __handle_sysrq+0x146/0x2d0 > [332706.555165] write_sysrq_trigger+0x2f/0x50 > [332706.555270] proc_reg_write+0xdd/0x140 > [332706.555372] vfs_write+0x1ff/0x5f0 > [332706.555474] ? __pfx_vfs_write+0x10/0x10 > [332706.555576] ? __pfx___handle_mm_fault+0x10/0x10 > [332706.555682] ? __fget_light+0x99/0xf0 > [332706.555785] ksys_write+0xb8/0x150 > [332706.555887] ? __pfx_ksys_write+0x10/0x10 > [332706.555989] ? ktime_get_coarse_real_ts64+0x4e/0x70 > [332706.556094] do_syscall_64+0x55/0x100 > [332706.556196] entry_SYSCALL_64_after_hwframe+0x78/0xe2 What is this stack dump here for? If it is absolutely needed, at least clean it up by removing hex numbers and timestamps because they're useless in a commit message. > > Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack") > Signed-off-by: Tengda Wu <wutengda@huaweicloud.com> > --- > v3: Address Josh comment, move kasan checks to show_trace_log_lvl, and > controlled by task != current. > v2: https://lore.kernel.org/all/20250829094744.3133324-1-wutengda@huaweicloud.com/ > v1: https://lore.kernel.org/all/20250818130715.2904264-1-wutengda@huaweicloud.com/ > --- > arch/x86/kernel/dumpstack.c | 14 ++++++++++++++ > 1 file changed, 14 insertions(+) > > diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c > index 71ee20102a8a..6d1ae0d77ffc 100644 > --- a/arch/x86/kernel/dumpstack.c > +++ b/arch/x86/kernel/dumpstack.c > @@ -189,9 +189,20 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, > unsigned long visit_mask = 0; > int graph_idx = 0; > bool partial = false; > + bool kasan_disabled = false; > > printk("%sCall Trace:\n", log_lvl); > > + /* > + * KASAN should be disabled during walking another task's stacks > + * to prevent false positives, as values on these stacks may change > + * concurrently with task execution. > + */ > + if (task && task != current) { > + kasan_disable_current(); > + kasan_disabled = true; > + } I don't like the sprinking of KASAN checks all over the tree just to make this thing work. If anything, you should disable KASAN like this: show_trace_log_lvl: disable KASAN __show_trace_log_lvl enable KASAN so that at least this thing is out of the way. But I'd like to hear first whether that's a use case we really want to support. Right now, it sounds like a meh to me: if you want to walk a task's stack with KASAN enabled, then stop it first. Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
Hi Boris, Thank you very much for your feedback and for taking another look at the patch. On 2025/9/22 17:44, Borislav Petkov wrote: > On Sat, Aug 30, 2025 at 09:25:56AM +0000, Tengda Wu wrote: >> When task A walks task B's stack without suspending it, the continuous >> changes in task B's stack (and corresponding KASAN shadow tags) may cause >> task A to hit KASAN redzones when accessing obsolete values on the stack, >> resulting in false positive reports. [1][2] > > What kind of a use case is that even? Why do we care? Running 'echo t > /proc/sysrq-trigger' will trigger this type of asynchronous stack walk, as demonstrated in the use case provided below. > >> The specific issue occurs as follows: >> >> Task A (walk other tasks' stacks) Task B (running) >> 1. echo t > /proc/sysrq-trigger >> >> show_trace_log_lvl >> regs = unwind_get_entry_regs() >> show_regs_if_on_stack(regs) >> 2. The stack data pointed by >> `regs` keeps changing, and >> so are the tags in its >> KASAN shadow region. >> __show_regs(regs) >> regs->ax, regs->bx, ... >> 3. hit KASAN redzones, OOB >> >> Fix this by detecting asynchronous stack unwinding scenarios through >> `task != current` during unwinding, and disabling KASAN checks when this >> scenario occurs. >> >> [1] https://lore.kernel.org/all/000000000000cb8e3a05c4ed84bb@google.com/ >> [2] KASAN out-of-bounds: >> [332706.552324] BUG: KASAN: out-of-bounds in __show_regs+0x4b/0x340 >> [332706.552433] Read of size 8 at addr ffff88d24999fb20 by task sysrq_t_test.sh/3977032 >> [332706.552562] >> [332706.552652] CPU: 36 PID: 3977032 Comm: sysrq_t_test.sh Kdump: loaded Not tainted 6.6.0+ #20 >> [332706.552783] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 3.35 10/20/2016 >> [332706.552906] Call Trace: >> [332706.552998] <TASK> >> [332706.553089] dump_stack_lvl+0x32/0x50 >> [332706.553193] print_address_description.constprop.0+0x6b/0x3d0 >> [332706.553303] print_report+0xbe/0x280 >> [332706.553409] ? __virt_addr_valid+0xed/0x160 >> [332706.553512] ? __show_regs+0x4b/0x340 >> [332706.553612] kasan_report+0xa8/0xe0 >> [332706.553716] ? __show_regs+0x4b/0x340 >> [332706.553816] ? asm_exc_page_fault+0x22/0x30 >> [332706.553919] __show_regs+0x4b/0x340 >> [332706.554021] ? asm_exc_page_fault+0x22/0x30 >> [332706.554123] show_trace_log_lvl+0x274/0x3b0 >> [332706.554229] ? load_elf_binary+0xf6e/0x1610 >> [332706.554330] ? rep_stos_alternative+0x40/0x80 >> [332706.554439] sched_show_task+0x211/0x290 >> [332706.554544] ? __pfx_sched_show_task+0x10/0x10 >> [332706.554648] ? _find_next_bit+0x6/0xc0 >> [332706.554749] ? _find_next_bit+0x37/0xc0 >> [332706.554852] show_state_filter+0x72/0x130 >> [332706.554956] sysrq_handle_showstate+0x7/0x10 >> [332706.555062] __handle_sysrq+0x146/0x2d0 >> [332706.555165] write_sysrq_trigger+0x2f/0x50 >> [332706.555270] proc_reg_write+0xdd/0x140 >> [332706.555372] vfs_write+0x1ff/0x5f0 >> [332706.555474] ? __pfx_vfs_write+0x10/0x10 >> [332706.555576] ? __pfx___handle_mm_fault+0x10/0x10 >> [332706.555682] ? __fget_light+0x99/0xf0 >> [332706.555785] ksys_write+0xb8/0x150 >> [332706.555887] ? __pfx_ksys_write+0x10/0x10 >> [332706.555989] ? ktime_get_coarse_real_ts64+0x4e/0x70 >> [332706.556094] do_syscall_64+0x55/0x100 >> [332706.556196] entry_SYSCALL_64_after_hwframe+0x78/0xe2 > > What is this stack dump here for? This call stack corresponds to the exception thrown by the sysrq use case mentioned above. > > If it is absolutely needed, at least clean it up by removing hex numbers and > timestamps because they're useless in a commit message. Okay, I will clean up this useless information. > >> >> Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack") >> Signed-off-by: Tengda Wu <wutengda@huaweicloud.com> >> --- >> v3: Address Josh comment, move kasan checks to show_trace_log_lvl, and >> controlled by task != current. >> v2: https://lore.kernel.org/all/20250829094744.3133324-1-wutengda@huaweicloud.com/ >> v1: https://lore.kernel.org/all/20250818130715.2904264-1-wutengda@huaweicloud.com/ >> --- >> arch/x86/kernel/dumpstack.c | 14 ++++++++++++++ >> 1 file changed, 14 insertions(+) >> >> diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c >> index 71ee20102a8a..6d1ae0d77ffc 100644 >> --- a/arch/x86/kernel/dumpstack.c >> +++ b/arch/x86/kernel/dumpstack.c >> @@ -189,9 +189,20 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, >> unsigned long visit_mask = 0; >> int graph_idx = 0; >> bool partial = false; >> + bool kasan_disabled = false; >> >> printk("%sCall Trace:\n", log_lvl); >> >> + /* >> + * KASAN should be disabled during walking another task's stacks >> + * to prevent false positives, as values on these stacks may change >> + * concurrently with task execution. >> + */ >> + if (task && task != current) { >> + kasan_disable_current(); >> + kasan_disabled = true; >> + } > > I don't like the sprinking of KASAN checks all over the tree just to make this > thing work. > > If anything, you should disable KASAN like this: > > show_trace_log_lvl: > disable KASAN > __show_trace_log_lvl > enable KASAN > > so that at least this thing is out of the way. Yeah, that's a much cleaner approach. > > But I'd like to hear first whether that's a use case we really want to > support. Right now, it sounds like a meh to me: if you want to walk a task's > stack with KASAN enabled, then stop it first. > > Thx. > I understand your point about keeping things simple and requiring the task to be stopped. However, the main challenge with stopping the task first is that it fundamentally alters the state we're trying to inspect. The primary use case for an asynchronous stack walk is to diagnose a task that is already misbehaving (e.g., spinning in a hard lockup, not responding to stops). If we need to stop a task to get its stack, we might not be able to stop it at all, or the act of stopping it could change the call stack, hiding the root cause of the issue. This is why my implementation selectively disables KASAN precisely for the async walk scenario. Thanks, Tengda
On Mon, Sep 22, 2025 at 08:14:50PM +0800, Tengda Wu wrote: > Running 'echo t > /proc/sysrq-trigger' will trigger this type of asynchronous > stack walk, as demonstrated in the use case provided below. So lead with that please. > >> [332706.552324] BUG: KASAN: out-of-bounds in __show_regs+0x4b/0x340 > >> [332706.552433] Read of size 8 at addr ffff88d24999fb20 by task sysrq_t_test.sh/3977032 > >> [332706.552562] > >> [332706.552652] CPU: 36 PID: 3977032 Comm: sysrq_t_test.sh Kdump: loaded Not tainted 6.6.0+ #20 ^^^^^^^^^^^^^^ This doesn't help - it is some random script. Trigger this with the echo 't' ... thing. > However, the main challenge with stopping the task first is that it fundamentally > alters the state we're trying to inspect. The primary use case for an asynchronous > stack walk is to diagnose a task that is already misbehaving (e.g., spinning in a > hard lockup, not responding to stops). If we need to stop a task to get its stack, > we might not be able to stop it at all, or the act of stopping it could change the > call stack, hiding the root cause of the issue. > > This is why my implementation selectively disables KASAN precisely for the async > walk scenario. Add this to the commit message too. You need to explain it in such a way so that someone can use your instructions and reproduce it herself, without asking more questions. IOW, it needs to be perfectly clear what the issue is and what this is fixing. Also, you don't have to rush your next version - we have a merge window coming up so this'll wait for the next round. Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 2025/9/22 20:28, Borislav Petkov wrote: > On Mon, Sep 22, 2025 at 08:14:50PM +0800, Tengda Wu wrote: >> Running 'echo t > /proc/sysrq-trigger' will trigger this type of asynchronous >> stack walk, as demonstrated in the use case provided below. > > So lead with that please. > >>>> [332706.552324] BUG: KASAN: out-of-bounds in __show_regs+0x4b/0x340 >>>> [332706.552433] Read of size 8 at addr ffff88d24999fb20 by task sysrq_t_test.sh/3977032 >>>> [332706.552562] >>>> [332706.552652] CPU: 36 PID: 3977032 Comm: sysrq_t_test.sh Kdump: loaded Not tainted 6.6.0+ #20 > ^^^^^^^^^^^^^^ > > This doesn't help - it is some random script. Trigger this with the echo 't' > ... thing. > >> However, the main challenge with stopping the task first is that it fundamentally >> alters the state we're trying to inspect. The primary use case for an asynchronous >> stack walk is to diagnose a task that is already misbehaving (e.g., spinning in a >> hard lockup, not responding to stops). If we need to stop a task to get its stack, >> we might not be able to stop it at all, or the act of stopping it could change the >> call stack, hiding the root cause of the issue. >> >> This is why my implementation selectively disables KASAN precisely for the async >> walk scenario. > > Add this to the commit message too. You need to explain it in such a way so > that someone can use your instructions and reproduce it herself, without > asking more questions. IOW, it needs to be perfectly clear what the issue is > and what this is fixing. > > Also, you don't have to rush your next version - we have a merge window coming > up so this'll wait for the next round. > > Thx. > Will do. I'll revise the commit message with the needed details. Thanks for the note on the timing. I'll hold it for the next round. Best regards, Tengda
On 8/30/25 11:25 AM, Tengda Wu wrote: > When task A walks task B's stack without suspending it, the continuous > changes in task B's stack (and corresponding KASAN shadow tags) may cause > task A to hit KASAN redzones when accessing obsolete values on the stack, > resulting in false positive reports. [1][2] > > The specific issue occurs as follows: > > Task A (walk other tasks' stacks) Task B (running) > 1. echo t > /proc/sysrq-trigger > > show_trace_log_lvl > regs = unwind_get_entry_regs() > show_regs_if_on_stack(regs) > 2. The stack data pointed by > `regs` keeps changing, and > so are the tags in its > KASAN shadow region. > __show_regs(regs) > regs->ax, regs->bx, ... > 3. hit KASAN redzones, OOB > > Fix this by detecting asynchronous stack unwinding scenarios through > `task != current` during unwinding, and disabling KASAN checks when this > scenario occurs. > > [1] https://lore.kernel.org/all/000000000000cb8e3a05c4ed84bb@google.com/ > [2] KASAN out-of-bounds: > [332706.552324] BUG: KASAN: out-of-bounds in __show_regs+0x4b/0x340 > [332706.552433] Read of size 8 at addr ffff88d24999fb20 by task sysrq_t_test.sh/3977032 > [332706.552562] > [332706.552652] CPU: 36 PID: 3977032 Comm: sysrq_t_test.sh Kdump: loaded Not tainted 6.6.0+ #20 > [332706.552783] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 3.35 10/20/2016 > [332706.552906] Call Trace: > [332706.552998] <TASK> > [332706.553089] dump_stack_lvl+0x32/0x50 > [332706.553193] print_address_description.constprop.0+0x6b/0x3d0 > [332706.553303] print_report+0xbe/0x280 > [332706.553409] ? __virt_addr_valid+0xed/0x160 > [332706.553512] ? __show_regs+0x4b/0x340 > [332706.553612] kasan_report+0xa8/0xe0 > [332706.553716] ? __show_regs+0x4b/0x340 > [332706.553816] ? asm_exc_page_fault+0x22/0x30 > [332706.553919] __show_regs+0x4b/0x340 > [332706.554021] ? asm_exc_page_fault+0x22/0x30 > [332706.554123] show_trace_log_lvl+0x274/0x3b0 > [332706.554229] ? load_elf_binary+0xf6e/0x1610 > [332706.554330] ? rep_stos_alternative+0x40/0x80 > [332706.554439] sched_show_task+0x211/0x290 > [332706.554544] ? __pfx_sched_show_task+0x10/0x10 > [332706.554648] ? _find_next_bit+0x6/0xc0 > [332706.554749] ? _find_next_bit+0x37/0xc0 > [332706.554852] show_state_filter+0x72/0x130 > [332706.554956] sysrq_handle_showstate+0x7/0x10 > [332706.555062] __handle_sysrq+0x146/0x2d0 > [332706.555165] write_sysrq_trigger+0x2f/0x50 > [332706.555270] proc_reg_write+0xdd/0x140 > [332706.555372] vfs_write+0x1ff/0x5f0 > [332706.555474] ? __pfx_vfs_write+0x10/0x10 > [332706.555576] ? __pfx___handle_mm_fault+0x10/0x10 > [332706.555682] ? __fget_light+0x99/0xf0 > [332706.555785] ksys_write+0xb8/0x150 > [332706.555887] ? __pfx_ksys_write+0x10/0x10 > [332706.555989] ? ktime_get_coarse_real_ts64+0x4e/0x70 > [332706.556094] do_syscall_64+0x55/0x100 > [332706.556196] entry_SYSCALL_64_after_hwframe+0x78/0xe2 > > Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack") > Signed-off-by: Tengda Wu <wutengda@huaweicloud.com> Reviewed-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Gentle Reminder: Patch Not Merged Yet Hi, Just a quick follow-up on my patch that got acked and reviewed over two weeks ago. Could you please check on its status? Let me know if anything is needed from my side. Thanks, Tengda On 2025/9/5 22:12, Andrey Ryabinin wrote: > > > On 8/30/25 11:25 AM, Tengda Wu wrote: >> When task A walks task B's stack without suspending it, the continuous >> changes in task B's stack (and corresponding KASAN shadow tags) may cause >> task A to hit KASAN redzones when accessing obsolete values on the stack, >> resulting in false positive reports. [1][2] >> >> The specific issue occurs as follows: >> >> Task A (walk other tasks' stacks) Task B (running) >> 1. echo t > /proc/sysrq-trigger >> >> show_trace_log_lvl >> regs = unwind_get_entry_regs() >> show_regs_if_on_stack(regs) >> 2. The stack data pointed by >> `regs` keeps changing, and >> so are the tags in its >> KASAN shadow region. >> __show_regs(regs) >> regs->ax, regs->bx, ... >> 3. hit KASAN redzones, OOB >> >> Fix this by detecting asynchronous stack unwinding scenarios through >> `task != current` during unwinding, and disabling KASAN checks when this >> scenario occurs. >> >> [1] https://lore.kernel.org/all/000000000000cb8e3a05c4ed84bb@google.com/ >> [2] KASAN out-of-bounds: >> [332706.552324] BUG: KASAN: out-of-bounds in __show_regs+0x4b/0x340 >> [332706.552433] Read of size 8 at addr ffff88d24999fb20 by task sysrq_t_test.sh/3977032 >> [332706.552562] >> [332706.552652] CPU: 36 PID: 3977032 Comm: sysrq_t_test.sh Kdump: loaded Not tainted 6.6.0+ #20 >> [332706.552783] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 3.35 10/20/2016 >> [332706.552906] Call Trace: >> [332706.552998] <TASK> >> [332706.553089] dump_stack_lvl+0x32/0x50 >> [332706.553193] print_address_description.constprop.0+0x6b/0x3d0 >> [332706.553303] print_report+0xbe/0x280 >> [332706.553409] ? __virt_addr_valid+0xed/0x160 >> [332706.553512] ? __show_regs+0x4b/0x340 >> [332706.553612] kasan_report+0xa8/0xe0 >> [332706.553716] ? __show_regs+0x4b/0x340 >> [332706.553816] ? asm_exc_page_fault+0x22/0x30 >> [332706.553919] __show_regs+0x4b/0x340 >> [332706.554021] ? asm_exc_page_fault+0x22/0x30 >> [332706.554123] show_trace_log_lvl+0x274/0x3b0 >> [332706.554229] ? load_elf_binary+0xf6e/0x1610 >> [332706.554330] ? rep_stos_alternative+0x40/0x80 >> [332706.554439] sched_show_task+0x211/0x290 >> [332706.554544] ? __pfx_sched_show_task+0x10/0x10 >> [332706.554648] ? _find_next_bit+0x6/0xc0 >> [332706.554749] ? _find_next_bit+0x37/0xc0 >> [332706.554852] show_state_filter+0x72/0x130 >> [332706.554956] sysrq_handle_showstate+0x7/0x10 >> [332706.555062] __handle_sysrq+0x146/0x2d0 >> [332706.555165] write_sysrq_trigger+0x2f/0x50 >> [332706.555270] proc_reg_write+0xdd/0x140 >> [332706.555372] vfs_write+0x1ff/0x5f0 >> [332706.555474] ? __pfx_vfs_write+0x10/0x10 >> [332706.555576] ? __pfx___handle_mm_fault+0x10/0x10 >> [332706.555682] ? __fget_light+0x99/0xf0 >> [332706.555785] ksys_write+0xb8/0x150 >> [332706.555887] ? __pfx_ksys_write+0x10/0x10 >> [332706.555989] ? ktime_get_coarse_real_ts64+0x4e/0x70 >> [332706.556094] do_syscall_64+0x55/0x100 >> [332706.556196] entry_SYSCALL_64_after_hwframe+0x78/0xe2 >> >> Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack") >> Signed-off-by: Tengda Wu <wutengda@huaweicloud.com> > > Reviewed-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
On Sat, Aug 30, 2025 at 09:25:56AM +0000, Tengda Wu wrote: > When task A walks task B's stack without suspending it, the continuous > changes in task B's stack (and corresponding KASAN shadow tags) may cause > task A to hit KASAN redzones when accessing obsolete values on the stack, > resulting in false positive reports. [1][2] > > The specific issue occurs as follows: > > Task A (walk other tasks' stacks) Task B (running) > 1. echo t > /proc/sysrq-trigger > > show_trace_log_lvl > regs = unwind_get_entry_regs() > show_regs_if_on_stack(regs) > 2. The stack data pointed by > `regs` keeps changing, and > so are the tags in its > KASAN shadow region. > __show_regs(regs) > regs->ax, regs->bx, ... > 3. hit KASAN redzones, OOB > > Fix this by detecting asynchronous stack unwinding scenarios through > `task != current` during unwinding, and disabling KASAN checks when this > scenario occurs. > > [1] https://lore.kernel.org/all/000000000000cb8e3a05c4ed84bb@google.com/ > [2] KASAN out-of-bounds: > [332706.552324] BUG: KASAN: out-of-bounds in __show_regs+0x4b/0x340 > [332706.552433] Read of size 8 at addr ffff88d24999fb20 by task sysrq_t_test.sh/3977032 > [332706.552562] > [332706.552652] CPU: 36 PID: 3977032 Comm: sysrq_t_test.sh Kdump: loaded Not tainted 6.6.0+ #20 > [332706.552783] Hardware name: Huawei RH2288H V3/BC11HGSA0, BIOS 3.35 10/20/2016 > [332706.552906] Call Trace: > [332706.552998] <TASK> > [332706.553089] dump_stack_lvl+0x32/0x50 > [332706.553193] print_address_description.constprop.0+0x6b/0x3d0 > [332706.553303] print_report+0xbe/0x280 > [332706.553409] ? __virt_addr_valid+0xed/0x160 > [332706.553512] ? __show_regs+0x4b/0x340 > [332706.553612] kasan_report+0xa8/0xe0 > [332706.553716] ? __show_regs+0x4b/0x340 > [332706.553816] ? asm_exc_page_fault+0x22/0x30 > [332706.553919] __show_regs+0x4b/0x340 > [332706.554021] ? asm_exc_page_fault+0x22/0x30 > [332706.554123] show_trace_log_lvl+0x274/0x3b0 > [332706.554229] ? load_elf_binary+0xf6e/0x1610 > [332706.554330] ? rep_stos_alternative+0x40/0x80 > [332706.554439] sched_show_task+0x211/0x290 > [332706.554544] ? __pfx_sched_show_task+0x10/0x10 > [332706.554648] ? _find_next_bit+0x6/0xc0 > [332706.554749] ? _find_next_bit+0x37/0xc0 > [332706.554852] show_state_filter+0x72/0x130 > [332706.554956] sysrq_handle_showstate+0x7/0x10 > [332706.555062] __handle_sysrq+0x146/0x2d0 > [332706.555165] write_sysrq_trigger+0x2f/0x50 > [332706.555270] proc_reg_write+0xdd/0x140 > [332706.555372] vfs_write+0x1ff/0x5f0 > [332706.555474] ? __pfx_vfs_write+0x10/0x10 > [332706.555576] ? __pfx___handle_mm_fault+0x10/0x10 > [332706.555682] ? __fget_light+0x99/0xf0 > [332706.555785] ksys_write+0xb8/0x150 > [332706.555887] ? __pfx_ksys_write+0x10/0x10 > [332706.555989] ? ktime_get_coarse_real_ts64+0x4e/0x70 > [332706.556094] do_syscall_64+0x55/0x100 > [332706.556196] entry_SYSCALL_64_after_hwframe+0x78/0xe2 > > Fixes: 3b3fa11bc700 ("x86/dumpstack: Print any pt_regs found on the stack") > Signed-off-by: Tengda Wu <wutengda@huaweicloud.com> > --- > v3: Address Josh comment, move kasan checks to show_trace_log_lvl, and > controlled by task != current. > v2: https://lore.kernel.org/all/20250829094744.3133324-1-wutengda@huaweicloud.com/ > v1: https://lore.kernel.org/all/20250818130715.2904264-1-wutengda@huaweicloud.com/ Acked-by: Josh Poimboeuf <jpoimboe@kernel.org> -- Josh
© 2016 - 2025 Red Hat, Inc.