arch/arm/kernel/sleep.S | 4 ++++ 1 file changed, 4 insertions(+)
From: Boy Wu <boy.wu@mediatek.com>
We found the following out-of-bounds (OOB) crash:
[ 33.452494] ==================================================================
[ 33.453513] BUG: KASAN: stack-out-of-bounds in refresh_cpu_vm_stats.constprop.0+0xcc/0x2ec
[ 33.454660] Write of size 164 at addr c1d03d30 by task swapper/0/0
[ 33.455515]
[ 33.455767] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 6.1.25-mainline #1
[ 33.456880] Hardware name: Generic DT based system
[ 33.457555] unwind_backtrace from show_stack+0x18/0x1c
[ 33.458326] show_stack from dump_stack_lvl+0x40/0x4c
[ 33.459072] dump_stack_lvl from print_report+0x158/0x4a4
[ 33.459863] print_report from kasan_report+0x9c/0x148
[ 33.460616] kasan_report from kasan_check_range+0x94/0x1a0
[ 33.461424] kasan_check_range from memset+0x20/0x3c
[ 33.462157] memset from refresh_cpu_vm_stats.constprop.0+0xcc/0x2ec
[ 33.463064] refresh_cpu_vm_stats.constprop.0 from tick_nohz_idle_stop_tick+0x180/0x53c
[ 33.464181] tick_nohz_idle_stop_tick from do_idle+0x264/0x354
[ 33.465029] do_idle from cpu_startup_entry+0x20/0x24
[ 33.465769] cpu_startup_entry from rest_init+0xf0/0xf4
[ 33.466528] rest_init from arch_post_acpi_subsys_init+0x0/0x18
[ 33.467397]
[ 33.467644] The buggy address belongs to stack of task swapper/0/0
[ 33.468493] and is located at offset 112 in frame:
[ 33.469172] refresh_cpu_vm_stats.constprop.0+0x0/0x2ec
[ 33.469917]
[ 33.470165] This frame has 2 objects:
[ 33.470696] [32, 76) 'global_zone_diff'
[ 33.470729] [112, 276) 'global_node_diff'
[ 33.471294]
[ 33.472095] The buggy address belongs to the physical page:
[ 33.472862] page:3cd72da8 refcount:1 mapcount:0 mapping:00000000 index:0x0 pfn:0x41d03
[ 33.473944] flags: 0x1000(reserved|zone=0)
[ 33.474565] raw: 00001000 ed741470 ed741470 00000000 00000000 00000000 ffffffff 00000001
[ 33.475656] raw: 00000000
[ 33.476050] page dumped because: kasan: bad access detected
[ 33.476816]
[ 33.477061] Memory state around the buggy address:
[ 33.477732] c1d03c00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[ 33.478630] c1d03c80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00
[ 33.479526] >c1d03d00: 00 04 f2 f2 f2 f2 00 00 00 00 00 00 f1 f1 f1 f1
[ 33.480415] ^
[ 33.481195] c1d03d80: 00 00 00 00 00 00 00 00 00 00 04 f3 f3 f3 f3 f3
[ 33.482088] c1d03e00: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
[ 33.482978] ==================================================================
We found that the root cause of this OOB report is that arm does not clear stale stack
poison in the case of cpuidle.
This patch follows the approach used in arch/arm64/kernel/sleep.S to resolve this issue.
Signed-off-by: Boy Wu <boy.wu@mediatek.com>
---
arch/arm/kernel/sleep.S | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index a86a1d4f3461..93afd1005b43 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -127,6 +127,10 @@ cpu_resume_after_mmu:
instr_sync
#endif
bl cpu_init @ restore the und/abt/irq banked regs
+#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK)
+ mov r0, sp
+ bl kasan_unpoison_task_stack_below
+#endif
mov r0, #0 @ return zero on success
ldmfd sp!, {r4 - r11, pc}
ENDPROC(cpu_resume_after_mmu)
--
2.18.0
On Fri, Dec 22, 2023 at 3:28 AM boy.wu <boy.wu@mediatek.com> wrote: > From: Boy Wu <boy.wu@mediatek.com> > > We found below OOB crash: (...) > We find the root cause of this OOB is that arm does not clear stale stack > poison in the case of cpuidle. > > This patch refer to arch/arm64/kernel/sleep.S to resolve this issue. > > Signed-off-by: Boy Wu <boy.wu@mediatek.com> With the commit context pointed out by Mark Rutland: Reviewed-by: Linus Walleij <linus.walleij@linaro.org> Yours, Linus Walleij
Hi, On Fri, Dec 22, 2023 at 10:27:41AM +0800, boy.wu wrote: > From: Boy Wu <boy.wu@mediatek.com> > > We found below OOB crash: > > [ 33.452494] ================================================================== > [ 33.453513] BUG: KASAN: stack-out-of-bounds in refresh_cpu_vm_stats.constprop.0+0xcc/0x2ec > [ 33.454660] Write of size 164 at addr c1d03d30 by task swapper/0/0 > [ 33.455515] > [ 33.455767] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 6.1.25-mainline #1 > [ 33.456880] Hardware name: Generic DT based system > [ 33.457555] unwind_backtrace from show_stack+0x18/0x1c > [ 33.458326] show_stack from dump_stack_lvl+0x40/0x4c > [ 33.459072] dump_stack_lvl from print_report+0x158/0x4a4 > [ 33.459863] print_report from kasan_report+0x9c/0x148 > [ 33.460616] kasan_report from kasan_check_range+0x94/0x1a0 > [ 33.461424] kasan_check_range from memset+0x20/0x3c > [ 33.462157] memset from refresh_cpu_vm_stats.constprop.0+0xcc/0x2ec > [ 33.463064] refresh_cpu_vm_stats.constprop.0 from tick_nohz_idle_stop_tick+0x180/0x53c > [ 33.464181] tick_nohz_idle_stop_tick from do_idle+0x264/0x354 > [ 33.465029] do_idle from cpu_startup_entry+0x20/0x24 > [ 33.465769] cpu_startup_entry from rest_init+0xf0/0xf4 > [ 33.466528] rest_init from arch_post_acpi_subsys_init+0x0/0x18 > [ 33.467397] > [ 33.467644] The buggy address belongs to stack of task swapper/0/0 > [ 33.468493] and is located at offset 112 in frame: > [ 33.469172] refresh_cpu_vm_stats.constprop.0+0x0/0x2ec > [ 33.469917] > [ 33.470165] This frame has 2 objects: > [ 33.470696] [32, 76) 'global_zone_diff' > [ 33.470729] [112, 276) 'global_node_diff' > [ 33.471294] > [ 33.472095] The buggy address belongs to the physical page: > [ 33.472862] page:3cd72da8 refcount:1 mapcount:0 mapping:00000000 index:0x0 pfn:0x41d03 > [ 33.473944] flags: 0x1000(reserved|zone=0) > [ 33.474565] raw: 00001000 ed741470 ed741470 00000000 00000000 00000000 ffffffff 00000001 > [ 33.475656] raw: 00000000 > [ 33.476050] page dumped because: kasan: bad 
access detected > [ 33.476816] > [ 33.477061] Memory state around the buggy address: > [ 33.477732] c1d03c00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > [ 33.478630] c1d03c80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 00 > [ 33.479526] >c1d03d00: 00 04 f2 f2 f2 f2 00 00 00 00 00 00 f1 f1 f1 f1 > [ 33.480415] ^ > [ 33.481195] c1d03d80: 00 00 00 00 00 00 00 00 00 00 04 f3 f3 f3 f3 f3 > [ 33.482088] c1d03e00: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 > [ 33.482978] ================================================================== > > We find the root cause of this OOB is that arm does not clear stale stack > poison in the case of cpuidle. > > This patch refer to arch/arm64/kernel/sleep.S to resolve this issue. > > Signed-off-by: Boy Wu <boy.wu@mediatek.com> It looks like you're specifically referring to what arm64 did in commit: 0d97e6d8024c71cc ("arm64: kasan: clear stale stack poison") Where the commit message explained the problem: | Functions which the compiler has instrumented for KASAN place poison on | the stack shadow upon entry and remove this poison prior to returning. | | In the case of cpuidle, CPUs exit the kernel a number of levels deep in | C code. Any instrumented functions on this critical path will leave | portions of the stack shadow poisoned. | | If CPUs lose context and return to the kernel via a cold path, we | restore a prior context saved in __cpu_suspend_enter are forgotten, and | we never remove the poison they placed in the stack shadow area by | functions calls between this and the actual exit of the kernel. | | Thus, (depending on stackframe layout) subsequent calls to instrumented | functions may hit this stale poison, resulting in (spurious) KASAN | splats to the console. | | To avoid this, clear any stale poison from the idle thread for a CPU | prior to bringing a CPU online. ... 
which we then extended to check for CONFIG_KASAN_STACK in commit: d56a9ef84bd0e1e8 ("kasan, arm64: unpoison stack only with CONFIG_KASAN_STACK") If you can fold in the description above (i.e. cite commit 0d97e6d8024c71cc and a copy of its commit message): Reviewed-by: Mark Rutland <mark.rutland@arm.com> Mark. > --- > arch/arm/kernel/sleep.S | 4 ++++ > 1 file changed, 4 insertions(+) > > diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S > index a86a1d4f3461..93afd1005b43 100644 > --- a/arch/arm/kernel/sleep.S > +++ b/arch/arm/kernel/sleep.S > @@ -127,6 +127,10 @@ cpu_resume_after_mmu: > instr_sync > #endif > bl cpu_init @ restore the und/abt/irq banked regs > +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) > + mov r0, sp > + bl kasan_unpoison_task_stack_below > +#endif > mov r0, #0 @ return zero on success > ldmfd sp!, {r4 - r11, pc} > ENDPROC(cpu_resume_after_mmu) > -- > 2.18.0 > >
On Tue, Apr 2, 2024 at 11:36 AM Mark Rutland <mark.rutland@arm.com> wrote: ... > It looks like you're specifically referring to what arm64 did in commit: > > 0d97e6d8024c71cc ("arm64: kasan: clear stale stack poison") > > Where the commit message explained the problem: > > | Functions which the compiler has instrumented for KASAN place poison on > | the stack shadow upon entry and remove this poison prior to returning. > | > | In the case of cpuidle, CPUs exit the kernel a number of levels deep in > | C code. Any instrumented functions on this critical path will leave > | portions of the stack shadow poisoned. > | > | If CPUs lose context and return to the kernel via a cold path, we > | restore a prior context saved in __cpu_suspend_enter are forgotten, and > | we never remove the poison they placed in the stack shadow area by > | functions calls between this and the actual exit of the kernel. > | > | Thus, (depending on stackframe layout) subsequent calls to instrumented > | functions may hit this stale poison, resulting in (spurious) KASAN > | splats to the console. > | > | To avoid this, clear any stale poison from the idle thread for a CPU > | prior to bringing a CPU online. > > ... which we then extended to check for CONFIG_KASAN_STACK in commit: > > d56a9ef84bd0e1e8 ("kasan, arm64: unpoison stack only with CONFIG_KASAN_STACK") > > If you can fold in the description above (i.e. cite commit 0d97e6d8024c71cc and > a copy of its commit message): > > Reviewed-by: Mark Rutland <mark.rutland@arm.com> > Agreed with the above, feel free to add: Acked-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Hi Russell: Kindly ping Thanks. Boy. On Fri, 2023-12-22 at 10:27 +0800, boy.wu wrote: > From: Boy Wu <boy.wu@mediatek.com> > > We found below OOB crash: > > [ 33.452494] > ================================================================== > [ 33.453513] BUG: KASAN: stack-out-of-bounds in > refresh_cpu_vm_stats.constprop.0+0xcc/0x2ec > [ 33.454660] Write of size 164 at addr c1d03d30 by task swapper/0/0 > [ 33.455515] > [ 33.455767] CPU: 0 PID: 0 Comm: swapper/0 Tainted: > G O 6.1.25-mainline #1 > [ 33.456880] Hardware name: Generic DT based system > [ 33.457555] unwind_backtrace from show_stack+0x18/0x1c > [ 33.458326] show_stack from dump_stack_lvl+0x40/0x4c > [ 33.459072] dump_stack_lvl from print_report+0x158/0x4a4 > [ 33.459863] print_report from kasan_report+0x9c/0x148 > [ 33.460616] kasan_report from kasan_check_range+0x94/0x1a0 > [ 33.461424] kasan_check_range from memset+0x20/0x3c > [ 33.462157] memset from > refresh_cpu_vm_stats.constprop.0+0xcc/0x2ec > [ 33.463064] refresh_cpu_vm_stats.constprop.0 from > tick_nohz_idle_stop_tick+0x180/0x53c > [ 33.464181] tick_nohz_idle_stop_tick from do_idle+0x264/0x354 > [ 33.465029] do_idle from cpu_startup_entry+0x20/0x24 > [ 33.465769] cpu_startup_entry from rest_init+0xf0/0xf4 > [ 33.466528] rest_init from arch_post_acpi_subsys_init+0x0/0x18 > [ 33.467397] > [ 33.467644] The buggy address belongs to stack of task swapper/0/0 > [ 33.468493] and is located at offset 112 in frame: > [ 33.469172] refresh_cpu_vm_stats.constprop.0+0x0/0x2ec > [ 33.469917] > [ 33.470165] This frame has 2 objects: > [ 33.470696] [32, 76) 'global_zone_diff' > [ 33.470729] [112, 276) 'global_node_diff' > [ 33.471294] > [ 33.472095] The buggy address belongs to the physical page: > [ 33.472862] page:3cd72da8 refcount:1 mapcount:0 mapping:00000000 > index:0x0 pfn:0x41d03 > [ 33.473944] flags: 0x1000(reserved|zone=0) > [ 33.474565] raw: 00001000 ed741470 ed741470 00000000 00000000 > 00000000 ffffffff 00000001 > [ 33.475656] raw: 00000000 > [ 
33.476050] page dumped because: kasan: bad access detected > [ 33.476816] > [ 33.477061] Memory state around the buggy address: > [ 33.477732] c1d03c00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > 00 00 > [ 33.478630] c1d03c80: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 > 00 00 > [ 33.479526] >c1d03d00: 00 04 f2 f2 f2 f2 00 00 00 00 00 00 f1 f1 > f1 f1 > [ 33.480415] ^ > [ 33.481195] c1d03d80: 00 00 00 00 00 00 00 00 00 00 04 f3 f3 f3 > f3 f3 > [ 33.482088] c1d03e00: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 > 00 00 > [ 33.482978] > ================================================================== > > We find the root cause of this OOB is that arm does not clear stale > stack > poison in the case of cpuidle. > > This patch refer to arch/arm64/kernel/sleep.S to resolve this issue. > > Signed-off-by: Boy Wu <boy.wu@mediatek.com> > --- > arch/arm/kernel/sleep.S | 4 ++++ > 1 file changed, 4 insertions(+) > > diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S > index a86a1d4f3461..93afd1005b43 100644 > --- a/arch/arm/kernel/sleep.S > +++ b/arch/arm/kernel/sleep.S > @@ -127,6 +127,10 @@ cpu_resume_after_mmu: > instr_sync > #endif > bl cpu_init @ restore the und/abt/irq > banked regs > +#if defined(CONFIG_KASAN) && defined(CONFIG_KASAN_STACK) > + mov r0, sp > + bl kasan_unpoison_task_stack_below > +#endif > mov r0, #0 @ return zero on success > ldmfd sp!, {r4 - r11, pc} > ENDPROC(cpu_resume_after_mmu)
On Fri, Mar 29, 2024 at 03:17:39AM +0000, Boy Wu (吳勃誼) wrote: > Hi Russell: > > Kingly ping I'm afraid I know nowt about KASAN. It was added to ARM32 by others. I've no idea whether this is correct or not. Can we get someone who knows KASAN to review this? -- RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
On Sat, Mar 30, 2024 at 7:36 PM Russell King (Oracle) <linux@armlinux.org.uk> wrote: > On Fri, Mar 29, 2024 at 03:17:39AM +0000, Boy Wu (吳勃誼) wrote: > > Hi Russell: > > > > Kingly ping > > I'm afraid I know nowt about KASAN. It was added to ARM32 by others. > I've no idea whether this is correct or not. Can we get someone who > knows KASAN to review this? I rewrote the patches from Andrey, Abbot and Ard into the current form and I tend to keep an eye on it, I can add a MAINTAINERS entry for arch/arm/mm/kasan_init.c pointing to me and Andrey so we (hopefully) get CC:ed on these patches. get_maintainer.pl won't help in cases like this patch though :/ Yours, Linus Walleij
Hi Andrey, Could you please help review this patch? On Sat, 2024-03-30 at 18:36 +0000, Russell King (Oracle) wrote: > On Fri, Mar 29, 2024 at 03:17:39AM +0000, Boy Wu (吳勃誼) wrote: > > Hi Russell: > > > > Kingly ping > > I'm afraid I know nowt about KASAN. It was added to ARM32 by others. > I've no idea whether this is correct or not. Can we get someone who > knows KASAN to review this? > > -- > RMK's Patch system: https://www.armlinux.org.uk/developer/patches/ > FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
© 2016 - 2024 Red Hat, Inc.