From: David Woodhouse <dwmw@amazon.co.uk>
Some failure modes lead to triple faults in the relocate_kernel
function, and a triple fault there is pretty much undebuggable for
normal mortals.
Adding a GDT in the relocate_kernel environment is step 1 towards being
able to catch faults and do something more useful.
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
arch/x86/kernel/relocate_kernel_64.S | 27 +++++++++++++++++++++++++++
1 file changed, 27 insertions(+)
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index af2cd06ff318..c62f03808f18 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -39,6 +39,18 @@ SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
+#ifdef CONFIG_KEXEC_DEBUG
+ .balign 16 /* GDT for the relocate_kernel environment (CONFIG_KEXEC_DEBUG only) */
+SYM_DATA_START_LOCAL(kexec_debug_gdt)
+ .word kexec_debug_gdt_end - kexec_debug_gdt - 1 /* slot 0: table size - 1, pushed as the GDTR limit */
+ .long 0 /* slot 0: padding */
+ .word 0 /* slot 0: padding, completes the 8-byte entry */
+ .quad 0x00cf9a000000ffff /* __KERNEL32_CS: 32-bit code, base 0, 4 GiB limit */
+ .quad 0x00af9a000000ffff /* __KERNEL_CS: 64-bit code (L bit set) */
+ .quad 0x00cf92000000ffff /* __KERNEL_DS: read/write data, base 0, 4 GiB limit */
+SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)
+#endif /* CONFIG_KEXEC_DEBUG */
+
.section .text..relocate_kernel,"ax";
.code64
SYM_CODE_START_NOALIGN(relocate_kernel)
@@ -115,6 +127,21 @@ SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/* store the start address on the stack */
pushq %rdx
+#ifdef CONFIG_KEXEC_DEBUG
+ /* Create a GDTR (16 bits limit, 64 bits addr) on stack */
+ leaq kexec_debug_gdt(%rip), %rax
+ pushq %rax
+ pushw (%rax)
+
+ /* Load the GDT, put the stack back */
+ lgdt (%rsp)
+ addq $10, %rsp
+
+ /* Test that we can load segments */
+ movq %ds, %rax
+ movq %rax, %ds
+#endif /* CONFIG_KEXEC_DEBUG */
+
/*
* Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
* below.
--
2.47.0
* David Woodhouse <dwmw2@infradead.org> wrote:

> From: David Woodhouse <dwmw@amazon.co.uk>
>
> There are some failure modes which lead to triple-faults in the
> relocate_kernel function, which is fairly much undebuggable for normal
> mortals.
>
> Adding a GDT in the relocate_kernel environment is step 1 towards being
> able to catch faults and do something more useful.
>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
> arch/x86/kernel/relocate_kernel_64.S | 27 +++++++++++++++++++++++++++
> 1 file changed, 27 insertions(+)
>
> diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
> index af2cd06ff318..c62f03808f18 100644
> --- a/arch/x86/kernel/relocate_kernel_64.S
> +++ b/arch/x86/kernel/relocate_kernel_64.S
> @@ -39,6 +39,18 @@ SYM_DATA(kexec_pa_table_page, .quad 0)
> SYM_DATA(kexec_pa_swap_page, .quad 0)
> SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
>
> +#ifdef CONFIG_KEXEC_DEBUG
> + .balign 16
> +SYM_DATA_START_LOCAL(kexec_debug_gdt)
> + .word kexec_debug_gdt_end - kexec_debug_gdt - 1
> + .long 0
> + .word 0
> + .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
> + .quad 0x00af9a000000ffff /* __KERNEL_CS */
> + .quad 0x00cf92000000ffff /* __KERNEL_DS */
> +SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)
> +#endif /* CONFIG_KEXEC_DEBUG */

Yeah, so is there any reason (other than paranoia) why the early-early
GDT and IDT shouldn't be unconditional? There are many ways for such an
approach to bitrot; it's much better to not hide it behind a
default-disabled debug option...

Some of the other bits, like the hard-coded serial debugging
assumptions, probably need to be behind the debug option - but much of
the new debug mechanism looks safe and generic and can be always-on,
IMHO.

This would also throw regressions back into the face of whoever manages
to introduce them, ideally. ;-)

Thanks,

 Ingo
On 23 February 2025 09:53:07 GMT, Ingo Molnar <mingo@kernel.org> wrote:
>
>* David Woodhouse <dwmw2@infradead.org> wrote:
>
>> From: David Woodhouse <dwmw@amazon.co.uk>
>>
>> There are some failure modes which lead to triple-faults in the
>> relocate_kernel function, which is fairly much undebuggable for normal
>> mortals.
>>
>> Adding a GDT in the relocate_kernel environment is step 1 towards being
>> able to catch faults and do something more useful.
>>
>> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
>> ---
>> arch/x86/kernel/relocate_kernel_64.S | 27 +++++++++++++++++++++++++++
>> 1 file changed, 27 insertions(+)
>>
>> diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
>> index af2cd06ff318..c62f03808f18 100644
>> --- a/arch/x86/kernel/relocate_kernel_64.S
>> +++ b/arch/x86/kernel/relocate_kernel_64.S
>> @@ -39,6 +39,18 @@ SYM_DATA(kexec_pa_table_page, .quad 0)
>> SYM_DATA(kexec_pa_swap_page, .quad 0)
>> SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
>>
>> +#ifdef CONFIG_KEXEC_DEBUG
>> + .balign 16
>> +SYM_DATA_START_LOCAL(kexec_debug_gdt)
>> + .word kexec_debug_gdt_end - kexec_debug_gdt - 1
>> + .long 0
>> + .word 0
>> + .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
>> + .quad 0x00af9a000000ffff /* __KERNEL_CS */
>> + .quad 0x00cf92000000ffff /* __KERNEL_DS */
>> +SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)
>> +#endif /* CONFIG_KEXEC_DEBUG */
>
>Yeah, so is there any reason (other than paranoia) why the early-early
>GDT and IDT shouldn't be unconditional? There's many ways for such an
>approach to bitrot, it's much better to not hide it behind a
>default-disabled debug option...
>
>Some of the other bits, like the hard-coded serial debugging
>assumptions, probably need to be behind the debug option - but much of
>the new debug mechanism looks safe and generic and can be always-on,
>IMHO.
>
>This would also throw regressions back into the face of whoever manages
>to introduce them, ideally. ;-)
>
>Thanks,
>
> Ingo

Makes sense to me. I was just trying to be as unobtrusive as possible.

In a test branch where I was trying to fix up the objtool vs. CFI pain,
I did move the IDT/GDT setup entirely into the ASM code and remove the C
code which clears them (before the call into relocate_kernel() which
might now trap if we remove the __nocfi hack). I never did get objtool
to tolerate both clang and GCC builds though.

I think even the serial output (tied as it is to earlyprintk setup)
could reasonably be enabled by default too.
© 2016 - 2026 Red Hat, Inc.