arch/x86/hyperv/hv_crash.c | 79 ++++++++++---------- 1 file changed, 41 insertions(+), 38 deletions(-)
hv_crash_c_entry() is a C function that is entered without a stack,
and this is only allowed for functions that have the __naked attribute,
which informs the compiler that it must not emit the usual prologue and
epilogue or emit any other kind of instrumentation that relies on a
stack frame.
So split up the function, and set the __naked attribute on the initial
part that sets up the stack, GDT, IDT and other pieces that are needed
for ordinary C execution. Given that function calls are not permitted
either, use the existing long return coded in an asm() block to call the
second part of the function, which is an ordinary function that is
permitted to call other functions as usual.
Cc: Mukesh Rathor <mrathor@linux.microsoft.com>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: Uros Bizjak <ubizjak@gmail.com>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: linux-hyperv@vger.kernel.org
Fixes: 94212d34618c ("x86/hyperv: Implement hypervisor RAM collection into vmcore")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
v2: apply some asm tweaks suggested by Uros and Andrew
arch/x86/hyperv/hv_crash.c | 79 ++++++++++----------
1 file changed, 41 insertions(+), 38 deletions(-)
diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c
index 92da1b4f2e73..1c0965eb346e 100644
--- a/arch/x86/hyperv/hv_crash.c
+++ b/arch/x86/hyperv/hv_crash.c
@@ -107,14 +107,12 @@ static void __noreturn hv_panic_timeout_reboot(void)
cpu_relax();
}
-/* This cannot be inlined as it needs stack */
-static noinline __noclone void hv_crash_restore_tss(void)
+static void hv_crash_restore_tss(void)
{
load_TR_desc();
}
-/* This cannot be inlined as it needs stack */
-static noinline void hv_crash_clear_kernpt(void)
+static void hv_crash_clear_kernpt(void)
{
pgd_t *pgd;
p4d_t *p4d;
@@ -125,6 +123,25 @@ static noinline void hv_crash_clear_kernpt(void)
native_p4d_clear(p4d);
}
+
+static void __noreturn hv_crash_handle(void)
+{
+ hv_crash_restore_tss();
+ hv_crash_clear_kernpt();
+
+ /* we are now fully in devirtualized normal kernel mode */
+ __crash_kexec(NULL);
+
+ hv_panic_timeout_reboot();
+}
+
+/*
+ * __naked functions do not permit function calls, not even to __always_inline
+ * functions that only contain asm() blocks themselves. So use a macro instead.
+ */
+#define hv_wrmsr(msr, val) \
+ asm("wrmsr" :: "c"(msr), "a"((u32)val), "d"((u32)(val >> 32)) : "memory")
+
/*
* This is the C entry point from the asm glue code after the disable hypercall.
* We enter here in IA32-e long mode, ie, full 64bit mode running on kernel
@@ -133,49 +150,35 @@ static noinline void hv_crash_clear_kernpt(void)
* available. We restore kernel GDT, and rest of the context, and continue
* to kexec.
*/
-static asmlinkage void __noreturn hv_crash_c_entry(void)
+static void __naked hv_crash_c_entry(void)
{
- struct hv_crash_ctxt *ctxt = &hv_crash_ctxt;
-
/* first thing, restore kernel gdt */
- native_load_gdt(&ctxt->gdtr);
+ asm volatile("lgdt %0" : : "m" (hv_crash_ctxt.gdtr));
- asm volatile("movw %%ax, %%ss" : : "a"(ctxt->ss));
- asm volatile("movq %0, %%rsp" : : "m"(ctxt->rsp));
+ asm volatile("movw %0, %%ss" : : "m"(hv_crash_ctxt.ss));
+ asm volatile("movq %0, %%rsp" : : "m"(hv_crash_ctxt.rsp));
- asm volatile("movw %%ax, %%ds" : : "a"(ctxt->ds));
- asm volatile("movw %%ax, %%es" : : "a"(ctxt->es));
- asm volatile("movw %%ax, %%fs" : : "a"(ctxt->fs));
- asm volatile("movw %%ax, %%gs" : : "a"(ctxt->gs));
+ asm volatile("movw %0, %%ds" : : "m"(hv_crash_ctxt.ds));
+ asm volatile("movw %0, %%es" : : "m"(hv_crash_ctxt.es));
+ asm volatile("movw %0, %%fs" : : "m"(hv_crash_ctxt.fs));
+ asm volatile("movw %0, %%gs" : : "m"(hv_crash_ctxt.gs));
- native_wrmsrq(MSR_IA32_CR_PAT, ctxt->pat);
- asm volatile("movq %0, %%cr0" : : "r"(ctxt->cr0));
+ hv_wrmsr(MSR_IA32_CR_PAT, hv_crash_ctxt.pat);
+ asm volatile("movq %0, %%cr0" : : "r"(hv_crash_ctxt.cr0));
- asm volatile("movq %0, %%cr8" : : "r"(ctxt->cr8));
- asm volatile("movq %0, %%cr4" : : "r"(ctxt->cr4));
- asm volatile("movq %0, %%cr2" : : "r"(ctxt->cr4));
+ asm volatile("movq %0, %%cr8" : : "r"(hv_crash_ctxt.cr8));
+ asm volatile("movq %0, %%cr4" : : "r"(hv_crash_ctxt.cr4));
+ asm volatile("movq %0, %%cr2" : : "r"(hv_crash_ctxt.cr4));
- native_load_idt(&ctxt->idtr);
- native_wrmsrq(MSR_GS_BASE, ctxt->gsbase);
- native_wrmsrq(MSR_EFER, ctxt->efer);
+ asm volatile("lidt %0" : : "m" (hv_crash_ctxt.idtr));
+ hv_wrmsr(MSR_GS_BASE, hv_crash_ctxt.gsbase);
+ hv_wrmsr(MSR_EFER, hv_crash_ctxt.efer);
/* restore the original kernel CS now via far return */
- asm volatile("movzwq %0, %%rax\n\t"
- "pushq %%rax\n\t"
- "pushq $1f\n\t"
- "lretq\n\t"
- "1:nop\n\t" : : "m"(ctxt->cs) : "rax");
-
- /* We are in asmlinkage without stack frame, hence make C function
- * calls which will buy stack frames.
- */
- hv_crash_restore_tss();
- hv_crash_clear_kernpt();
-
- /* we are now fully in devirtualized normal kernel mode */
- __crash_kexec(NULL);
-
- hv_panic_timeout_reboot();
+ asm volatile("pushq %q0\n\t"
+ "pushq %q1\n\t"
+ "lretq"
+ :: "r"(hv_crash_ctxt.cs), "r"(hv_crash_handle));
}
/* Tell gcc we are using lretq long jump in the above function intentionally */
STACK_FRAME_NON_STANDARD(hv_crash_c_entry);
--
2.47.3
On Fri, Feb 27, 2026 at 11:40 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> hv_crash_c_entry() is a C function that is entered without a stack,
> and this is only allowed for functions that have the __naked attribute,
> which informs the compiler that it must not emit the usual prologue and
> epilogue or emit any other kind of instrumentation that relies on a
> stack frame.
>
> So split up the function, and set the __naked attribute on the initial
> part that sets up the stack, GDT, IDT and other pieces that are needed
> for ordinary C execution. Given that function calls are not permitted
> either, use the existing long return coded in an asm() block to call the
> second part of the function, which is an ordinary function that is
> permitted to call other functions as usual.
>
> Cc: Mukesh Rathor <mrathor@linux.microsoft.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Cc: Uros Bizjak <ubizjak@gmail.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: linux-hyperv@vger.kernel.org
> Fixes: 94212d34618c ("x86/hyperv: Implement hypervisor RAM collection into vmcore")
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> v2: apply some asm tweaks suggested by Uros and Andrew
>
> arch/x86/hyperv/hv_crash.c | 79 ++++++++++----------
> 1 file changed, 41 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c
> index 92da1b4f2e73..1c0965eb346e 100644
> --- a/arch/x86/hyperv/hv_crash.c
> +++ b/arch/x86/hyperv/hv_crash.c
> @@ -107,14 +107,12 @@ static void __noreturn hv_panic_timeout_reboot(void)
> cpu_relax();
> }
>
> -/* This cannot be inlined as it needs stack */
> -static noinline __noclone void hv_crash_restore_tss(void)
> +static void hv_crash_restore_tss(void)
> {
> load_TR_desc();
> }
>
> -/* This cannot be inlined as it needs stack */
> -static noinline void hv_crash_clear_kernpt(void)
> +static void hv_crash_clear_kernpt(void)
> {
> pgd_t *pgd;
> p4d_t *p4d;
> @@ -125,6 +123,25 @@ static noinline void hv_crash_clear_kernpt(void)
> native_p4d_clear(p4d);
> }
>
> +
> +static void __noreturn hv_crash_handle(void)
> +{
> + hv_crash_restore_tss();
> + hv_crash_clear_kernpt();
> +
> + /* we are now fully in devirtualized normal kernel mode */
> + __crash_kexec(NULL);
> +
> + hv_panic_timeout_reboot();
> +}
> +
> +/*
> + * __naked functions do not permit function calls, not even to __always_inline
> + * functions that only contain asm() blocks themselves. So use a macro instead.
> + */
> +#define hv_wrmsr(msr, val) \
> + asm("wrmsr" :: "c"(msr), "a"((u32)val), "d"((u32)(val >> 32)) : "memory")
This one should be defined as "asm volatile", otherwise the compiler
will remove it (it has no outputs used later in the code!). Also, it
should be defined as "asm volatile" when it is important that the insn
stays where it is, relative to other "asm volatile"s. Otherwise, the
compiler is free to schedule other insns, including other "asm
volatile"s around . Since this macro is also used to update
MSR_GS_BASE (so it affects memory in case of %gs prefixed access),
"memory" clobber should remain).
Uros.
On Fri, Feb 27, 2026 at 11:40 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> hv_crash_c_entry() is a C function that is entered without a stack,
> and this is only allowed for functions that have the __naked attribute,
> which informs the compiler that it must not emit the usual prologue and
> epilogue or emit any other kind of instrumentation that relies on a
> stack frame.
>
> So split up the function, and set the __naked attribute on the initial
> part that sets up the stack, GDT, IDT and other pieces that are needed
> for ordinary C execution. Given that function calls are not permitted
> either, use the existing long return coded in an asm() block to call the
> second part of the function, which is an ordinary function that is
> permitted to call other functions as usual.
>
> Cc: Mukesh Rathor <mrathor@linux.microsoft.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Cc: Uros Bizjak <ubizjak@gmail.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: linux-hyperv@vger.kernel.org
> Fixes: 94212d34618c ("x86/hyperv: Implement hypervisor RAM collection into vmcore")
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> v2: apply some asm tweaks suggested by Uros and Andrew
>
> arch/x86/hyperv/hv_crash.c | 79 ++++++++++----------
> 1 file changed, 41 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c
> index 92da1b4f2e73..1c0965eb346e 100644
> --- a/arch/x86/hyperv/hv_crash.c
> +++ b/arch/x86/hyperv/hv_crash.c
> @@ -107,14 +107,12 @@ static void __noreturn hv_panic_timeout_reboot(void)
> cpu_relax();
> }
>
> -/* This cannot be inlined as it needs stack */
> -static noinline __noclone void hv_crash_restore_tss(void)
> +static void hv_crash_restore_tss(void)
> {
> load_TR_desc();
> }
>
> -/* This cannot be inlined as it needs stack */
> -static noinline void hv_crash_clear_kernpt(void)
> +static void hv_crash_clear_kernpt(void)
> {
> pgd_t *pgd;
> p4d_t *p4d;
> @@ -125,6 +123,25 @@ static noinline void hv_crash_clear_kernpt(void)
> native_p4d_clear(p4d);
> }
>
> +
> +static void __noreturn hv_crash_handle(void)
> +{
> + hv_crash_restore_tss();
> + hv_crash_clear_kernpt();
> +
> + /* we are now fully in devirtualized normal kernel mode */
> + __crash_kexec(NULL);
> +
> + hv_panic_timeout_reboot();
> +}
> +
> +/*
> + * __naked functions do not permit function calls, not even to __always_inline
> + * functions that only contain asm() blocks themselves. So use a macro instead.
> + */
> +#define hv_wrmsr(msr, val) \
> + asm("wrmsr" :: "c"(msr), "a"((u32)val), "d"((u32)(val >> 32)) : "memory")
> +
> /*
> * This is the C entry point from the asm glue code after the disable hypercall.
> * We enter here in IA32-e long mode, ie, full 64bit mode running on kernel
> @@ -133,49 +150,35 @@ static noinline void hv_crash_clear_kernpt(void)
> * available. We restore kernel GDT, and rest of the context, and continue
> * to kexec.
> */
> -static asmlinkage void __noreturn hv_crash_c_entry(void)
> +static void __naked hv_crash_c_entry(void)
> {
> - struct hv_crash_ctxt *ctxt = &hv_crash_ctxt;
> -
> /* first thing, restore kernel gdt */
> - native_load_gdt(&ctxt->gdtr);
> + asm volatile("lgdt %0" : : "m" (hv_crash_ctxt.gdtr));
>
> - asm volatile("movw %%ax, %%ss" : : "a"(ctxt->ss));
> - asm volatile("movq %0, %%rsp" : : "m"(ctxt->rsp));
> + asm volatile("movw %0, %%ss" : : "m"(hv_crash_ctxt.ss));
> + asm volatile("movq %0, %%rsp" : : "m"(hv_crash_ctxt.rsp));
Maybe this part should be written together as:
asm volatile("movw %0, %%ss\n\t"
"movq %1, %%rsp"
:: "m"(hv_crash_ctxt.ss), "m"(hv_crash_ctxt,rsp));
This way, the stack register update is guaranteed to execute in the
stack segment shadow. Otherwise, the compiler is free to insert some
unrelated instruction in between. It probably won't happen in practice
in this case, but the compiler can be quite creative with moving asm
arguments around.
Uros.
On Fri, Feb 27, 2026 at 11:40 PM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> hv_crash_c_entry() is a C function that is entered without a stack,
> and this is only allowed for functions that have the __naked attribute,
> which informs the compiler that it must not emit the usual prologue and
> epilogue or emit any other kind of instrumentation that relies on a
> stack frame.
>
> So split up the function, and set the __naked attribute on the initial
> part that sets up the stack, GDT, IDT and other pieces that are needed
> for ordinary C execution. Given that function calls are not permitted
> either, use the existing long return coded in an asm() block to call the
> second part of the function, which is an ordinary function that is
> permitted to call other functions as usual.
>
> Cc: Mukesh Rathor <mrathor@linux.microsoft.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Cc: Uros Bizjak <ubizjak@gmail.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: linux-hyperv@vger.kernel.org
> Fixes: 94212d34618c ("x86/hyperv: Implement hypervisor RAM collection into vmcore")
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> v2: apply some asm tweaks suggested by Uros and Andrew
Acked by: Uros Bizjak <ubizjak@gmail.com>
FYI: GCC by design inserts ud2 at the end of x86 naked functions. This
is intended to help debugging in case someone forgets ret/jmp, so
program execution does not wander into whatever follows the function.
IIRC, when ud2 follows ret, code analyzers may report "unreachable
code" warnings. I don't know if this is still the case, but
nevertheless this should be considered an important "safety net"
feature of the compiler.
Uros.
On 27/02/2026 10:40 pm, Ard Biesheuvel wrote:
> hv_crash_c_entry() is a C function that is entered without a stack,
> and this is only allowed for functions that have the __naked attribute,
> which informs the compiler that it must not emit the usual prologue and
> epilogue or emit any other kind of instrumentation that relies on a
> stack frame.
>
> So split up the function, and set the __naked attribute on the initial
> part that sets up the stack, GDT, IDT and other pieces that are needed
> for ordinary C execution. Given that function calls are not permitted
> either, use the existing long return coded in an asm() block to call the
> second part of the function, which is an ordinary function that is
> permitted to call other functions as usual.
>
> Cc: Mukesh Rathor <mrathor@linux.microsoft.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Cc: Uros Bizjak <ubizjak@gmail.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: linux-hyperv@vger.kernel.org
> Fixes: 94212d34618c ("x86/hyperv: Implement hypervisor RAM collection into vmcore")
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> v2: apply some asm tweaks suggested by Uros and Andrew
Looking better. FWIW, Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com> (asm parts, not hv parts).
Two minor suggestions, probably left to the maintainers digression.
> diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c
> index 92da1b4f2e73..1c0965eb346e 100644
> --- a/arch/x86/hyperv/hv_crash.c
> +++ b/arch/x86/hyperv/hv_crash.c
> @@ -125,6 +123,25 @@ static noinline void hv_crash_clear_kernpt(void)
> native_p4d_clear(p4d);
> }
>
> +
> +static void __noreturn hv_crash_handle(void)
> +{
> + hv_crash_restore_tss();
> + hv_crash_clear_kernpt();
> +
> + /* we are now fully in devirtualized normal kernel mode */
> + __crash_kexec(NULL);
> +
> + hv_panic_timeout_reboot();
> +}
> +
> +/*
> + * __naked functions do not permit function calls, not even to __always_inline
> + * functions that only contain asm() blocks themselves. So use a macro instead.
> + */
> +#define hv_wrmsr(msr, val) \
> + asm("wrmsr" :: "c"(msr), "a"((u32)val), "d"((u32)(val >> 32)) : "memory")
How about naming it hv_crash_wrmsr()?
It's important that this wrapper is not reused elsewhere. Elsewhere
should use the regular MSR accessors.
> +
> /*
> * This is the C entry point from the asm glue code after the disable hypercall.
> * We enter here in IA32-e long mode, ie, full 64bit mode running on kernel
> @@ -133,49 +150,35 @@ static noinline void hv_crash_clear_kernpt(void)
> * available. We restore kernel GDT, and rest of the context, and continue
> * to kexec.
> */
> -static asmlinkage void __noreturn hv_crash_c_entry(void)
> +static void __naked hv_crash_c_entry(void)
> {
> - struct hv_crash_ctxt *ctxt = &hv_crash_ctxt;
> -
> /* first thing, restore kernel gdt */
> - native_load_gdt(&ctxt->gdtr);
> + asm volatile("lgdt %0" : : "m" (hv_crash_ctxt.gdtr));
>
> - asm volatile("movw %%ax, %%ss" : : "a"(ctxt->ss));
> - asm volatile("movq %0, %%rsp" : : "m"(ctxt->rsp));
> + asm volatile("movw %0, %%ss" : : "m"(hv_crash_ctxt.ss));
> + asm volatile("movq %0, %%rsp" : : "m"(hv_crash_ctxt.rsp));
>
> - asm volatile("movw %%ax, %%ds" : : "a"(ctxt->ds));
> - asm volatile("movw %%ax, %%es" : : "a"(ctxt->es));
> - asm volatile("movw %%ax, %%fs" : : "a"(ctxt->fs));
> - asm volatile("movw %%ax, %%gs" : : "a"(ctxt->gs));
> + asm volatile("movw %0, %%ds" : : "m"(hv_crash_ctxt.ds));
> + asm volatile("movw %0, %%es" : : "m"(hv_crash_ctxt.es));
> + asm volatile("movw %0, %%fs" : : "m"(hv_crash_ctxt.fs));
> + asm volatile("movw %0, %%gs" : : "m"(hv_crash_ctxt.gs));
>
> - native_wrmsrq(MSR_IA32_CR_PAT, ctxt->pat);
> - asm volatile("movq %0, %%cr0" : : "r"(ctxt->cr0));
> + hv_wrmsr(MSR_IA32_CR_PAT, hv_crash_ctxt.pat);
> + asm volatile("movq %0, %%cr0" : : "r"(hv_crash_ctxt.cr0));
>
> - asm volatile("movq %0, %%cr8" : : "r"(ctxt->cr8));
> - asm volatile("movq %0, %%cr4" : : "r"(ctxt->cr4));
> - asm volatile("movq %0, %%cr2" : : "r"(ctxt->cr4));
> + asm volatile("movq %0, %%cr8" : : "r"(hv_crash_ctxt.cr8));
> + asm volatile("movq %0, %%cr4" : : "r"(hv_crash_ctxt.cr4));
> + asm volatile("movq %0, %%cr2" : : "r"(hv_crash_ctxt.cr4));
>
> - native_load_idt(&ctxt->idtr);
> - native_wrmsrq(MSR_GS_BASE, ctxt->gsbase);
> - native_wrmsrq(MSR_EFER, ctxt->efer);
> + asm volatile("lidt %0" : : "m" (hv_crash_ctxt.idtr));
> + hv_wrmsr(MSR_GS_BASE, hv_crash_ctxt.gsbase);
> + hv_wrmsr(MSR_EFER, hv_crash_ctxt.efer);
>
> /* restore the original kernel CS now via far return */
> - asm volatile("movzwq %0, %%rax\n\t"
> - "pushq %%rax\n\t"
> - "pushq $1f\n\t"
> - "lretq\n\t"
> - "1:nop\n\t" : : "m"(ctxt->cs) : "rax");
> -
> - /* We are in asmlinkage without stack frame, hence make C function
> - * calls which will buy stack frames.
> - */
> - hv_crash_restore_tss();
> - hv_crash_clear_kernpt();
> -
> - /* we are now fully in devirtualized normal kernel mode */
> - __crash_kexec(NULL);
> -
> - hv_panic_timeout_reboot();
> + asm volatile("pushq %q0\n\t"
> + "pushq %q1\n\t"
> + "lretq"
> + :: "r"(hv_crash_ctxt.cs), "r"(hv_crash_handle));
> }
> /* Tell gcc we are using lretq long jump in the above function intentionally */
> STACK_FRAME_NON_STANDARD(hv_crash_c_entry);
How about fixing the comment to say objtool? It's not GCC which cares.
~Andrew
On 2/27/26 14:40, Ard Biesheuvel wrote:
> hv_crash_c_entry() is a C function that is entered without a stack,
> and this is only allowed for functions that have the __naked attribute,
> which informs the compiler that it must not emit the usual prologue and
> epilogue or emit any other kind of instrumentation that relies on a
> stack frame.
>
> So split up the function, and set the __naked attribute on the initial
> part that sets up the stack, GDT, IDT and other pieces that are needed
> for ordinary C execution. Given that function calls are not permitted
> either, use the existing long return coded in an asm() block to call the
> second part of the function, which is an ordinary function that is
> permitted to call other functions as usual.
Thank you for the patch. I'll start a build on the side and test it
out and let you know.
Thanks,
-Mukesh
> Cc: Mukesh Rathor <mrathor@linux.microsoft.com>
> Cc: Wei Liu <wei.liu@kernel.org>
> Cc: Uros Bizjak <ubizjak@gmail.com>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: linux-hyperv@vger.kernel.org
> Fixes: 94212d34618c ("x86/hyperv: Implement hypervisor RAM collection into vmcore")
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
> v2: apply some asm tweaks suggested by Uros and Andrew
>
> arch/x86/hyperv/hv_crash.c | 79 ++++++++++----------
> 1 file changed, 41 insertions(+), 38 deletions(-)
>
> diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c
> index 92da1b4f2e73..1c0965eb346e 100644
> --- a/arch/x86/hyperv/hv_crash.c
> +++ b/arch/x86/hyperv/hv_crash.c
> @@ -107,14 +107,12 @@ static void __noreturn hv_panic_timeout_reboot(void)
> cpu_relax();
> }
>
> -/* This cannot be inlined as it needs stack */
> -static noinline __noclone void hv_crash_restore_tss(void)
> +static void hv_crash_restore_tss(void)
> {
> load_TR_desc();
> }
>
> -/* This cannot be inlined as it needs stack */
> -static noinline void hv_crash_clear_kernpt(void)
> +static void hv_crash_clear_kernpt(void)
> {
> pgd_t *pgd;
> p4d_t *p4d;
> @@ -125,6 +123,25 @@ static noinline void hv_crash_clear_kernpt(void)
> native_p4d_clear(p4d);
> }
>
> +
> +static void __noreturn hv_crash_handle(void)
> +{
> + hv_crash_restore_tss();
> + hv_crash_clear_kernpt();
> +
> + /* we are now fully in devirtualized normal kernel mode */
> + __crash_kexec(NULL);
> +
> + hv_panic_timeout_reboot();
> +}
> +
> +/*
> + * __naked functions do not permit function calls, not even to __always_inline
> + * functions that only contain asm() blocks themselves. So use a macro instead.
> + */
> +#define hv_wrmsr(msr, val) \
> + asm("wrmsr" :: "c"(msr), "a"((u32)val), "d"((u32)(val >> 32)) : "memory")
> +
> /*
> * This is the C entry point from the asm glue code after the disable hypercall.
> * We enter here in IA32-e long mode, ie, full 64bit mode running on kernel
> @@ -133,49 +150,35 @@ static noinline void hv_crash_clear_kernpt(void)
> * available. We restore kernel GDT, and rest of the context, and continue
> * to kexec.
> */
> -static asmlinkage void __noreturn hv_crash_c_entry(void)
> +static void __naked hv_crash_c_entry(void)
> {
> - struct hv_crash_ctxt *ctxt = &hv_crash_ctxt;
> -
> /* first thing, restore kernel gdt */
> - native_load_gdt(&ctxt->gdtr);
> + asm volatile("lgdt %0" : : "m" (hv_crash_ctxt.gdtr));
>
> - asm volatile("movw %%ax, %%ss" : : "a"(ctxt->ss));
> - asm volatile("movq %0, %%rsp" : : "m"(ctxt->rsp));
> + asm volatile("movw %0, %%ss" : : "m"(hv_crash_ctxt.ss));
> + asm volatile("movq %0, %%rsp" : : "m"(hv_crash_ctxt.rsp));
>
> - asm volatile("movw %%ax, %%ds" : : "a"(ctxt->ds));
> - asm volatile("movw %%ax, %%es" : : "a"(ctxt->es));
> - asm volatile("movw %%ax, %%fs" : : "a"(ctxt->fs));
> - asm volatile("movw %%ax, %%gs" : : "a"(ctxt->gs));
> + asm volatile("movw %0, %%ds" : : "m"(hv_crash_ctxt.ds));
> + asm volatile("movw %0, %%es" : : "m"(hv_crash_ctxt.es));
> + asm volatile("movw %0, %%fs" : : "m"(hv_crash_ctxt.fs));
> + asm volatile("movw %0, %%gs" : : "m"(hv_crash_ctxt.gs));
>
> - native_wrmsrq(MSR_IA32_CR_PAT, ctxt->pat);
> - asm volatile("movq %0, %%cr0" : : "r"(ctxt->cr0));
> + hv_wrmsr(MSR_IA32_CR_PAT, hv_crash_ctxt.pat);
> + asm volatile("movq %0, %%cr0" : : "r"(hv_crash_ctxt.cr0));
>
> - asm volatile("movq %0, %%cr8" : : "r"(ctxt->cr8));
> - asm volatile("movq %0, %%cr4" : : "r"(ctxt->cr4));
> - asm volatile("movq %0, %%cr2" : : "r"(ctxt->cr4));
> + asm volatile("movq %0, %%cr8" : : "r"(hv_crash_ctxt.cr8));
> + asm volatile("movq %0, %%cr4" : : "r"(hv_crash_ctxt.cr4));
> + asm volatile("movq %0, %%cr2" : : "r"(hv_crash_ctxt.cr4));
>
> - native_load_idt(&ctxt->idtr);
> - native_wrmsrq(MSR_GS_BASE, ctxt->gsbase);
> - native_wrmsrq(MSR_EFER, ctxt->efer);
> + asm volatile("lidt %0" : : "m" (hv_crash_ctxt.idtr));
> + hv_wrmsr(MSR_GS_BASE, hv_crash_ctxt.gsbase);
> + hv_wrmsr(MSR_EFER, hv_crash_ctxt.efer);
>
> /* restore the original kernel CS now via far return */
> - asm volatile("movzwq %0, %%rax\n\t"
> - "pushq %%rax\n\t"
> - "pushq $1f\n\t"
> - "lretq\n\t"
> - "1:nop\n\t" : : "m"(ctxt->cs) : "rax");
> -
> - /* We are in asmlinkage without stack frame, hence make C function
> - * calls which will buy stack frames.
> - */
> - hv_crash_restore_tss();
> - hv_crash_clear_kernpt();
> -
> - /* we are now fully in devirtualized normal kernel mode */
> - __crash_kexec(NULL);
> -
> - hv_panic_timeout_reboot();
> + asm volatile("pushq %q0\n\t"
> + "pushq %q1\n\t"
> + "lretq"
> + :: "r"(hv_crash_ctxt.cs), "r"(hv_crash_handle));
> }
> /* Tell gcc we are using lretq long jump in the above function intentionally */
> STACK_FRAME_NON_STANDARD(hv_crash_c_entry);
On 2/27/26 15:03, Mukesh R wrote:
> On 2/27/26 14:40, Ard Biesheuvel wrote:
>> hv_crash_c_entry() is a C function that is entered without a stack,
>> and this is only allowed for functions that have the __naked attribute,
>> which informs the compiler that it must not emit the usual prologue and
>> epilogue or emit any other kind of instrumentation that relies on a
>> stack frame.
>>
>> So split up the function, and set the __naked attribute on the initial
>> part that sets up the stack, GDT, IDT and other pieces that are needed
>> for ordinary C execution. Given that function calls are not permitted
>> either, use the existing long return coded in an asm() block to call the
>> second part of the function, which is an ordinary function that is
>> permitted to call other functions as usual.
>
> Thank you for the patch. I'll start a build on the side and test it
> out and let you know.
Well, never that simple. I am able to generate cores, both before and
after the patch, but the crash command hangs on the vmcore (with correct
vmlinux). But, since the kexec happened, I think it is fair to say that
the patch works. With that:
Reviewed-by: Mukesh R <mrathor@linux.microsoft.com>
However, I did notice a pre-exising cut-n-paste oopsie:
asm volatile("movq %0, %%cr2" : : "r"(hv_crash_ctxt.cr4)); <== cr2, not cr4
So, if you happen to do another churn, feel free to fix it. Otherwise,
no worries, I'll submit another patch.
Thanks for all your help.
-Mukesh
> Thanks,
> -Mukesh
>
>
>
>> Cc: Mukesh Rathor <mrathor@linux.microsoft.com>
>> Cc: Wei Liu <wei.liu@kernel.org>
>> Cc: Uros Bizjak <ubizjak@gmail.com>
>> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
>> Cc: linux-hyperv@vger.kernel.org
>> Fixes: 94212d34618c ("x86/hyperv: Implement hypervisor RAM collection into vmcore")
>> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
>> ---
>> v2: apply some asm tweaks suggested by Uros and Andrew
>>
>> arch/x86/hyperv/hv_crash.c | 79 ++++++++++----------
>> 1 file changed, 41 insertions(+), 38 deletions(-)
>>
>> diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c
>> index 92da1b4f2e73..1c0965eb346e 100644
>> --- a/arch/x86/hyperv/hv_crash.c
>> +++ b/arch/x86/hyperv/hv_crash.c
>> @@ -107,14 +107,12 @@ static void __noreturn hv_panic_timeout_reboot(void)
>> cpu_relax();
>> }
>> -/* This cannot be inlined as it needs stack */
>> -static noinline __noclone void hv_crash_restore_tss(void)
>> +static void hv_crash_restore_tss(void)
>> {
>> load_TR_desc();
>> }
>> -/* This cannot be inlined as it needs stack */
>> -static noinline void hv_crash_clear_kernpt(void)
>> +static void hv_crash_clear_kernpt(void)
>> {
>> pgd_t *pgd;
>> p4d_t *p4d;
>> @@ -125,6 +123,25 @@ static noinline void hv_crash_clear_kernpt(void)
>> native_p4d_clear(p4d);
>> }
>> +
>> +static void __noreturn hv_crash_handle(void)
>> +{
>> + hv_crash_restore_tss();
>> + hv_crash_clear_kernpt();
>> +
>> + /* we are now fully in devirtualized normal kernel mode */
>> + __crash_kexec(NULL);
>> +
>> + hv_panic_timeout_reboot();
>> +}
>> +
>> +/*
>> + * __naked functions do not permit function calls, not even to __always_inline
>> + * functions that only contain asm() blocks themselves. So use a macro instead.
>> + */
>> +#define hv_wrmsr(msr, val) \
>> + asm("wrmsr" :: "c"(msr), "a"((u32)val), "d"((u32)(val >> 32)) : "memory")
>> +
>> /*
>> * This is the C entry point from the asm glue code after the disable hypercall.
>> * We enter here in IA32-e long mode, ie, full 64bit mode running on kernel
>> @@ -133,49 +150,35 @@ static noinline void hv_crash_clear_kernpt(void)
>> * available. We restore kernel GDT, and rest of the context, and continue
>> * to kexec.
>> */
>> -static asmlinkage void __noreturn hv_crash_c_entry(void)
>> +static void __naked hv_crash_c_entry(void)
>> {
>> - struct hv_crash_ctxt *ctxt = &hv_crash_ctxt;
>> -
>> /* first thing, restore kernel gdt */
>> - native_load_gdt(&ctxt->gdtr);
>> + asm volatile("lgdt %0" : : "m" (hv_crash_ctxt.gdtr));
>> - asm volatile("movw %%ax, %%ss" : : "a"(ctxt->ss));
>> - asm volatile("movq %0, %%rsp" : : "m"(ctxt->rsp));
>> + asm volatile("movw %0, %%ss" : : "m"(hv_crash_ctxt.ss));
>> + asm volatile("movq %0, %%rsp" : : "m"(hv_crash_ctxt.rsp));
>> - asm volatile("movw %%ax, %%ds" : : "a"(ctxt->ds));
>> - asm volatile("movw %%ax, %%es" : : "a"(ctxt->es));
>> - asm volatile("movw %%ax, %%fs" : : "a"(ctxt->fs));
>> - asm volatile("movw %%ax, %%gs" : : "a"(ctxt->gs));
>> + asm volatile("movw %0, %%ds" : : "m"(hv_crash_ctxt.ds));
>> + asm volatile("movw %0, %%es" : : "m"(hv_crash_ctxt.es));
>> + asm volatile("movw %0, %%fs" : : "m"(hv_crash_ctxt.fs));
>> + asm volatile("movw %0, %%gs" : : "m"(hv_crash_ctxt.gs));
>> - native_wrmsrq(MSR_IA32_CR_PAT, ctxt->pat);
>> - asm volatile("movq %0, %%cr0" : : "r"(ctxt->cr0));
>> + hv_wrmsr(MSR_IA32_CR_PAT, hv_crash_ctxt.pat);
>> + asm volatile("movq %0, %%cr0" : : "r"(hv_crash_ctxt.cr0));
>> - asm volatile("movq %0, %%cr8" : : "r"(ctxt->cr8));
>> - asm volatile("movq %0, %%cr4" : : "r"(ctxt->cr4));
>> - asm volatile("movq %0, %%cr2" : : "r"(ctxt->cr4));
>> + asm volatile("movq %0, %%cr8" : : "r"(hv_crash_ctxt.cr8));
>> + asm volatile("movq %0, %%cr4" : : "r"(hv_crash_ctxt.cr4));
>> + asm volatile("movq %0, %%cr2" : : "r"(hv_crash_ctxt.cr4));
>> - native_load_idt(&ctxt->idtr);
>> - native_wrmsrq(MSR_GS_BASE, ctxt->gsbase);
>> - native_wrmsrq(MSR_EFER, ctxt->efer);
>> + asm volatile("lidt %0" : : "m" (hv_crash_ctxt.idtr));
>> + hv_wrmsr(MSR_GS_BASE, hv_crash_ctxt.gsbase);
>> + hv_wrmsr(MSR_EFER, hv_crash_ctxt.efer);
>> /* restore the original kernel CS now via far return */
>> - asm volatile("movzwq %0, %%rax\n\t"
>> - "pushq %%rax\n\t"
>> - "pushq $1f\n\t"
>> - "lretq\n\t"
>> - "1:nop\n\t" : : "m"(ctxt->cs) : "rax");
>> -
>> - /* We are in asmlinkage without stack frame, hence make C function
>> - * calls which will buy stack frames.
>> - */
>> - hv_crash_restore_tss();
>> - hv_crash_clear_kernpt();
>> -
>> - /* we are now fully in devirtualized normal kernel mode */
>> - __crash_kexec(NULL);
>> -
>> - hv_panic_timeout_reboot();
>> + asm volatile("pushq %q0\n\t"
>> + "pushq %q1\n\t"
>> + "lretq"
>> + :: "r"(hv_crash_ctxt.cs), "r"(hv_crash_handle));
>> }
>> /* Tell gcc we are using lretq long jump in the above function intentionally */
>> STACK_FRAME_NON_STANDARD(hv_crash_c_entry);
>
© 2016 - 2026 Red Hat, Inc.