When a root partition running on MSHV is powered off, the default
behavior is to write ACPI registers to power off. However, this ACPI
write is intercepted by MSHV and will result in a Machine Check
Exception (MCE).
The root partition eventually panics with a trace similar to:
[ 81.306348] reboot: Power down
[ 81.314709] mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 0: b2000000c0060001
[ 81.314711] mce: [Hardware Error]: TSC 3b8cb60a66 PPIN 11d98332458e4ea9
[ 81.314713] mce: [Hardware Error]: PROCESSOR 0:606a6 TIME 1759339405 SOCKET 0 APIC 0 microcode ffffffff
[ 81.314715] mce: [Hardware Error]: Run the above through 'mcelog --ascii'
[ 81.314716] mce: [Hardware Error]: Machine check: Processor context corrupt
[ 81.314717] Kernel panic - not syncing: Fatal machine check
To correctly shut down a root partition running on the MSHV hypervisor, sleep
state information must be configured within the hypervisor. Later, the
HVCALL_ENTER_SLEEP_STATE hypercall should be invoked as the last step in
the shutdown sequence.
The previous patch configures the sleep state information and this patch
invokes the HVCALL_ENTER_SLEEP_STATE hypercall to cleanly shut down the root
partition.
Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
Co-developed-by: Anatol Belski <anbelski@linux.microsoft.com>
Signed-off-by: Anatol Belski <anbelski@linux.microsoft.com>
Reviewed-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
---
arch/x86/include/asm/mshyperv.h | 2 ++
arch/x86/kernel/cpu/mshyperv.c | 2 ++
drivers/hv/mshv_common.c | 18 ++++++++++++++++++
3 files changed, 22 insertions(+)
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 166053df0484..4c22f3257368 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -183,9 +183,11 @@ void hv_apic_init(void);
void __init hv_init_spinlocks(void);
bool hv_vcpu_is_preempted(int vcpu);
void hv_sleep_notifiers_register(void);
+void hv_machine_power_off(void);
#else
static inline void hv_apic_init(void) {}
static inline void hv_sleep_notifiers_register(void) {};
+static inline void hv_machine_power_off(void) {};
#endif
struct irq_domain *hv_create_pci_msi_domain(void);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index fac9953a72ef..579fb2c64cfd 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -621,6 +621,8 @@ static void __init ms_hyperv_init_platform(void)
#endif
#if IS_ENABLED(CONFIG_HYPERV)
+ if (hv_root_partition())
+ machine_ops.power_off = hv_machine_power_off;
#if defined(CONFIG_KEXEC_CORE)
machine_ops.shutdown = hv_machine_shutdown;
#endif
diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c
index f1d4e81107ee..28905e3ed9c0 100644
--- a/drivers/hv/mshv_common.c
+++ b/drivers/hv/mshv_common.c
@@ -217,4 +217,22 @@ void hv_sleep_notifiers_register(void)
pr_err("%s: cannot register reboot notifier %d\n", __func__,
ret);
}
+
+/*
+ * Power off the machine by entering S5 sleep state via Hyper-V hypercall.
+ * This call does not return if successful.
+ */
+void hv_machine_power_off(void)
+{
+ unsigned long flags;
+ struct hv_input_enter_sleep_state *in;
+
+ local_irq_save(flags);
+ in = *this_cpu_ptr(hyperv_pcpu_input_arg);
+ in->sleep_state = HV_SLEEP_STATE_S5;
+
+ (void)hv_do_hypercall(HVCALL_ENTER_SLEEP_STATE, in, NULL);
+ local_irq_restore(flags);
+
+}
#endif
--
2.51.0
On 11/26/2025 1:49 PM, Praveen K Paladugu wrote:
> When a root partition running on MSHV is powered off, the default
> behavior is to write ACPI registers to power-off. However, this ACPI
> write is intercepted by MSHV and will result in a Machine Check
> Exception(MCE).
>
> The root partition eventually panics with a trace similar to:
>
> [ 81.306348] reboot: Power down
> [ 81.314709] mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 0: b2000000c0060001
> [ 81.314711] mce: [Hardware Error]: TSC 3b8cb60a66 PPIN 11d98332458e4ea9
> [ 81.314713] mce: [Hardware Error]: PROCESSOR 0:606a6 TIME 1759339405 SOCKET 0 APIC 0 microcode ffffffff
> [ 81.314715] mce: [Hardware Error]: Run the above through 'mcelog --ascii'
> [ 81.314716] mce: [Hardware Error]: Machine check: Processor context corrupt
> [ 81.314717] Kernel panic - not syncing: Fatal machine check
>
> To correctly shutdown a root partition running on MSHV hypervisor, sleep
> state information must be configured within the hypervsior. Later, the
> HVCALL_ENTER_SLEEP_STATE hypercall should be invoked as the last step in
> the shutdown sequence.
>
> The previous patch configures the sleep state information and this patch
> invokes HVCALL_ENTER_SLEEP_STATE hypercall to cleanly shutdown the root
> partition.
>
Avoid statements like "The previous patch", since these patches may not
always follow each other directly once pulled into other trees, ported, etc.
You could explicitly mention that hv_sleep_notifiers_register() must be
called before HVCALL_ENTER_SLEEP_STATE will work correctly.
> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> Co-developed-by: Anatol Belski <anbelski@linux.microsoft.com>
> Signed-off-by: Anatol Belski <anbelski@linux.microsoft.com>
> Reviewed-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
> ---
> arch/x86/include/asm/mshyperv.h | 2 ++
> arch/x86/kernel/cpu/mshyperv.c | 2 ++
> drivers/hv/mshv_common.c | 18 ++++++++++++++++++
> 3 files changed, 22 insertions(+)
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 166053df0484..4c22f3257368 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -183,9 +183,11 @@ void hv_apic_init(void);
> void __init hv_init_spinlocks(void);
> bool hv_vcpu_is_preempted(int vcpu);
> void hv_sleep_notifiers_register(void);
> +void hv_machine_power_off(void);
> #else
> static inline void hv_apic_init(void) {}
> static inline void hv_sleep_notifiers_register(void) {};
> +static inline void hv_machine_power_off(void) {};
> #endif
>
> struct irq_domain *hv_create_pci_msi_domain(void);
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index fac9953a72ef..579fb2c64cfd 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -621,6 +621,8 @@ static void __init ms_hyperv_init_platform(void)
> #endif
>
> #if IS_ENABLED(CONFIG_HYPERV)
> + if (hv_root_partition())
> + machine_ops.power_off = hv_machine_power_off;
> #if defined(CONFIG_KEXEC_CORE)
> machine_ops.shutdown = hv_machine_shutdown;
> #endif
> diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c
> index f1d4e81107ee..28905e3ed9c0 100644
> --- a/drivers/hv/mshv_common.c
> +++ b/drivers/hv/mshv_common.c
> @@ -217,4 +217,22 @@ void hv_sleep_notifiers_register(void)
> pr_err("%s: cannot register reboot notifier %d\n", __func__,
> ret);
> }
> +
> +/*
> + * Power off the machine by entering S5 sleep state via Hyper-V hypercall.
> + * This call does not return if successful.
> + */
> +void hv_machine_power_off(void)
> +{
> + unsigned long flags;
> + struct hv_input_enter_sleep_state *in;
> +
> + local_irq_save(flags);
> + in = *this_cpu_ptr(hyperv_pcpu_input_arg);
> + in->sleep_state = HV_SLEEP_STATE_S5;
> +
> + (void)hv_do_hypercall(HVCALL_ENTER_SLEEP_STATE, in, NULL);
> + local_irq_restore(flags);
We don't expect to return here. If we do for some reason, it is surely
a bug.
I suggest either:
- fall back to native_machine_power_off(), or
- BUG(), since the machine shouldn't continue running
> +
> +}
> #endif
On Wed, Nov 26, 2025 at 03:49:53PM -0600, Praveen K Paladugu wrote:
> When a root partition running on MSHV is powered off, the default
> behavior is to write ACPI registers to power-off. However, this ACPI
> write is intercepted by MSHV and will result in a Machine Check
> Exception(MCE).
>
> The root partition eventually panics with a trace similar to:
>
> [ 81.306348] reboot: Power down
> [ 81.314709] mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 0: b2000000c0060001
> [ 81.314711] mce: [Hardware Error]: TSC 3b8cb60a66 PPIN 11d98332458e4ea9
> [ 81.314713] mce: [Hardware Error]: PROCESSOR 0:606a6 TIME 1759339405 SOCKET 0 APIC 0 microcode ffffffff
> [ 81.314715] mce: [Hardware Error]: Run the above through 'mcelog --ascii'
> [ 81.314716] mce: [Hardware Error]: Machine check: Processor context corrupt
> [ 81.314717] Kernel panic - not syncing: Fatal machine check
>
> To correctly shutdown a root partition running on MSHV hypervisor, sleep
> state information must be configured within the hypervsior. Later, the
> HVCALL_ENTER_SLEEP_STATE hypercall should be invoked as the last step in
> the shutdown sequence.
>
> The previous patch configures the sleep state information and this patch
> invokes HVCALL_ENTER_SLEEP_STATE hypercall to cleanly shutdown the root
> partition.
>
> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> Co-developed-by: Anatol Belski <anbelski@linux.microsoft.com>
> Signed-off-by: Anatol Belski <anbelski@linux.microsoft.com>
> Reviewed-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
> ---
> arch/x86/include/asm/mshyperv.h | 2 ++
> arch/x86/kernel/cpu/mshyperv.c | 2 ++
> drivers/hv/mshv_common.c | 18 ++++++++++++++++++
> 3 files changed, 22 insertions(+)
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 166053df0484..4c22f3257368 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -183,9 +183,11 @@ void hv_apic_init(void);
> void __init hv_init_spinlocks(void);
> bool hv_vcpu_is_preempted(int vcpu);
> void hv_sleep_notifiers_register(void);
> +void hv_machine_power_off(void);
> #else
> static inline void hv_apic_init(void) {}
> static inline void hv_sleep_notifiers_register(void) {};
> +static inline void hv_machine_power_off(void) {};
> #endif
>
> struct irq_domain *hv_create_pci_msi_domain(void);
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index fac9953a72ef..579fb2c64cfd 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -621,6 +621,8 @@ static void __init ms_hyperv_init_platform(void)
> #endif
>
> #if IS_ENABLED(CONFIG_HYPERV)
> + if (hv_root_partition())
> + machine_ops.power_off = hv_machine_power_off;
> #if defined(CONFIG_KEXEC_CORE)
> machine_ops.shutdown = hv_machine_shutdown;
> #endif
> diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c
> index f1d4e81107ee..28905e3ed9c0 100644
> --- a/drivers/hv/mshv_common.c
> +++ b/drivers/hv/mshv_common.c
> @@ -217,4 +217,22 @@ void hv_sleep_notifiers_register(void)
> pr_err("%s: cannot register reboot notifier %d\n", __func__,
> ret);
> }
> +
> +/*
> + * Power off the machine by entering S5 sleep state via Hyper-V hypercall.
> + * This call does not return if successful.
> + */
> +void hv_machine_power_off(void)
> +{
> + unsigned long flags;
> + struct hv_input_enter_sleep_state *in;
> +
> + local_irq_save(flags);
> + in = *this_cpu_ptr(hyperv_pcpu_input_arg);
> + in->sleep_state = HV_SLEEP_STATE_S5;
> +
> + (void)hv_do_hypercall(HVCALL_ENTER_SLEEP_STATE, in, NULL);
Should the error be printed here?
Acked-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
> + local_irq_restore(flags);
> +
> +}
> #endif
> --
> 2.51.0
On 12/1/2025 11:05 AM, Stanislav Kinsburskii wrote:
> On Wed, Nov 26, 2025 at 03:49:53PM -0600, Praveen K Paladugu wrote:
>> When a root partition running on MSHV is powered off, the default
>> behavior is to write ACPI registers to power-off. However, this ACPI
>> write is intercepted by MSHV and will result in a Machine Check
>> Exception(MCE).
>>
>> The root partition eventually panics with a trace similar to:
>>
>> [ 81.306348] reboot: Power down
>> [ 81.314709] mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 0: b2000000c0060001
>> [ 81.314711] mce: [Hardware Error]: TSC 3b8cb60a66 PPIN 11d98332458e4ea9
>> [ 81.314713] mce: [Hardware Error]: PROCESSOR 0:606a6 TIME 1759339405 SOCKET 0 APIC 0 microcode ffffffff
>> [ 81.314715] mce: [Hardware Error]: Run the above through 'mcelog --ascii'
>> [ 81.314716] mce: [Hardware Error]: Machine check: Processor context corrupt
>> [ 81.314717] Kernel panic - not syncing: Fatal machine check
>>
>> To correctly shutdown a root partition running on MSHV hypervisor, sleep
>> state information must be configured within the hypervsior. Later, the
>> HVCALL_ENTER_SLEEP_STATE hypercall should be invoked as the last step in
>> the shutdown sequence.
>>
>> The previous patch configures the sleep state information and this patch
>> invokes HVCALL_ENTER_SLEEP_STATE hypercall to cleanly shutdown the root
>> partition.
>>
>> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
>> Co-developed-by: Anatol Belski <anbelski@linux.microsoft.com>
>> Signed-off-by: Anatol Belski <anbelski@linux.microsoft.com>
>> Reviewed-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
>> ---
>> arch/x86/include/asm/mshyperv.h | 2 ++
>> arch/x86/kernel/cpu/mshyperv.c | 2 ++
>> drivers/hv/mshv_common.c | 18 ++++++++++++++++++
>> 3 files changed, 22 insertions(+)
>>
>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>> index 166053df0484..4c22f3257368 100644
>> --- a/arch/x86/include/asm/mshyperv.h
>> +++ b/arch/x86/include/asm/mshyperv.h
>> @@ -183,9 +183,11 @@ void hv_apic_init(void);
>> void __init hv_init_spinlocks(void);
>> bool hv_vcpu_is_preempted(int vcpu);
>> void hv_sleep_notifiers_register(void);
>> +void hv_machine_power_off(void);
>> #else
>> static inline void hv_apic_init(void) {}
>> static inline void hv_sleep_notifiers_register(void) {};
>> +static inline void hv_machine_power_off(void) {};
>> #endif
>>
>> struct irq_domain *hv_create_pci_msi_domain(void);
>> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
>> index fac9953a72ef..579fb2c64cfd 100644
>> --- a/arch/x86/kernel/cpu/mshyperv.c
>> +++ b/arch/x86/kernel/cpu/mshyperv.c
>> @@ -621,6 +621,8 @@ static void __init ms_hyperv_init_platform(void)
>> #endif
>>
>> #if IS_ENABLED(CONFIG_HYPERV)
>> + if (hv_root_partition())
>> + machine_ops.power_off = hv_machine_power_off;
>> #if defined(CONFIG_KEXEC_CORE)
>> machine_ops.shutdown = hv_machine_shutdown;
>> #endif
>> diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c
>> index f1d4e81107ee..28905e3ed9c0 100644
>> --- a/drivers/hv/mshv_common.c
>> +++ b/drivers/hv/mshv_common.c
>> @@ -217,4 +217,22 @@ void hv_sleep_notifiers_register(void)
>> pr_err("%s: cannot register reboot notifier %d\n", __func__,
>> ret);
>> }
>> +
>> +/*
>> + * Power off the machine by entering S5 sleep state via Hyper-V hypercall.
>> + * This call does not return if successful.
>> + */
>> +void hv_machine_power_off(void)
>> +{
>> + unsigned long flags;
>> + struct hv_input_enter_sleep_state *in;
>> +
>> + local_irq_save(flags);
>> + in = *this_cpu_ptr(hyperv_pcpu_input_arg);
>> + in->sleep_state = HV_SLEEP_STATE_S5;
>> +
>> + (void)hv_do_hypercall(HVCALL_ENTER_SLEEP_STATE, in, NULL);
>
> Should this the error be printed?
>
> Acked-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com>
I will add a BUG() here, as the hypercall is not supposed to return.
>> + local_irq_restore(flags);
>> +
>> +}
>> #endif
>> --
>> 2.51.0
>
--
Regards,
Praveen K Paladugu
© 2016 - 2026 Red Hat, Inc.