[PATCH v5 3/3] hyperv: Cleanly shutdown root partition with MSHV

Praveen K Paladugu posted 3 patches 2 weeks ago
There is a newer version of this series
[PATCH v5 3/3] hyperv: Cleanly shutdown root partition with MSHV
Posted by Praveen K Paladugu 2 weeks ago
When a root partition running on MSHV is powered off, the default
behavior is to write ACPI registers to power off. However, this ACPI
write is intercepted by MSHV and results in a Machine Check
Exception (MCE).

The root partition eventually panics with a trace similar to:

  [   81.306348] reboot: Power down
  [   81.314709] mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 0: b2000000c0060001
  [   81.314711] mce: [Hardware Error]: TSC 3b8cb60a66 PPIN 11d98332458e4ea9
  [   81.314713] mce: [Hardware Error]: PROCESSOR 0:606a6 TIME 1759339405 SOCKET 0 APIC 0 microcode ffffffff
  [   81.314715] mce: [Hardware Error]: Run the above through 'mcelog --ascii'
  [   81.314716] mce: [Hardware Error]: Machine check: Processor context corrupt
  [   81.314717] Kernel panic - not syncing: Fatal machine check

To correctly shut down a root partition running on the MSHV hypervisor,
sleep state information must be configured within the hypervisor. Later,
the HVCALL_ENTER_SLEEP_STATE hypercall should be invoked as the last step
in the shutdown sequence.

The previous patch configures the sleep state information, and this patch
invokes the HVCALL_ENTER_SLEEP_STATE hypercall to cleanly shut down the
root partition.

Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
Co-developed-by: Anatol Belski <anbelski@linux.microsoft.com>
Signed-off-by: Anatol Belski <anbelski@linux.microsoft.com>
Reviewed-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
---
 arch/x86/hyperv/hv_init.c       |  2 ++
 arch/x86/include/asm/mshyperv.h |  2 ++
 drivers/hv/mshv_common.c        | 18 ++++++++++++++++++
 3 files changed, 22 insertions(+)

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 645b52dd732e..24824534ff8d 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -34,6 +34,7 @@
 #include <clocksource/hyperv_timer.h>
 #include <linux/highmem.h>
 #include <linux/export.h>
+#include <asm/reboot.h>
 
 void *hv_hypercall_pg;
 
@@ -562,6 +563,7 @@ void __init hyperv_init(void)
 		 * failures here.
 		 */
 		hv_sleep_notifiers_register();
+		machine_ops.power_off = hv_machine_power_off;
 	} else {
 		hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
 		wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 166053df0484..4c22f3257368 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -183,9 +183,11 @@ void hv_apic_init(void);
 void __init hv_init_spinlocks(void);
 bool hv_vcpu_is_preempted(int vcpu);
 void hv_sleep_notifiers_register(void);
+void hv_machine_power_off(void);
 #else
 static inline void hv_apic_init(void) {}
 static inline void hv_sleep_notifiers_register(void) {};
+static inline void hv_machine_power_off(void) {};
 #endif
 
 struct irq_domain *hv_create_pci_msi_domain(void);
diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c
index ee733ba1575e..73505cbdc324 100644
--- a/drivers/hv/mshv_common.c
+++ b/drivers/hv/mshv_common.c
@@ -216,3 +216,21 @@ void hv_sleep_notifiers_register(void)
 		pr_err("%s: cannot register reboot notifier %d\n", __func__,
 		       ret);
 }
+
+/*
+ * Power off the machine by entering S5 sleep state via Hyper-V hypercall.
+ * This call does not return if successful.
+ */
+void hv_machine_power_off(void)
+{
+	unsigned long flags;
+	struct hv_input_enter_sleep_state *in;
+
+	local_irq_save(flags);
+	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
+	in->sleep_state = HV_SLEEP_STATE_S5;
+
+	(void)hv_do_hypercall(HVCALL_ENTER_SLEEP_STATE, in, NULL);
+	local_irq_restore(flags);
+
+}
-- 
2.51.0
Re: [PATCH v5 3/3] hyperv: Cleanly shutdown root partition with MSHV
Posted by kernel test robot 1 week, 6 days ago
Hi Praveen,

kernel test robot noticed the following build warnings:

[auto build test WARNING on next-20251117]
[cannot apply to tip/x86/core linus/master v6.18-rc6 v6.18-rc5 v6.18-rc4 v6.18-rc6]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Praveen-K-Paladugu/hyperv-Add-definitions-for-MSHV-sleep-state-configuration/20251118-051204
base:   next-20251117
patch link:    https://lore.kernel.org/r/20251117210855.108126-4-prapal%40linux.microsoft.com
patch subject: [PATCH v5 3/3] hyperv: Cleanly shutdown root partition with MSHV
config: arm64-randconfig-003-20251118 (https://download.01.org/0day-ci/archive/20251118/202511182353.5FvVmUhR-lkp@intel.com/config)
compiler: clang version 19.1.7 (https://github.com/llvm/llvm-project cd708029e0b2869e80abe31ddb175f7c35361f90)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251118/202511182353.5FvVmUhR-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202511182353.5FvVmUhR-lkp@intel.com/

All warnings (new ones prefixed by >>):

   drivers/hv/mshv_common.c:210:6: warning: no previous prototype for function 'hv_sleep_notifiers_register' [-Wmissing-prototypes]
     210 | void hv_sleep_notifiers_register(void)
         |      ^
   drivers/hv/mshv_common.c:210:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
     210 | void hv_sleep_notifiers_register(void)
         | ^
         | static 
>> drivers/hv/mshv_common.c:224:6: warning: no previous prototype for function 'hv_machine_power_off' [-Wmissing-prototypes]
     224 | void hv_machine_power_off(void)
         |      ^
   drivers/hv/mshv_common.c:224:1: note: declare 'static' if the function is not intended to be used outside of this translation unit
     224 | void hv_machine_power_off(void)
         | ^
         | static 
   2 warnings generated.


vim +/hv_machine_power_off +224 drivers/hv/mshv_common.c

   219	
   220	/*
   221	 * Power off the machine by entering S5 sleep state via Hyper-V hypercall.
   222	 * This call does not return if successful.
   223	 */
 > 224	void hv_machine_power_off(void)
   225	{
   226		unsigned long flags;
   227		struct hv_input_enter_sleep_state *in;
   228	
   229		local_irq_save(flags);
   230		in = *this_cpu_ptr(hyperv_pcpu_input_arg);
   231		in->sleep_state = HV_SLEEP_STATE_S5;
   232	
   233		(void)hv_do_hypercall(HVCALL_ENTER_SLEEP_STATE, in, NULL);
   234		local_irq_restore(flags);
   235	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v5 3/3] hyperv: Cleanly shutdown root partition with MSHV
Posted by Stanislav Kinsburskii 2 weeks ago
On Mon, Nov 17, 2025 at 03:08:18PM -0600, Praveen K Paladugu wrote:
> When a root partition running on MSHV is powered off, the default
> behavior is to write ACPI registers to power-off. However, this ACPI
> write is intercepted by MSHV and will result in a Machine Check
> Exception(MCE).
> 
> The root partition eventually panics with a trace similar to:
> 
>   [   81.306348] reboot: Power down
>   [   81.314709] mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 0: b2000000c0060001
>   [   81.314711] mce: [Hardware Error]: TSC 3b8cb60a66 PPIN 11d98332458e4ea9
>   [   81.314713] mce: [Hardware Error]: PROCESSOR 0:606a6 TIME 1759339405 SOCKET 0 APIC 0 microcode ffffffff
>   [   81.314715] mce: [Hardware Error]: Run the above through 'mcelog --ascii'
>   [   81.314716] mce: [Hardware Error]: Machine check: Processor context corrupt
>   [   81.314717] Kernel panic - not syncing: Fatal machine check
> 
> To correctly shutdown a root partition running on MSHV hypervisor, sleep
> state information must be configured within the hypervisor. Later, the
> HVCALL_ENTER_SLEEP_STATE hypercall should be invoked as the last step in
> the shutdown sequence.
> 
> The previous patch configures the sleep state information and this patch
> invokes HVCALL_ENTER_SLEEP_STATE hypercall to cleanly shutdown the root
> partition.
> 
> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> Co-developed-by: Anatol Belski <anbelski@linux.microsoft.com>
> Signed-off-by: Anatol Belski <anbelski@linux.microsoft.com>
> Reviewed-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
> ---
>  arch/x86/hyperv/hv_init.c       |  2 ++
>  arch/x86/include/asm/mshyperv.h |  2 ++
>  drivers/hv/mshv_common.c        | 18 ++++++++++++++++++
>  3 files changed, 22 insertions(+)
> 
> diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> index 645b52dd732e..24824534ff8d 100644
> --- a/arch/x86/hyperv/hv_init.c
> +++ b/arch/x86/hyperv/hv_init.c
> @@ -34,6 +34,7 @@
>  #include <clocksource/hyperv_timer.h>
>  #include <linux/highmem.h>
>  #include <linux/export.h>
> +#include <asm/reboot.h>
>  
>  void *hv_hypercall_pg;
>  
> @@ -562,6 +563,7 @@ void __init hyperv_init(void)
>  		 * failures here.
>  		 */
>  		hv_sleep_notifiers_register();
> +		machine_ops.power_off = hv_machine_power_off;

It looks more natural to me to gather all the machine_ops hooks in one
place (that is, in ms_hyperv_init_platform).
It would be better to move this assignment there and do the branching on
the partition type in the power_off callback instead.

Thanks,
Stanislav


>  	} else {
>  		hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
>  		wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 166053df0484..4c22f3257368 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -183,9 +183,11 @@ void hv_apic_init(void);
>  void __init hv_init_spinlocks(void);
>  bool hv_vcpu_is_preempted(int vcpu);
>  void hv_sleep_notifiers_register(void);
> +void hv_machine_power_off(void);
>  #else
>  static inline void hv_apic_init(void) {}
>  static inline void hv_sleep_notifiers_register(void) {};
> +static inline void hv_machine_power_off(void) {};
>  #endif
>  
>  struct irq_domain *hv_create_pci_msi_domain(void);
> diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c
> index ee733ba1575e..73505cbdc324 100644
> --- a/drivers/hv/mshv_common.c
> +++ b/drivers/hv/mshv_common.c
> @@ -216,3 +216,21 @@ void hv_sleep_notifiers_register(void)
>  		pr_err("%s: cannot register reboot notifier %d\n", __func__,
>  		       ret);
>  }
> +
> +/*
> + * Power off the machine by entering S5 sleep state via Hyper-V hypercall.
> + * This call does not return if successful.
> + */
> +void hv_machine_power_off(void)
> +{
> +	unsigned long flags;
> +	struct hv_input_enter_sleep_state *in;
> +
> +	local_irq_save(flags);
> +	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
> +	in->sleep_state = HV_SLEEP_STATE_S5;
> +
> +	(void)hv_do_hypercall(HVCALL_ENTER_SLEEP_STATE, in, NULL);
> +	local_irq_restore(flags);
> +
> +}
> -- 
> 2.51.0
Re: [PATCH v5 3/3] hyperv: Cleanly shutdown root partition with MSHV
Posted by Praveen Paladugu 1 week, 4 days ago
On Mon, Nov 17, 2025 at 03:45:36PM -0800, Stanislav Kinsburskii wrote:
> On Mon, Nov 17, 2025 at 03:08:18PM -0600, Praveen K Paladugu wrote:
> > When a root partition running on MSHV is powered off, the default
> > behavior is to write ACPI registers to power-off. However, this ACPI
> > write is intercepted by MSHV and will result in a Machine Check
> > Exception(MCE).
> > 
> > The root partition eventually panics with a trace similar to:
> > 
> >   [   81.306348] reboot: Power down
> >   [   81.314709] mce: [Hardware Error]: CPU 0: Machine Check Exception: 4 Bank 0: b2000000c0060001
> >   [   81.314711] mce: [Hardware Error]: TSC 3b8cb60a66 PPIN 11d98332458e4ea9
> >   [   81.314713] mce: [Hardware Error]: PROCESSOR 0:606a6 TIME 1759339405 SOCKET 0 APIC 0 microcode ffffffff
> >   [   81.314715] mce: [Hardware Error]: Run the above through 'mcelog --ascii'
> >   [   81.314716] mce: [Hardware Error]: Machine check: Processor context corrupt
> >   [   81.314717] Kernel panic - not syncing: Fatal machine check
> > 
> > To correctly shutdown a root partition running on MSHV hypervisor, sleep
> > state information must be configured within the hypervisor. Later, the
> > HVCALL_ENTER_SLEEP_STATE hypercall should be invoked as the last step in
> > the shutdown sequence.
> > 
> > The previous patch configures the sleep state information and this patch
> > invokes HVCALL_ENTER_SLEEP_STATE hypercall to cleanly shutdown the root
> > partition.
> > 
> > Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com>
> > Co-developed-by: Anatol Belski <anbelski@linux.microsoft.com>
> > Signed-off-by: Anatol Belski <anbelski@linux.microsoft.com>
> > Reviewed-by: Easwar Hariharan <easwar.hariharan@linux.microsoft.com>
> > ---
> >  arch/x86/hyperv/hv_init.c       |  2 ++
> >  arch/x86/include/asm/mshyperv.h |  2 ++
> >  drivers/hv/mshv_common.c        | 18 ++++++++++++++++++
> >  3 files changed, 22 insertions(+)
> > 
> > diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
> > index 645b52dd732e..24824534ff8d 100644
> > --- a/arch/x86/hyperv/hv_init.c
> > +++ b/arch/x86/hyperv/hv_init.c
> > @@ -34,6 +34,7 @@
> >  #include <clocksource/hyperv_timer.h>
> >  #include <linux/highmem.h>
> >  #include <linux/export.h>
> > +#include <asm/reboot.h>
> >  
> >  void *hv_hypercall_pg;
> >  
> > @@ -562,6 +563,7 @@ void __init hyperv_init(void)
> >  		 * failures here.
> >  		 */
> >  		hv_sleep_notifiers_register();
> > +		machine_ops.power_off = hv_machine_power_off;
> 
> It looks more natural to me to gather all the machine_ops hooks in one
> place (meaning in ms_hyperv_init_platform).
> It is better moving this assignment there and do the branching on the
> partition type in the power_off callback instead.
>

Moving machine_ops hooks assignment to ms_hyperv_init_platform sounds
reasonable to me. I have a concern about doing the branching within the
callback though.

This assignment overwrites the default of using ACPI for poweroff.
By moving the branching into hv_machine_power_off, it would look like:

hv_machine_power_off {
    if not root {
        fallback to default
    } else {
        Use mshv hypercall to poweroff   
    }
}

I would rather do something cleaner like:

if root {
    machine_ops.power_off = hv_machine_power_off;
}

Praveen


> Thanks,
> Stanislav
> 
> 
> >  	} else {
> >  		hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
> >  		wrmsrq(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
> > diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> > index 166053df0484..4c22f3257368 100644
> > --- a/arch/x86/include/asm/mshyperv.h
> > +++ b/arch/x86/include/asm/mshyperv.h
> > @@ -183,9 +183,11 @@ void hv_apic_init(void);
> >  void __init hv_init_spinlocks(void);
> >  bool hv_vcpu_is_preempted(int vcpu);
> >  void hv_sleep_notifiers_register(void);
> > +void hv_machine_power_off(void);
> >  #else
> >  static inline void hv_apic_init(void) {}
> >  static inline void hv_sleep_notifiers_register(void) {};
> > +static inline void hv_machine_power_off(void) {};
> >  #endif
> >  
> >  struct irq_domain *hv_create_pci_msi_domain(void);
> > diff --git a/drivers/hv/mshv_common.c b/drivers/hv/mshv_common.c
> > index ee733ba1575e..73505cbdc324 100644
> > --- a/drivers/hv/mshv_common.c
> > +++ b/drivers/hv/mshv_common.c
> > @@ -216,3 +216,21 @@ void hv_sleep_notifiers_register(void)
> >  		pr_err("%s: cannot register reboot notifier %d\n", __func__,
> >  		       ret);
> >  }
> > +
> > +/*
> > + * Power off the machine by entering S5 sleep state via Hyper-V hypercall.
> > + * This call does not return if successful.
> > + */
> > +void hv_machine_power_off(void)
> > +{
> > +	unsigned long flags;
> > +	struct hv_input_enter_sleep_state *in;
> > +
> > +	local_irq_save(flags);
> > +	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
> > +	in->sleep_state = HV_SLEEP_STATE_S5;
> > +
> > +	(void)hv_do_hypercall(HVCALL_ENTER_SLEEP_STATE, in, NULL);
> > +	local_irq_restore(flags);
> > +
> > +}
> > -- 
> > 2.51.0