Handle Machine Checks (#MC) that happen on VM-Enter (VMX or TDX) outside
of KVM's fastpath so that as much host state as possible is re-loaded
before invoking the kernel's #MC handler. The only requirement is that
KVM invokes the #MC handler before enabling IRQs (and even that could
_probably_ be relaxed to handling #MCs before enabling preemption).
Waiting to handle #MCs until "more" host state is loaded hardens KVM
against flaws in the #MC handler, which has historically been quite
brittle. E.g. prior to commit 5567d11c21a1 ("x86/mce: Send #MC singal from
task work"), the #MC code could trigger a schedule() with IRQs and
preemption disabled. That led to a KVM hack-a-fix in commit 1811d979c716
("x86/kvm: move kvm_load/put_guest_xcr0 into atomic context").
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/vmx/main.c | 13 ++++++++++++-
arch/x86/kvm/vmx/tdx.c | 3 ---
arch/x86/kvm/vmx/vmx.c | 3 ---
3 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 0eb2773b2ae2..1beaec5b9727 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -608,6 +608,17 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
}
+static void vt_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+{
+ if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
+ kvm_machine_check();
+
+ if (is_td_vcpu(vcpu))
+ return;
+
+ return vmx_handle_exit_irqoff(vcpu);
+}
+
static void vt_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
{
if (is_td_vcpu(vcpu))
@@ -969,7 +980,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.load_mmu_pgd = vt_op(load_mmu_pgd),
.check_intercept = vmx_check_intercept,
- .handle_exit_irqoff = vmx_handle_exit_irqoff,
+ .handle_exit_irqoff = vt_op(handle_exit_irqoff),
.update_cpu_dirty_logging = vt_op(update_cpu_dirty_logging),
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 326db9b9c567..a2f6ba3268d1 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1069,9 +1069,6 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
if (unlikely((tdx->vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR))
return EXIT_FASTPATH_NONE;
- if (unlikely(vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
- kvm_machine_check();
-
trace_kvm_exit(vcpu, KVM_ISA_VMX);
if (unlikely(tdx_failed_vmentry(vcpu)))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1021d3b65ea0..123dae8cf46b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7527,9 +7527,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
if (unlikely(vmx->fail))
return EXIT_FASTPATH_NONE;
- if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
- kvm_machine_check();
-
trace_kvm_exit(vcpu, KVM_ISA_VMX);
if (unlikely(vmx_get_exit_reason(vcpu).failed_vmentry))
--
2.51.1.930.gacf6e81ea2-goog
Hi,
On Thu, Oct 30, 2025 at 03:42:44PM -0700, Sean Christopherson wrote:
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -608,6 +608,17 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
> vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
> }
>
> +static void vt_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> +{
> + if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
> + kvm_machine_check();
> +
> + if (is_td_vcpu(vcpu))
> + return;
> +
> + return vmx_handle_exit_irqoff(vcpu);
> +}
I bisected kvm-x86/next down to this change for a TDX guest not booting
and host producing errors like:
watchdog: CPU118: Watchdog detected hard LOCKUP on cpu 118
Dropping the is_td_vcpu(vcpu) check above fixes the issue. Earlier the
call for vmx_handle_exit_irqoff() was unconditional.
Probably the (u16) cast above can be dropped too? It was never used for
TDX looking at the patch.
Regards,
Tony
On Mon, 2025-11-17 at 14:38 +0200, Tony Lindgren wrote:
> I bisected kvm-x86/next down to this change for a TDX guest not
> booting and host producing errors like:
>
> watchdog: CPU118: Watchdog detected hard LOCKUP on cpu 118
>
> Dropping the is_td_vcpu(vcpu) check above fixes the issue. Earlier
> the call for vmx_handle_exit_irqoff() was unconditional.
>
> Probably the (u16) cast above can be dropped too? It was never used
> for TDX looking at the patch.

Ah! Thanks for picking this up. I had almost got there but lost my TDX machine for a bit.
On Mon, Nov 17, 2025, Tony Lindgren wrote:
> Hi,
>
> On Thu, Oct 30, 2025 at 03:42:44PM -0700, Sean Christopherson wrote:
> > --- a/arch/x86/kvm/vmx/main.c
> > +++ b/arch/x86/kvm/vmx/main.c
> > @@ -608,6 +608,17 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
> > vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
> > }
> >
> > +static void vt_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> > +{
> > + if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
> > + kvm_machine_check();
> > +
> > + if (is_td_vcpu(vcpu))
> > + return;
> > +
> > + return vmx_handle_exit_irqoff(vcpu);
> > +}
>
> I bisected kvm-x86/next down to this change for a TDX guest not booting
> and host producing errors like:
>
> watchdog: CPU118: Watchdog detected hard LOCKUP on cpu 118
>
> Dropping the is_td_vcpu(vcpu) check above fixes the issue. Earlier the
> call for vmx_handle_exit_irqoff() was unconditional.
Ugh, once you see it, it's obvious. Sorry :-(
I'll drop the entire series and send a v2. There's only one other patch that I
already sent the "thank you" for, so I think it's worth unwinding to avoid
breaking bisection for TDX (and because the diff can be very different).
Lightly tested, but I think this patch can instead be:
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 163f854a39f2..6d41d2fc8043 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1063,9 +1063,6 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
if (unlikely((tdx->vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR))
return EXIT_FASTPATH_NONE;
- if (unlikely(vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
- kvm_machine_check();
-
trace_kvm_exit(vcpu, KVM_ISA_VMX);
if (unlikely(tdx_failed_vmentry(vcpu)))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d98107a7bdaa..d1117da5463f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7035,10 +7035,19 @@ void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
if (to_vt(vcpu)->emulation_required)
return;
- if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXTERNAL_INTERRUPT)
+ switch (vmx_get_exit_reason(vcpu).basic) {
+ case EXIT_REASON_EXTERNAL_INTERRUPT:
handle_external_interrupt_irqoff(vcpu, vmx_get_intr_info(vcpu));
- else if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXCEPTION_NMI)
+ break;
+ case EXIT_REASON_EXCEPTION_NMI:
handle_exception_irqoff(vcpu, vmx_get_intr_info(vcpu));
+ break;
+ case EXIT_REASON_MCE_DURING_VMENTRY:
+ kvm_machine_check();
+ break;
+ default:
+ break;
+ }
}
/*
@@ -7501,9 +7510,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
if (unlikely(vmx->fail))
return EXIT_FASTPATH_NONE;
- if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
- kvm_machine_check();
-
trace_kvm_exit(vcpu, KVM_ISA_VMX);
if (unlikely(vmx_get_exit_reason(vcpu).failed_vmentry))
On Mon, Nov 17, 2025 at 07:47:49AM -0800, Sean Christopherson wrote:
> On Mon, Nov 17, 2025, Tony Lindgren wrote:
> > Hi,
> >
> > On Thu, Oct 30, 2025 at 03:42:44PM -0700, Sean Christopherson wrote:
> > > --- a/arch/x86/kvm/vmx/main.c
> > > +++ b/arch/x86/kvm/vmx/main.c
> > > @@ -608,6 +608,17 @@ static void vt_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa,
> > > vmx_load_mmu_pgd(vcpu, root_hpa, pgd_level);
> > > }
> > >
> > > +static void vt_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> > > +{
> > > + if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
> > > + kvm_machine_check();
> > > +
> > > + if (is_td_vcpu(vcpu))
> > > + return;
> > > +
> > > + return vmx_handle_exit_irqoff(vcpu);
> > > +}
> >
> > I bisected kvm-x86/next down to this change for a TDX guest not booting
> > and host producing errors like:
> >
> > watchdog: CPU118: Watchdog detected hard LOCKUP on cpu 118
> >
> > Dropping the is_td_vcpu(vcpu) check above fixes the issue. Earlier the
> > call for vmx_handle_exit_irqoff() was unconditional.
>
> Ugh, once you see it, it's obvious. Sorry :-(
>
> I'll drop the entire series and send a v2. There's only one other patch that I
> already sent the "thank you" for, so I think it's worth unwinding to avoid
> breaking bisection for TDX (and because the diff can be very different).
OK thanks.
> Lightly tested, but I think this patch can instead be:
>
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 163f854a39f2..6d41d2fc8043 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -1063,9 +1063,6 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
> if (unlikely((tdx->vp_enter_ret & TDX_SW_ERROR) == TDX_SW_ERROR))
> return EXIT_FASTPATH_NONE;
>
> - if (unlikely(vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
> - kvm_machine_check();
> -
> trace_kvm_exit(vcpu, KVM_ISA_VMX);
>
> if (unlikely(tdx_failed_vmentry(vcpu)))
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index d98107a7bdaa..d1117da5463f 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7035,10 +7035,19 @@ void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> if (to_vt(vcpu)->emulation_required)
> return;
>
> - if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXTERNAL_INTERRUPT)
> + switch (vmx_get_exit_reason(vcpu).basic) {
> + case EXIT_REASON_EXTERNAL_INTERRUPT:
> handle_external_interrupt_irqoff(vcpu, vmx_get_intr_info(vcpu));
> - else if (vmx_get_exit_reason(vcpu).basic == EXIT_REASON_EXCEPTION_NMI)
> + break;
> + case EXIT_REASON_EXCEPTION_NMI:
> handle_exception_irqoff(vcpu, vmx_get_intr_info(vcpu));
> + break;
> + case EXIT_REASON_MCE_DURING_VMENTRY:
> + kvm_machine_check();
> + break;
> + default:
> + break;
> + }
> }
>
> /*
> @@ -7501,9 +7510,6 @@ fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, u64 run_flags)
> if (unlikely(vmx->fail))
> return EXIT_FASTPATH_NONE;
>
> - if (unlikely((u16)vmx_get_exit_reason(vcpu).basic == EXIT_REASON_MCE_DURING_VMENTRY))
> - kvm_machine_check();
> -
> trace_kvm_exit(vcpu, KVM_ISA_VMX);
>
> if (unlikely(vmx_get_exit_reason(vcpu).failed_vmentry))
Looks good to me.
Regards,
Tony
© 2016 - 2026 Red Hat, Inc.