Move VMX's handling of NMI VM-Exits into vmx_vcpu_enter_exit() so that
the NMI is handled prior to leaving the safety of noinstr. Handling the
NMI after leaving noinstr exposes the kernel to potential ordering
problems as an instrumentation-induced fault, e.g. #DB, #BP, #PF, etc.
will unblock NMIs when IRETing back to the faulting instruction.
Reported-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/vmx/vmcs.h | 4 ++--
arch/x86/kvm/vmx/vmenter.S | 8 ++++----
arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++-------------
arch/x86/kvm/x86.h | 6 +++---
4 files changed, 30 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index ac290a44a693..7c1996b433e2 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -75,7 +75,7 @@ struct loaded_vmcs {
struct vmcs_controls_shadow controls_shadow;
};
-static inline bool is_intr_type(u32 intr_info, u32 type)
+static __always_inline bool is_intr_type(u32 intr_info, u32 type)
{
const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK;
@@ -146,7 +146,7 @@ static inline bool is_icebp(u32 intr_info)
return is_intr_type(intr_info, INTR_TYPE_PRIV_SW_EXCEPTION);
}
-static inline bool is_nmi(u32 intr_info)
+static __always_inline bool is_nmi(u32 intr_info)
{
return is_intr_type(intr_info, INTR_TYPE_NMI_INTR);
}
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 9d987e7e48c4..059243085211 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -299,6 +299,10 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
SYM_FUNC_END(__vmx_vcpu_run)
+SYM_FUNC_START(vmx_do_nmi_irqoff)
+ VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(vmx_do_nmi_irqoff)
+
.section .text, "ax"
@@ -353,10 +357,6 @@ SYM_FUNC_START(vmread_error_trampoline)
SYM_FUNC_END(vmread_error_trampoline)
#endif
-SYM_FUNC_START(vmx_do_nmi_irqoff)
- VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
-SYM_FUNC_END(vmx_do_nmi_irqoff)
-
SYM_FUNC_START(vmx_do_interrupt_irqoff)
VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c242e2591896..b03020ca1840 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -5095,8 +5095,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
vect_info = vmx->idt_vectoring_info;
intr_info = vmx_get_intr_info(vcpu);
+ /*
+ * Machine checks are handled by handle_exception_irqoff(), or by
+ * vmx_vcpu_run() if a #MC occurs on VM-Entry. NMIs are handled by
+ * vmx_vcpu_enter_exit().
+ */
if (is_machine_check(intr_info) || is_nmi(intr_info))
- return 1; /* handled by handle_exception_nmi_irqoff() */
+ return 1;
/*
* Queue the exception here instead of in handle_nm_fault_irqoff().
@@ -6809,7 +6814,7 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
}
-static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
+static void handle_exception_irqoff(struct vcpu_vmx *vmx)
{
u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
@@ -6822,12 +6827,6 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
/* Handle machine checks before interrupts are enabled */
else if (is_machine_check(intr_info))
kvm_machine_check();
- /* We need to handle NMIs before interrupts are enabled */
- else if (is_nmi(intr_info)) {
- kvm_before_interrupt(&vmx->vcpu, KVM_HANDLING_NMI);
- vmx_do_nmi_irqoff();
- kvm_after_interrupt(&vmx->vcpu);
- }
}
static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
@@ -6857,7 +6856,7 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
handle_external_interrupt_irqoff(vcpu);
else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
- handle_exception_nmi_irqoff(vmx);
+ handle_exception_irqoff(vmx);
}
/*
@@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
vmx_enable_fb_clear(vmx);
+ if (unlikely(vmx->fail))
+ vmx->exit_reason.full = 0xdead;
+ else
+ vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+
+ if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
+ is_nmi(vmx_get_intr_info(vcpu))) {
+ kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
+ vmx_do_nmi_irqoff();
+ kvm_after_interrupt(vcpu);
+ }
+
guest_state_exit_irqoff();
}
@@ -7260,12 +7271,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->idt_vectoring_info = 0;
- if (unlikely(vmx->fail)) {
- vmx->exit_reason.full = 0xdead;
+ if (unlikely(vmx->fail))
return EXIT_FASTPATH_NONE;
- }
- vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
kvm_machine_check();
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 9de72586f406..44d1827f0a30 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -382,13 +382,13 @@ enum kvm_intr_type {
KVM_HANDLING_NMI,
};
-static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
- enum kvm_intr_type intr)
+static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
+ enum kvm_intr_type intr)
{
WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
}
-static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
+static __always_inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
{
WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
}
--
2.39.0.rc1.256.g54fd8350bd-goog
On 13/12/2022 2:09 pm, Sean Christopherson wrote:
> Move VMX's handling of NMI VM-Exits into vmx_vcpu_enter_exit() so that
> the NMI is handled prior to leaving the safety of noinstr. Handling the
> NMI after leaving noinstr exposes the kernel to potential ordering
> problems as an instrumentation-induced fault, e.g. #DB, #BP, #PF, etc.
> will unblock NMIs when IRETing back to the faulting instruction.
>
> Reported-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/kvm/vmx/vmcs.h | 4 ++--
> arch/x86/kvm/vmx/vmenter.S | 8 ++++----
> arch/x86/kvm/vmx/vmx.c | 34 +++++++++++++++++++++-------------
> arch/x86/kvm/x86.h | 6 +++---
> 4 files changed, 30 insertions(+), 22 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
> index ac290a44a693..7c1996b433e2 100644
> --- a/arch/x86/kvm/vmx/vmcs.h
> +++ b/arch/x86/kvm/vmx/vmcs.h
> @@ -75,7 +75,7 @@ struct loaded_vmcs {
> struct vmcs_controls_shadow controls_shadow;
> };
>
> -static inline bool is_intr_type(u32 intr_info, u32 type)
> +static __always_inline bool is_intr_type(u32 intr_info, u32 type)
> {
> const u32 mask = INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK;
>
> @@ -146,7 +146,7 @@ static inline bool is_icebp(u32 intr_info)
> return is_intr_type(intr_info, INTR_TYPE_PRIV_SW_EXCEPTION);
> }
>
> -static inline bool is_nmi(u32 intr_info)
> +static __always_inline bool is_nmi(u32 intr_info)
> {
> return is_intr_type(intr_info, INTR_TYPE_NMI_INTR);
> }
> diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
> index 9d987e7e48c4..059243085211 100644
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -299,6 +299,10 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
>
> SYM_FUNC_END(__vmx_vcpu_run)
>
> +SYM_FUNC_START(vmx_do_nmi_irqoff)
> + VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
> +SYM_FUNC_END(vmx_do_nmi_irqoff)
> +
>
> .section .text, "ax"
>
> @@ -353,10 +357,6 @@ SYM_FUNC_START(vmread_error_trampoline)
> SYM_FUNC_END(vmread_error_trampoline)
> #endif
>
> -SYM_FUNC_START(vmx_do_nmi_irqoff)
> - VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
> -SYM_FUNC_END(vmx_do_nmi_irqoff)
> -
> SYM_FUNC_START(vmx_do_interrupt_irqoff)
> VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
> SYM_FUNC_END(vmx_do_interrupt_irqoff)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index c242e2591896..b03020ca1840 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -5095,8 +5095,13 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
> vect_info = vmx->idt_vectoring_info;
> intr_info = vmx_get_intr_info(vcpu);
>
> + /*
> + * Machine checks are handled by handle_exception_irqoff(), or by
> + * vmx_vcpu_run() if a #MC occurs on VM-Entry. NMIs are handled by
> + * vmx_vcpu_enter_exit().
> + */
> if (is_machine_check(intr_info) || is_nmi(intr_info))
> - return 1; /* handled by handle_exception_nmi_irqoff() */
> + return 1;
>
> /*
> * Queue the exception here instead of in handle_nm_fault_irqoff().
> @@ -6809,7 +6814,7 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
> rdmsrl(MSR_IA32_XFD_ERR, vcpu->arch.guest_fpu.xfd_err);
> }
>
> -static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
> +static void handle_exception_irqoff(struct vcpu_vmx *vmx)
> {
> u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
>
> @@ -6822,12 +6827,6 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
> /* Handle machine checks before interrupts are enabled */
> else if (is_machine_check(intr_info))
> kvm_machine_check();
> - /* We need to handle NMIs before interrupts are enabled */
> - else if (is_nmi(intr_info)) {
> - kvm_before_interrupt(&vmx->vcpu, KVM_HANDLING_NMI);
> - vmx_do_nmi_irqoff();
> - kvm_after_interrupt(&vmx->vcpu);
> - }
> }
>
> static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
> @@ -6857,7 +6856,7 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
> if (vmx->exit_reason.basic == EXIT_REASON_EXTERNAL_INTERRUPT)
> handle_external_interrupt_irqoff(vcpu);
> else if (vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI)
> - handle_exception_nmi_irqoff(vmx);
> + handle_exception_irqoff(vmx);
> }
>
> /*
> @@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
>
> vmx_enable_fb_clear(vmx);
>
> + if (unlikely(vmx->fail))
> + vmx->exit_reason.full = 0xdead;
> + else
> + vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
> +
> + if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> + is_nmi(vmx_get_intr_info(vcpu))) {
> + kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> + vmx_do_nmi_irqoff();
> + kvm_after_interrupt(vcpu);
> + }
This part of the change introduces at least two regressions:

(1) A simple reproducible case is to collect PMIs inside the guest (e.g. via perf),
where the number of guest PMIs (a type of NMI) becomes very sparse. With this
change, KVM does not call asm_exc_nmi_kvm_vmx as expected and instead injects
the PMI into the guest. This is because vmx_get_intr_info() depends on this line:

	vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;

which is only executed after is_nmi(vmx_get_intr_info(vcpu)). As a result,
vmx_get_intr_info() returns the stale value cached on a previous VM-Exit instead
of the latest value from vmcs_read32(VM_EXIT_INTR_INFO) (see the sketch of the
accessor after this list).
(2) Even worse, when control flow is transferred to the host NMI handler, the
context needed by the host NMI handler has not been properly restored:
- update_debugctlmsr(vmx->host_debugctlmsr);
- pt_guest_exit();
- kvm_load_host_xsave_state(); // restores Arch LBR state for the host NMI handler

(3) In addition, trace_kvm_exit() should ideally appear before the host NMI
trace logs, which makes it easier to understand.
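For context, vmx_get_intr_info() is a lazy-load accessor that looks roughly like
this (paraphrased from arch/x86/kvm/vmx/vmx.h, not verbatim, so details may
differ slightly):

	static inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu)
	{
		struct vcpu_vmx *vmx = to_vmx(vcpu);

		/*
		 * Re-read vmcs.VM_EXIT_INTR_INFO only if the "register" hasn't
		 * been marked available since the last regs_avail reset;
		 * otherwise hand back the value cached on a previous VM-Exit.
		 */
		if (!kvm_register_test_and_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2))
			vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);

		return vmx->exit_intr_info;
	}

Until regs_avail is reset, every caller (including the new is_nmi() check in
vmx_vcpu_enter_exit()) sees the value cached by the last exit that actually
read the field.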
A proposed fix is to delay vmx_do_nmi_irqoff() a little, rather than revert the move:
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e6849f780dba..1f29b7f22da7 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7230,13 +7230,6 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
else
vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
- if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
- is_nmi(vmx_get_intr_info(vcpu))) {
- kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
- vmx_do_nmi_irqoff();
- kvm_after_interrupt(vcpu);
- }
-
guest_state_exit_irqoff();
}
@@ -7389,6 +7382,13 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
trace_kvm_exit(vcpu, KVM_ISA_VMX);
+ if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
+ is_nmi(vmx_get_intr_info(vcpu))) {
+ kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
+ vmx_do_nmi_irqoff();
+ kvm_after_interrupt(vcpu);
+ }
+
if (unlikely(vmx->exit_reason.failed_vmentry))
return EXIT_FASTPATH_NONE;
Please help confirm this change; at least it fixes a lot of vPMI tests.
> +
> guest_state_exit_irqoff();
> }
>
> @@ -7260,12 +7271,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>
> vmx->idt_vectoring_info = 0;
>
> - if (unlikely(vmx->fail)) {
> - vmx->exit_reason.full = 0xdead;
> + if (unlikely(vmx->fail))
> return EXIT_FASTPATH_NONE;
> - }
>
> - vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
> if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
> kvm_machine_check();
>
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 9de72586f406..44d1827f0a30 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -382,13 +382,13 @@ enum kvm_intr_type {
> KVM_HANDLING_NMI,
> };
>
> -static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
> - enum kvm_intr_type intr)
> +static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
> + enum kvm_intr_type intr)
> {
> WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
> }
>
> -static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> +static __always_inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> {
> WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
> }
On Thu, Aug 24, 2023, Like Xu wrote:
> On 13/12/2022 2:09 pm, Sean Christopherson wrote:
> > Move VMX's handling of NMI VM-Exits into vmx_vcpu_enter_exit() so that
> > the NMI is handled prior to leaving the safety of noinstr. Handling the
> > NMI after leaving noinstr exposes the kernel to potential ordering
> > problems as an instrumentation-induced fault, e.g. #DB, #BP, #PF, etc.
> > will unblock NMIs when IRETing back to the faulting instruction.
> (3) In addition, trace_kvm_exit() should ideally appear before the host NMI
> trace logs, which makes it easier to understand.
Ideally, yes, but tracepoints are not remotely noinstr friendly.
> A proposal fix is to delay vmx_do_nmi_irqoff() a little bit, but not a revert move:
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index e6849f780dba..1f29b7f22da7 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -7230,13 +7230,6 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
> else
> vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
>
> - if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> - is_nmi(vmx_get_intr_info(vcpu))) {
> - kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> - vmx_do_nmi_irqoff();
> - kvm_after_interrupt(vcpu);
> - }
> -
> guest_state_exit_irqoff();
> }
>
> @@ -7389,6 +7382,13 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
>
> trace_kvm_exit(vcpu, KVM_ISA_VMX);
>
> + if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> + is_nmi(vmx_get_intr_info(vcpu))) {
> + kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> + vmx_do_nmi_irqoff();
> + kvm_after_interrupt(vcpu);
> + }
No, the whole point of doing NMI handling in vmx_vcpu_enter_exit() is so that NMIs
are serviced before instrumentation is enabled.
I think the below is sufficient (untested at this point). Not quite minimal, e.g.
I'm pretty sure there's (currently) no need to snapshot IDT_VECTORING_INFO_FIELD
so early, but I can't think of any reason to wait.
--
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 24 Aug 2023 06:49:36 -0700
Subject: [PATCH] KVM: VMX: Refresh available regs and IDT vectoring info
before NMI handling
Reset the mask of available "registers" and refresh the IDT vectoring
info snapshot in vmx_vcpu_enter_exit(), before KVM potentially handles
an NMI VM-Exit. One of the "registers" that KVM VMX lazily loads is the
vmcs.VM_EXIT_INTR_INFO field, which holds the vector+type on "exception
or NMI" VM-Exits, i.e. is needed to identify NMIs. Clearing the available
registers bitmask after handling NMIs results in KVM querying info from
the last VM-Exit that read vmcs.VM_EXIT_INTR_INFO, and leads to both
missed NMIs and spurious NMIs from the guest's perspective.
Opportunistically grab vmcs.IDT_VECTORING_INFO_FIELD early in the VM-Exit
path too, e.g. to guard against similar consumption of stale data. The
field is read on every "normal" VM-Exit, and there's no point in delaying
the inevitable.
Reported-by: Like Xu <like.xu.linux@gmail.com>
Fixes: 11df586d774f ("KVM: VMX: Handle NMI VM-Exits in noinstr region")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/vmx/vmx.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e6849f780dba..d2b78ab7a9f2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7222,13 +7222,20 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
flags);
vcpu->arch.cr2 = native_read_cr2();
+ vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
+
+ vmx->idt_vectoring_info = 0;
vmx_enable_fb_clear(vmx);
- if (unlikely(vmx->fail))
+ if (unlikely(vmx->fail)) {
vmx->exit_reason.full = 0xdead;
- else
- vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+ goto out;
+ }
+
+ vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+ if (likely(!vmx->exit_reason.failed_vmentry))
+ vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
is_nmi(vmx_get_intr_info(vcpu))) {
@@ -7237,6 +7244,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
kvm_after_interrupt(vcpu);
}
+out:
guest_state_exit_irqoff();
}
@@ -7358,8 +7366,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
loadsegment(es, __USER_DS);
#endif
- vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
-
pt_guest_exit(vmx);
kvm_load_host_xsave_state(vcpu);
@@ -7376,17 +7382,12 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->nested.nested_run_pending = 0;
}
- vmx->idt_vectoring_info = 0;
-
if (unlikely(vmx->fail))
return EXIT_FASTPATH_NONE;
if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
kvm_machine_check();
- if (likely(!vmx->exit_reason.failed_vmentry))
- vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
trace_kvm_exit(vcpu, KVM_ISA_VMX);
if (unlikely(vmx->exit_reason.failed_vmentry))
base-commit: fff2e47e6c3b8050ca26656693caa857e3a8b740
--
On Thu, Aug 24, 2023, Sean Christopherson wrote:
> On Thu, Aug 24, 2023, Like Xu wrote:
> > @@ -7389,6 +7382,13 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
> >
> > trace_kvm_exit(vcpu, KVM_ISA_VMX);
> >
> > + if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> > + is_nmi(vmx_get_intr_info(vcpu))) {
> > + kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> > + vmx_do_nmi_irqoff();
> > + kvm_after_interrupt(vcpu);
> > + }
>
> No, the whole point of doing NMI handling in vmx_vcpu_enter_exit() is so that NMIs
> are serviced before instrumentation is enabled.
>
> I think the below is sufficient (untested at this point). Not quite minimal, e.g.
> I'm pretty sure there's (currently) no need to snapshot IDT_VECTORING_INFO_FIELD
> so early, but I can't think of any reason to wait.
>
> --
> From: Sean Christopherson <seanjc@google.com>
> Date: Thu, 24 Aug 2023 06:49:36 -0700
> Subject: [PATCH] KVM: VMX: Refresh available regs and IDT vectoring info
> before NMI handling
>
> Reset the mask of available "registers" and refresh the IDT vectoring
> info snapshot in vmx_vcpu_enter_exit(), before KVM potentially handles
> an NMI VM-Exit. One of the "registers" that KVM VMX lazily loads is the
> vmcs.VM_EXIT_INTR_INFO field, which holds the vector+type on "exception
> or NMI" VM-Exits, i.e. is needed to identify NMIs. Clearing the available
> registers bitmask after handling NMIs results in KVM querying info from
> the last VM-Exit that read vmcs.VM_EXIT_INTR_INFO, and leads to both
> missed NMIs and spurious NMIs from the guest's perspective.
Oof, it's not just from the guest's perspective, NMIs that are destined for host
consumption will suffer the same fate.
On Tue, Dec 13, 2022 at 06:09:12AM +0000, Sean Christopherson wrote:
> @@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
>
> vmx_enable_fb_clear(vmx);
>
> + if (unlikely(vmx->fail))
> + vmx->exit_reason.full = 0xdead;
> + else
> + vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
> +
> + if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> + is_nmi(vmx_get_intr_info(vcpu))) {
> + kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> + vmx_do_nmi_irqoff();
> + kvm_after_interrupt(vcpu);
> + }
> +
> guest_state_exit_irqoff();
> }
I think we're going to have to sprinkle __always_inline on the
kvm_{before,after}_interrupt() things (or I missed the earlier patches
doing this already), sometimes compilers are just weird.
On Thu, Jan 19, 2023, Peter Zijlstra wrote:
> On Tue, Dec 13, 2022 at 06:09:12AM +0000, Sean Christopherson wrote:
>
> > @@ -7119,6 +7118,18 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
> >
> > vmx_enable_fb_clear(vmx);
> >
> > + if (unlikely(vmx->fail))
> > + vmx->exit_reason.full = 0xdead;
> > + else
> > + vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
> > +
> > + if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
> > + is_nmi(vmx_get_intr_info(vcpu))) {
> > + kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
> > + vmx_do_nmi_irqoff();
> > + kvm_after_interrupt(vcpu);
> > + }
> > +
> > guest_state_exit_irqoff();
> > }
>
> I think we're going to have to sprinkle __always_inline on the
> kvm_{before,after}_interrupt() things (or I missed the earlier patches
> doing this already), sometimes compilers are just weird.
It's in this patch, just lurking at the bottom.
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 9de72586f406..44d1827f0a30 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -382,13 +382,13 @@ enum kvm_intr_type {
> KVM_HANDLING_NMI,
> };
>
> -static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
> - enum kvm_intr_type intr)
> +static __always_inline void kvm_before_interrupt(struct kvm_vcpu *vcpu,
> + enum kvm_intr_type intr)
> {
> WRITE_ONCE(vcpu->arch.handling_intr_from_guest, (u8)intr);
> }
>
> -static inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> +static __always_inline void kvm_after_interrupt(struct kvm_vcpu *vcpu)
> {
> WRITE_ONCE(vcpu->arch.handling_intr_from_guest, 0);
> }