Split the asm subroutines for handling NMIs versus IRQs that occur in the
guest so that the NMI handler can be called from a noinstr section. As a
bonus, the NMI path doesn't need an indirect branch.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/vmx/vmenter.S | 70 +++++++++++++++++++++-----------------
arch/x86/kvm/vmx/vmx.c | 26 ++++++--------
2 files changed, 50 insertions(+), 46 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 766c6b3ef5ed..9d987e7e48c4 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -31,6 +31,39 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
+.macro VMX_DO_EVENT_IRQOFF call_insn call_target
+ /*
+ * Unconditionally create a stack frame, getting the correct RSP on the
+ * stack (for x86-64) would take two instructions anyways, and RBP can
+ * be used to restore RSP to make objtool happy (see below).
+ */
+ push %_ASM_BP
+ mov %_ASM_SP, %_ASM_BP
+
+#ifdef CONFIG_X86_64
+ /*
+ * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
+ * creating the synthetic interrupt stack frame for the IRQ/NMI.
+ */
+ and $-16, %rsp
+ push $__KERNEL_DS
+ push %rbp
+#endif
+ pushf
+ push $__KERNEL_CS
+ \call_insn \call_target
+
+ /*
+ * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+ * the correct value. objtool doesn't know the callee will IRET and,
+ * without the explicit restore, thinks the stack is getting walloped.
+ * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+ */
+ mov %_ASM_BP, %_ASM_SP
+ pop %_ASM_BP
+ RET
+.endm
+
.section .noinstr.text, "ax"
/**
@@ -320,35 +353,10 @@ SYM_FUNC_START(vmread_error_trampoline)
SYM_FUNC_END(vmread_error_trampoline)
#endif
-SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
- /*
- * Unconditionally create a stack frame, getting the correct RSP on the
- * stack (for x86-64) would take two instructions anyways, and RBP can
- * be used to restore RSP to make objtool happy (see below).
- */
- push %_ASM_BP
- mov %_ASM_SP, %_ASM_BP
+SYM_FUNC_START(vmx_do_nmi_irqoff)
+ VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(vmx_do_nmi_irqoff)
-#ifdef CONFIG_X86_64
- /*
- * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
- * creating the synthetic interrupt stack frame for the IRQ/NMI.
- */
- and $-16, %rsp
- push $__KERNEL_DS
- push %rbp
-#endif
- pushf
- push $__KERNEL_CS
- CALL_NOSPEC _ASM_ARG1
-
- /*
- * "Restore" RSP from RBP, even though IRET has already unwound RSP to
- * the correct value. objtool doesn't know the callee will IRET and,
- * without the explicit restore, thinks the stack is getting walloped.
- * Using an unwind hint is problematic due to x86-64's dynamic alignment.
- */
- mov %_ASM_BP, %_ASM_SP
- pop %_ASM_BP
- RET
-SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)
+SYM_FUNC_START(vmx_do_interrupt_irqoff)
+ VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
+SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7ace22ee240d..c242e2591896 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6786,17 +6786,8 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
}
-void vmx_do_interrupt_nmi_irqoff(unsigned long entry);
-
-static void handle_interrupt_nmi_irqoff(struct kvm_vcpu *vcpu,
- unsigned long entry)
-{
- bool is_nmi = entry == (unsigned long)asm_exc_nmi_kvm_vmx;
-
- kvm_before_interrupt(vcpu, is_nmi ? KVM_HANDLING_NMI : KVM_HANDLING_IRQ);
- vmx_do_interrupt_nmi_irqoff(entry);
- kvm_after_interrupt(vcpu);
-}
+void vmx_do_interrupt_irqoff(unsigned long entry);
+void vmx_do_nmi_irqoff(void);
static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
{
@@ -6820,7 +6811,6 @@ static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
- const unsigned long nmi_entry = (unsigned long)asm_exc_nmi_kvm_vmx;
u32 intr_info = vmx_get_intr_info(&vmx->vcpu);
/* if exit due to PF check for async PF */
@@ -6833,8 +6823,11 @@ static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
else if (is_machine_check(intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
- else if (is_nmi(intr_info))
- handle_interrupt_nmi_irqoff(&vmx->vcpu, nmi_entry);
+ else if (is_nmi(intr_info)) {
+ kvm_before_interrupt(&vmx->vcpu, KVM_HANDLING_NMI);
+ vmx_do_nmi_irqoff();
+ kvm_after_interrupt(&vmx->vcpu);
+ }
}
static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
@@ -6847,7 +6840,10 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
"KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
- handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+ kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
+ vmx_do_interrupt_irqoff(gate_offset(desc));
+ kvm_after_interrupt(vcpu);
+
vcpu->arch.at_instruction_boundary = true;
}
--
2.39.0.rc1.256.g54fd8350bd-goog
> +
> +	/*
> +	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> +	 * the correct value.  objtool doesn't know the callee will IRET and,
> +	 * without the explicit restore, thinks the stack is getting walloped.
> +	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> +	 */
> +	mov	%_ASM_BP, %_ASM_SP
> +	pop	%_ASM_BP
> +	RET

For NMI, after this RET instruction, we continue to block NMIs.  IRET instead?

> +.endm
> +
>  .section .noinstr.text, "ax"
On Wed, Dec 14, 2022, Li, Xin3 wrote:
> > +
> > +	/*
> > +	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> > +	 * the correct value.  objtool doesn't know the callee will IRET and,
> > +	 * without the explicit restore, thinks the stack is getting walloped.
> > +	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> > +	 */
> > +	mov	%_ASM_BP, %_ASM_SP
> > +	pop	%_ASM_BP
> > +	RET
>
> For NMI, after this RET instruction, we continue to block NMIs.  IRET instead?

No, IRET has already been done by the kernel-provided handler, e.g.
asm_exc_nmi_kvm_vmx() by way of error_return().  That's why the CALL above
(that got snipped) is preceded by the creation of a synthetic NMI/IRQ stack
frame: the target returns from the CALL via IRET, not RET.
> > > +	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> > > +	 * the correct value.  objtool doesn't know the callee will IRET and,
> > > +	 * without the explicit restore, thinks the stack is getting walloped.
> > > +	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> > > +	 */
> > > +	mov	%_ASM_BP, %_ASM_SP
> > > +	pop	%_ASM_BP
> > > +	RET
> >
> > For NMI, after this RET instruction, we continue to block NMIs.  IRET
> > instead?
>
> No, IRET has already been done by the kernel-provided handler, e.g.
> asm_exc_nmi_kvm_vmx() by way of error_return().  That's why the CALL above
> (that got snipped) is preceded by the creation of a synthetic NMI/IRQ stack
> frame: the target returns from the CALL via IRET, not RET.

You're right, this assembly makes another call into the asm entry point,
which returns here with IRET.
> > > > +	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
> > > > +	 * the correct value.  objtool doesn't know the callee will IRET and,
> > > > +	 * without the explicit restore, thinks the stack is getting walloped.
> > > > +	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
> > > > +	 */
> > > > +	mov	%_ASM_BP, %_ASM_SP
> > > > +	pop	%_ASM_BP
> > > > +	RET
> > >
> > > For NMI, after this RET instruction, we continue to block NMIs.  IRET
> > > instead?
> >
> > No, IRET has already been done by the kernel-provided handler, e.g.
> > asm_exc_nmi_kvm_vmx() by way of error_return().  That's why the CALL above
> > (that got snipped) is preceded by the creation of a synthetic NMI/IRQ stack
> > frame: the target returns from the CALL via IRET, not RET.
>
> You're right, this assembly makes another call into the asm entry point,
> which returns here with IRET.

Like IRET for IDT, we _need_ ERETS for FRED to unblock NMI ASAP.  However a
FRED stack frame has way more information than an IDT stack frame, thus it's
complicated to create a FRED stack frame with assembly.  So I prefer
"INT $2" for FRED now.

I will post the FRED patch set soon, let's sync up on this afterwards.

Xin
© 2016 - 2025 Red Hat, Inc.