[PATCH 09/12] KVM: X86: Add debugfs to inject machine check on VM exit

isaku.yamahata@intel.com posted 12 patches 2 years, 2 months ago
[PATCH 09/12] KVM: X86: Add debugfs to inject machine check on VM exit
Posted by isaku.yamahata@intel.com 2 years, 2 months ago
From: Isaku Yamahata <isaku.yamahata@intel.com>

KVM/x86 handles machine checks in the guest specially.  It sets up the
guest so that the vcpu exits from the running guest, checks the exit reason,
and manually raises the machine check by calling do_machine_check().

To test the KVM machine check execution path, KVM wants to inject the
machine check in the context of the vcpu instead of the context of the
process performing the MCE injection.  Wire up the MCE injection framework
so that KVM can trigger the MCE in the vcpu context.  Add a KVM vcpu debugfs
entry that lets an operator tell KVM to inject an MCE.

The operation flow is as follows:
- Set notrigger to 1 to tell the x86 MCE injector not to inject the
  machine check itself.
  echo 1 > /sys/kernel/debug/mce-inject/notrigger
- Set MCE parameters via x86 MCE injector debugfs
  /sys/kernel/debug/mce-inject/{addr, bank, flags, mcgstatus, misc, status}
- Tell KVM to inject MCE
  echo 1 > /sys/kernel/debug/kvm/<pid>-<vm-fd>/vcpu<vcpuid>/mce-inject

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/debugfs.c          | 22 ++++++++++++++++++++++
 arch/x86/kvm/x86.c              | 14 ++++++++++++++
 3 files changed, 37 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 17715cb8731d..9286f3d02f30 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
 	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_HV_TLB_FLUSH \
 	KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_MCE_INJECT		KVM_ARCH_REQ(33)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index ee8c4c3496ed..fee208f30400 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -56,6 +56,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n");
 
+static int vcpu_mce_inject_set(void *data, u64 val)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *) data;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (val != 1)
+		return -EINVAL;
+	kvm_make_request(KVM_REQ_MCE_INJECT, vcpu);
+	kvm_vcpu_kick(vcpu);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_mce_inject_fops, NULL, vcpu_mce_inject_set, "%llx\n");
+
 void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry)
 {
 	debugfs_create_file("guest_mode", 0444, debugfs_dentry, vcpu,
@@ -76,6 +92,12 @@ void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_
 				    debugfs_dentry, vcpu,
 				    &vcpu_tsc_scaling_frac_fops);
 	}
+
+	if (IS_ENABLED(CONFIG_X86_MCE_INJECT) &&
+	    boot_cpu_has(X86_FEATURE_MCE) && boot_cpu_has(X86_FEATURE_MCA))
+		debugfs_create_file("mce-inject", 0200,
+				    debugfs_dentry, vcpu,
+				    &vcpu_mce_inject_fops);
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9f18b06bbda6..e4c63ded4c9a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10496,6 +10496,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	fastpath_t exit_fastpath;
 
 	bool req_immediate_exit = false;
+	bool req_mce_inject = false;
 
 	if (kvm_request_pending(vcpu)) {
 		if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
@@ -10642,6 +10643,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 		if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
 			static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
+
+		req_mce_inject = kvm_check_request(KVM_REQ_MCE_INJECT, vcpu);
 	}
 
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -10676,6 +10679,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		goto cancel_injection;
 	}
 
+	if (unlikely(req_mce_inject))
+		mce_inject_lock();
 	preempt_disable();
 
 	static_call(kvm_x86_prepare_switch_to_guest)(vcpu);
@@ -10721,6 +10726,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		smp_wmb();
 		local_irq_enable();
 		preempt_enable();
+		if (unlikely(req_mce_inject)) {
+			kvm_make_request(KVM_REQ_MCE_INJECT, vcpu);
+			mce_inject_unlock();
+		}
 		kvm_vcpu_srcu_read_lock(vcpu);
 		r = 1;
 		goto cancel_injection;
@@ -10814,6 +10823,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		fpu_sync_guest_vmexit_xfd_state();
 
 	static_call(kvm_x86_handle_exit_irqoff)(vcpu);
+	if (unlikely(req_mce_inject)) {
+		mce_call_atomic_injector_chain(smp_processor_id());
+		kvm_machine_check();
+		mce_inject_unlock();
+	}
 
 	if (vcpu->arch.guest_fpu.xfd_err)
 		wrmsrl(MSR_IA32_XFD_ERR, 0);
-- 
2.25.1
Re: [PATCH 09/12] KVM: X86: Add debugfs to inject machine check on VM exit
Posted by Sean Christopherson 2 years, 2 months ago
On Tue, Oct 10, 2023, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> The KVM/x86 handles machine-check in the guest specially.  It sets up the
> guest so that vcpu exits from running guests, checks the exit reason and,
> manually raises the machine check by calling do_machine_check().
> 
> To test the KVM machine check execution path, KVM wants to inject the
> machine check in the context of vcpu instead of the context of the process
> of MCE injection.  Wire up the MCE injection framework for KVM to trigger
> MCE in the vcpu context.  Add a kvm vcpu debugfs entry for an operator to
> tell KVM to inject MCE.

But this isn't "injecting" a #MC, it's just having KVM call do_machine_check()
before enabling IRQs after a VM-Exit.  I don't see how that is interesting enough
to warrant a dedicated knob and code in KVM's run loop.

> @@ -10814,6 +10823,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  		fpu_sync_guest_vmexit_xfd_state();
>  
>  	static_call(kvm_x86_handle_exit_irqoff)(vcpu);
> +	if (unlikely(req_mce_inject)) {
> +		mce_call_atomic_injector_chain(smp_processor_id());
> +		kvm_machine_check();
> +		mce_inject_unlock();
> +	}
>  
>  	if (vcpu->arch.guest_fpu.xfd_err)
>  		wrmsrl(MSR_IA32_XFD_ERR, 0);
> -- 
> 2.25.1
>