[RFC PATCH 1/2] KVM: TDX: Retry in TDX when installing TD private/sept pages

Yan Zhao posted 2 patches 2 days, 17 hours ago
[RFC PATCH 1/2] KVM: TDX: Retry in TDX when installing TD private/sept pages
Posted by Yan Zhao 2 days, 17 hours ago
For tdh_mem_page_add, Just return error when TDX_OPERAND_BUSY is found.

For tdh_mem_sept_add(), tdh_mem_page_aug(),
- Return -EBUSY in KVM for TDX_OPERAND_BUSY to cause RET_PF_RETRY
  to be returned in kvm_mmu_do_page_fault()/kvm_mmu_page_fault().
- Inside TDX's EPT violation handler, retry on RET_PF_RETRY as
  long as there are no pending signals/interrupts.

Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
 arch/x86/kvm/mmu/mmu.c |  2 +-
 arch/x86/kvm/vmx/tdx.c | 53 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index bd2eaa1dbebb..f16c0e7248eb 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6179,7 +6179,7 @@ int noinline kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 err
 		vcpu->stat.pf_spurious++;
 
 	if (r != RET_PF_EMULATE)
-		return 1;
+		return r;
 
 emulate:
 	return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 23e6f25dd837..60d9e9d050ad 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1705,8 +1705,9 @@ int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
 
 	err = tdh_mem_sept_add(to_kvm_tdx(kvm)->tdr_pa, gpa, tdx_level, hpa, &entry,
 			       &level_state);
-	if (unlikely(err == TDX_ERROR_SEPT_BUSY))
-		return -EAGAIN;
+	if (unlikely(err & TDX_OPERAND_BUSY))
+		return -EBUSY;
+
 	if (KVM_BUG_ON(err, kvm)) {
 		pr_tdx_error_2(TDH_MEM_SEPT_ADD, err, entry, level_state);
 		return -EIO;
@@ -1855,6 +1856,8 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
 {
 	gpa_t gpa = tdexit_gpa(vcpu);
 	unsigned long exit_qual;
+	bool local_retry = false;
+	int ret;
 
 	if (vt_is_tdx_private_gpa(vcpu->kvm, gpa)) {
 		if (tdx_is_sept_violation_unexpected_pending(vcpu)) {
@@ -1873,6 +1876,23 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
 		 * due to aliasing a single HPA to multiple GPAs.
 		 */
 		exit_qual = EPT_VIOLATION_ACC_WRITE;
+
+		/*
+		 * Mapping of private memory may meet RET_PF_RETRY due to
+		 * SEAMCALL contentions, e.g.
+		 * - TDH.MEM.PAGE.AUG/TDH.MEM.SEPT.ADD on local vCPU vs
+		 *   TDH.VP.ENTER with 0-step mitigation on a remote vCPU.
+		 * - TDH.MEM.PAGE.AUG/TDH.MEM.SEPT.ADD on local vCPU vs
+		 *   TDG.MEM.PAGE.ACCEPT on a remote vCPU.
+		 *
+		 * Retry internally in TDX to prevent exacerbating the
+		 * activation of 0-step mitigation on local vCPU.
+		 * However, despite these retries, the 0-step mitigation on the
+		 * local vCPU may still be triggered due to:
+		 * - Exiting on signals, interrupts.
+		 * - KVM_EXIT_MEMORY_FAULT.
+		 */
+		local_retry = true;
 	} else {
 		exit_qual = tdexit_exit_qual(vcpu);
 		/*
@@ -1885,7 +1905,24 @@ static int tdx_handle_ept_violation(struct kvm_vcpu *vcpu)
 	}
 
 	trace_kvm_page_fault(vcpu, tdexit_gpa(vcpu), exit_qual);
-	return __vmx_handle_ept_violation(vcpu, tdexit_gpa(vcpu), exit_qual);
+
+	while (1) {
+		ret = __vmx_handle_ept_violation(vcpu, gpa, exit_qual);
+
+		if (ret != RET_PF_RETRY || !local_retry)
+			break;
+
+		/*
+		 * Break and keep the orig return value.
+		 * Signal & irq handling will be done later in vcpu_run()
+		 */
+		if (signal_pending(current) || pi_has_pending_interrupt(vcpu) ||
+		    kvm_test_request(KVM_REQ_NMI, vcpu) || vcpu->arch.nmi_pending)
+			break;
+
+		cond_resched();
+	}
+	return ret;
 }
 
 int tdx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t fastpath)
@@ -3028,13 +3065,11 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
 	}
 
 	ret = 0;
-	do {
-		err = tdh_mem_page_add(kvm_tdx->tdr_pa, gpa, pfn_to_hpa(pfn),
-				       pfn_to_hpa(page_to_pfn(page)),
-				       &entry, &level_state);
-	} while (err == TDX_ERROR_SEPT_BUSY);
+	err = tdh_mem_page_add(kvm_tdx->tdr_pa, gpa, pfn_to_hpa(pfn),
+			       pfn_to_hpa(page_to_pfn(page)),
+			       &entry, &level_state);
 	if (err) {
-		ret = -EIO;
+		ret = unlikely(err & TDX_OPERAND_BUSY) ? -EBUSY : -EIO;
 		goto out;
 	}
 
-- 
2.43.2