[PATCH] KVM: x86/mmu: Only zap valid non-mirror roots in kvm_zap_gfn_range()

Only zap valid, non-mirror roots in kvm_zap_gfn_range().

There are 3 callers to kvm_zap_gfn_range() in KVM.
(1) in __kvm_set_or_clear_apicv_inhibit().
(2) in sev_handle_rmp_fault().
(3) in kvm_noncoherent_dma_assignment_start_or_stop().
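
For reference, caller (3) zaps the whole GFN range whenever non-coherent
DMA assignment starts or stops. A sketch of its shape, based on the
upstream implementation in arch/x86/kvm/x86.c (not verbatim):

  static void kvm_noncoherent_dma_assignment_start_or_stop(struct kvm *kvm)
  {
          /*
           * Attaching/detaching a non-coherent DMA device may change
           * whether KVM honors guest PAT, so zap all SPTEs to force new
           * memory-type decisions on the next fault.
           */
          if (kvm_mmu_may_ignore_guest_pat())
                  kvm_zap_gfn_range(kvm, gpa_to_gfn(0), gpa_to_gfn(~0ULL));
  }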

TDX inhibits APICv as soon as a TD is initialized, and mirror roots do not
apply to SEV. So, kvm_zap_gfn_range() does not need to zap mirror roots in
cases (1) and (2).

Currently, TDX does not support the assignment of noncoherent DMA devices,
even to shared memory (there's no corresponding WBINVD emulation). Even if
TDX supports noncoherent DMA device assignment in the future, the private
EPT (underlying the mirror roots) in TDX always forces the EPT memory type
to WB. Thus, kvm_zap_gfn_range() does not need to zap mirror roots in case
(3) either. Zapping only valid, non-mirror roots in kvm_zap_gfn_range()
allows TDX to avoid depending on the self-snoop feature when reusing VMX's
get_mt_mask op for TDX shared EPT.

Introduce a static helper kvm_zap_gfn_range_filtered() and have
kvm_zap_gfn_range() invoke it with the filter KVM_FILTER_SHARED.
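
For context, attr_filter is translated to root types by the existing
helper in tdp_mmu.h. A rough sketch of its shape (not verbatim):

  static inline enum kvm_tdp_mmu_root_types
  kvm_gfn_range_filter_to_root_types(struct kvm *kvm,
                                     enum kvm_gfn_range_filter process)
  {
          enum kvm_tdp_mmu_root_types ret = 0;

          /* Without mirrored TDP (i.e. without TDX), only direct roots exist. */
          if (!kvm_has_mirrored_tdp(kvm))
                  return KVM_DIRECT_ROOTS;

          if (process & KVM_FILTER_PRIVATE)
                  ret |= KVM_MIRROR_ROOTS;
          if (process & KVM_FILTER_SHARED)
                  ret |= KVM_DIRECT_ROOTS;

          return ret;
  }

With KVM_FILTER_SHARED, this resolves to KVM_DIRECT_ROOTS only, so mirror
roots are never walked by the zap.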

Opportunistically, move EXPORT_SYMBOL_GPL of kvm_zap_gfn_range() closer to
the function itself.

Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
---
The code base is kvm-coco-queue.

Previously Paolo suggested dropping TDX's specific implementation of the
get_mt_mask op and having TDX reuse VMX's implementation. [1]

Sean also suggested making the self-snoop feature a hard dependency for
enabling TDX [2].

That is because
- TDX shared EPT is able to reuse the memory type specified in VMX's code
  as long as guest MTRRs are not referenced.
- KVM does not call kvm_zap_gfn_range() when attaching/detaching
  non-coherent DMA devices when the CPU has the self-snoop feature. [3]

However, [3] is no longer guaranteed after commit 9d70f3fec144 ("Revert
"KVM: VMX: Always honor guest PAT on CPUs that support self-snoop""), which
was necessitated by a regression with the bochsdrm driver.
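
Concretely, [3] hinged on the guard in caller (3); a sketch of how that
guard looked before the revert (assumed shape, not verbatim):

  static inline bool kvm_mmu_may_ignore_guest_pat(void)
  {
          /*
           * Pre-revert: with self-snoop, guest PAT was always honored, so
           * the zap in caller (3) was skipped on such CPUs. The revert
           * dropped the self-snoop check, so the zap now always runs when
           * EPT forces a memtype (shadow_memtype_mask != 0).
           */
          return shadow_memtype_mask &&
                 !static_cpu_has(X86_FEATURE_SELFSNOOP);
  }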

Although [3] may be restored in the future, rather than relying on or
waiting for that, a better approach is to simply not zap the private EPT
in kvm_zap_gfn_range().

[1] https://lore.kernel.org/kvm/b791a3f6-a5ab-4f7e-bb2a-d277b26ec2c4@redhat.com/
[2] https://lore.kernel.org/kvm/ZuBSNS33_ck-w6-9@google.com
[3] https://lore.kernel.org/all/20240309010929.1403984-6-seanjc@google.com
---
 arch/x86/kvm/mmu/mmu.c     | 20 ++++++++++++++++----
 arch/x86/kvm/mmu/tdp_mmu.c | 11 ++++++++---
 arch/x86/kvm/mmu/tdp_mmu.h |  3 ++-
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8fc943824015..c2e4a4dcbfac 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6629,9 +6629,10 @@ static bool kvm_rmap_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_e
 
 /*
  * Invalidate (zap) SPTEs that cover GFNs from gfn_start and up to gfn_end
- * (not including it)
+ * (not including it) for VALID roots specified with attr_filter
  */
-void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
+static void kvm_zap_gfn_range_filtered(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end,
+				       enum kvm_gfn_range_filter attr_filter)
 {
 	bool flush;
 
@@ -6647,7 +6648,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 	flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
 
 	if (tdp_mmu_enabled)
-		flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end, flush);
+		flush = kvm_tdp_mmu_zap_leafs(kvm, gfn_start, gfn_end,
+					      attr_filter, flush);
 
 	if (flush)
 		kvm_flush_remote_tlbs_range(kvm, gfn_start, gfn_end - gfn_start);
@@ -6657,6 +6659,17 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 	write_unlock(&kvm->mmu_lock);
 }
 
+/*
+ * Invalidate (zap) SPTEs that cover GFNs from gfn_start and up to gfn_end
+ * (not including it) for all *VALID* non-mirror roots.
+ */
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
+{
+	kvm_zap_gfn_range_filtered(kvm, gfn_start, gfn_end,
+				   KVM_FILTER_SHARED);
+}
+EXPORT_SYMBOL_GPL(kvm_zap_gfn_range);
+
 static bool slot_rmap_write_protect(struct kvm *kvm,
 				    struct kvm_rmap_head *rmap_head,
 				    const struct kvm_memory_slot *slot)
@@ -6998,7 +7011,6 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
 
 	return need_tlb_flush;
 }
-EXPORT_SYMBOL_GPL(kvm_zap_gfn_range);
 
 static void kvm_rmap_zap_collapsible_sptes(struct kvm *kvm,
 					   const struct kvm_memory_slot *slot)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index b0e1c4cb3004..5482f0d5d262 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1016,16 +1016,21 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
 }
 
 /*
- * Zap leaf SPTEs for the range of gfns, [start, end), for all *VALID** roots.
+ * Zap leaf SPTEs for the range of gfns, [start, end), for *VALID** roots
+ * specified with attr_filter.
  * Returns true if a TLB flush is needed before releasing the MMU lock, i.e. if
  * one or more SPTEs were zapped since the MMU lock was last acquired.
  */
-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end,
+			   enum kvm_gfn_range_filter attr_filter, bool flush)
 {
+	enum kvm_tdp_mmu_root_types types;
 	struct kvm_mmu_page *root;
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
-	for_each_valid_tdp_mmu_root_yield_safe(kvm, root, -1)
+
+	types = kvm_gfn_range_filter_to_root_types(kvm, attr_filter);
+	__for_each_tdp_mmu_root_yield_safe(kvm, root, -1, types)
 		flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
 
 	return flush;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 7927fa4a96e0..6e17d4d151b9 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -63,7 +63,8 @@ static inline struct kvm_mmu_page *tdp_mmu_get_root(struct kvm_vcpu *vcpu,
 	return root_to_sp(vcpu->arch.mmu->root.hpa);
 }
 
-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush);
+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end,
+			   enum kvm_gfn_range_filter attr_filter, bool flush);
 bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp);
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 void kvm_tdp_mmu_invalidate_roots(struct kvm *kvm,

base-commit: 2893e1e10283b33c1412b83949ea346aa75eaf18
-- 
2.43.2