KVM: pfncache: track HVA invalidations for HVA-based caches

[PATCH] KVM: pfncache: track HVA invalidations for HVA-based caches
Posted by Jeongjun Park 4 days, 11 hours ago
HVA-based gfn_to_pfn caches are not necessarily backed by a KVM memslot.
When an MMU notifier invalidation targets such an HVA, KVM's global
mmu_invalidate_seq is not guaranteed to change because that sequence is
advanced through the memslot-based invalidation path.

This matters during hva_to_pfn_retry(). The refresh path temporarily
marks the cache invalid and drops gpc->lock while resolving the HVA and
creating a kernel mapping. If an overlapping HVA invalidation completes in
that window, the notifier may observe gpc->valid == false and therefore
leave no state behind for the in-progress refresh. For an HVA outside all
memslots, the refresh cannot rely on mmu_invalidate_seq to detect the
event either.

To prevent this, add a per-cache HVA invalidation sequence.
Bump the sequence whenever the cached HVA overlaps an MMU notifier range,
regardless of the current valid state. Snapshot the sequence before
dropping gpc->lock in hva_to_pfn_retry(), and retry the refresh if it
changes before the new mapping is published.

Reported-by: syzbot+0948c82180d475ad24e2@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/6a0c5f2c.a00a0220.2c7954.0000.GAE@google.com/
Fixes: b9220d32799a ("KVM: x86/xen: allow shared_info to be mapped by fixed HVA")
Signed-off-by: Jeongjun Park <aha310510@gmail.com>
---
 include/linux/kvm_types.h |  1 +
 virt/kvm/pfncache.c       | 42 ++++++++++++++++++++++++++++++++-------
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index a568d8e6f4e8..ff3b8aa73561 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -85,6 +85,7 @@ struct gfn_to_pfn_cache {
 	u64 generation;
 	gpa_t gpa;
 	unsigned long uhva;
+	unsigned long hva_invalidate_seq;
 	struct kvm_memory_slot *memslot;
 	struct kvm *kvm;
 	struct list_head list;
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 728d2c1b488a..296b06482ebc 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -19,6 +19,24 @@
 
 #include "kvm_mm.h"
 
+static inline bool gpc_uhva_in_range(struct gfn_to_pfn_cache *gpc,
+			      unsigned long start, unsigned long end)
+{
+	return gpc->uhva >= start && gpc->uhva < end;
+}
+
+static inline bool gpc_should_invalidate(struct gfn_to_pfn_cache *gpc,
+				  unsigned long start, unsigned long end)
+{
+	if (!gpc_uhva_in_range(gpc, start, end))
+		return false;
+
+	if (kvm_gpc_is_hva_active(gpc))
+		return true;
+
+	return gpc->valid && !is_error_noslot_pfn(gpc->pfn);
+}
+
 /*
  * MMU notifier 'invalidate_range_start' hook.
  */
@@ -32,8 +50,7 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 		read_lock_irq(&gpc->lock);
 
 		/* Only a single page so no need to care about length */
-		if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
-		    gpc->uhva >= start && gpc->uhva < end) {
+		if (gpc_should_invalidate(gpc, start, end)) {
 			read_unlock_irq(&gpc->lock);
 
 			/*
@@ -45,9 +62,11 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 			 */
 
 			write_lock_irq(&gpc->lock);
-			if (gpc->valid && !is_error_noslot_pfn(gpc->pfn) &&
-			    gpc->uhva >= start && gpc->uhva < end)
+			if (gpc_should_invalidate(gpc, start, end)) {
+				if (kvm_gpc_is_hva_active(gpc))
+					gpc->hva_invalidate_seq++;
 				gpc->valid = false;
+			}
 			write_unlock_irq(&gpc->lock);
 			continue;
 		}
@@ -124,8 +143,11 @@ static void gpc_unmap(kvm_pfn_t pfn, void *khva)
 #endif
 }
 
-static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
+static inline bool mmu_notifier_retry_cache(struct gfn_to_pfn_cache *gpc,
+					    unsigned long mmu_seq, unsigned long hva_seq)
 {
+	struct kvm *kvm = gpc->kvm;
+
 	/*
 	 * mn_active_invalidate_count acts for all intents and purposes
 	 * like mmu_invalidate_in_progress here; but the latter cannot
@@ -149,7 +171,10 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s
 	 * new (incremented) value of mmu_invalidate_seq is observed.
 	 */
 	smp_rmb();
-	return kvm->mmu_invalidate_seq != mmu_seq;
+	if (kvm->mmu_invalidate_seq != mmu_seq)
+		return true;
+
+	return gpc->hva_invalidate_seq != hva_seq;
 }
 
 static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
@@ -159,6 +184,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 	kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT;
 	void *new_khva = NULL;
 	unsigned long mmu_seq;
+	unsigned long hva_seq;
 	struct page *page;
 
 	struct kvm_follow_pfn kfp = {
@@ -182,6 +208,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 
 	do {
 		mmu_seq = gpc->kvm->mmu_invalidate_seq;
+		hva_seq = gpc->hva_invalidate_seq;
 		smp_rmb();
 
 		write_unlock_irq(&gpc->lock);
@@ -232,7 +259,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 		 * attempting to refresh.
 		 */
 		WARN_ON_ONCE(gpc->valid);
-	} while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq));
+	} while (mmu_notifier_retry_cache(gpc, mmu_seq, hva_seq));
 
 	gpc->valid = true;
 	gpc->pfn = new_pfn;
@@ -391,6 +418,7 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm)
 	gpc->pfn = KVM_PFN_ERR_FAULT;
 	gpc->gpa = INVALID_GPA;
 	gpc->uhva = KVM_HVA_ERR_BAD;
+	gpc->hva_invalidate_seq = 0;
 	gpc->active = gpc->valid = false;
 }
 
--