[RFC PATCH v2 02/10] kvm: gmem: Add KVM_GMEM_GET_PFN_SHARED

Patrick Roy posted 10 patches 2 months, 2 weeks ago
[RFC PATCH v2 02/10] kvm: gmem: Add KVM_GMEM_GET_PFN_SHARED
Posted by Patrick Roy 2 months, 2 weeks ago
If `KVM_GMEM_NO_DIRECT_MAP` is set, all gmem folios are removed from the
direct map immediately after allocation. Add a flag to
kvm_gmem_get_folio to override this behavior, and expose it via
`kvm_gmem_get_pfn`. Only allow this flag to be set if KVM can actually
access gmem (currently only if the vm type is KVM_X86_SW_PROTECTED_VM).

KVM_GMEM_GET_PFN_SHARED defers the direct map removal for newly
allocated folios until kvm_gmem_put_shared_pfn is called. For existing
folios, the direct map entry is temporarily restored until
kvm_gmem_put_shared_pfn is called.

The folio lock must be held the entire time the folio is present in the
direct map, to prevent races with concurrent calls to
kvm_gmem_folio_set_private that might remove direct map entries while
the folios are being accessed by KVM. As this is currently not possible
(kvm_gmem_get_pfn always unlocks the folio), the next patch will
introduce a KVM_GMEM_GET_PFN_LOCKED flag.

Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
---
 arch/x86/kvm/mmu/mmu.c   |  2 +-
 include/linux/kvm_host.h | 12 +++++++++--
 virt/kvm/guest_memfd.c   | 46 +++++++++++++++++++++++++++++++---------
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 901be9e420a4c..cb2f111f2cce0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4349,7 +4349,7 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
 	}
 
 	r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
-			     &max_order);
+			     &max_order, 0);
 	if (r) {
 		kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
 		return r;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 689e8be873a75..8a2975674de4b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2432,17 +2432,25 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
 }
 #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
 
+#define KVM_GMEM_GET_PFN_SHARED         BIT(0)
+#define KVM_GMEM_GET_PFN_PREPARE        BIT(31)  /* internal */
+
 #ifdef CONFIG_KVM_PRIVATE_MEM
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
-		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
+		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags);
+int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn);
 #else
 static inline int kvm_gmem_get_pfn(struct kvm *kvm,
 				   struct kvm_memory_slot *slot, gfn_t gfn,
-				   kvm_pfn_t *pfn, int *max_order)
+				   kvm_pfn_t *pfn, int *max_order, unsigned long flags)
 {
 	KVM_BUG_ON(1, kvm);
 	return -EIO;
 }
+static inline int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn)
+{
+	return -EIO;
+}
 #endif /* CONFIG_KVM_PRIVATE_MEM */
 
 #ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 2ed27992206f3..492b04f4e5c18 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -55,6 +55,11 @@ static bool kvm_gmem_test_no_direct_map(struct inode *inode)
 	return ((unsigned long)inode->i_private & KVM_GMEM_NO_DIRECT_MAP) == KVM_GMEM_NO_DIRECT_MAP;
 }
 
+static bool kvm_gmem_test_accessible(struct kvm *kvm)
+{
+	return kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM;
+}
+
 static int kvm_gmem_folio_set_private(struct folio *folio)
 {
 	unsigned long start, npages, i;
@@ -110,10 +115,11 @@ static int kvm_gmem_folio_clear_private(struct folio *folio)
 	return r;
 }
 
-static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, unsigned long flags)
 {
 	int r;
 	struct folio *folio;
+	bool share = flags & KVM_GMEM_GET_PFN_SHARED;
 
 	/* TODO: Support huge pages. */
 	folio = filemap_grab_folio(inode->i_mapping, index);
@@ -139,7 +145,7 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool
 		folio_mark_uptodate(folio);
 	}
 
-	if (prepare) {
+	if (flags & KVM_GMEM_GET_PFN_PREPARE) {
 		r = kvm_gmem_prepare_folio(inode, index, folio);
 		if (r < 0)
 			goto out_err;
@@ -148,12 +154,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool
 	if (!kvm_gmem_test_no_direct_map(inode))
 		goto out;
 
-	if (!folio_test_private(folio)) {
+	if (folio_test_private(folio) && share) {
+		r = kvm_gmem_folio_clear_private(folio);
+	} else if (!folio_test_private(folio) && !share) {
 		r = kvm_gmem_folio_set_private(folio);
-		if (r)
-			goto out_err;
 	}
 
+	if (r)
+		goto out_err;
+
 out:
 	/*
 	 * Ignore accessed, referenced, and dirty flags.  The memory is
@@ -264,7 +273,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
 			break;
 		}
 
-		folio = kvm_gmem_get_folio(inode, index, true);
+		folio = kvm_gmem_get_folio(inode, index, KVM_GMEM_GET_PFN_PREPARE);
 		if (IS_ERR(folio)) {
 			r = PTR_ERR(folio);
 			break;
@@ -624,7 +633,7 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
 }
 
 static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
-		       gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
+		       gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags)
 {
 	pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
 	struct kvm_gmem *gmem = file->private_data;
@@ -643,7 +652,7 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
 		return -EIO;
 	}
 
-	folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
+	folio = kvm_gmem_get_folio(file_inode(file), index, flags);
 	if (IS_ERR(folio))
 		return PTR_ERR(folio);
 
@@ -667,20 +676,37 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
 }
 
 int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
-		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
+		     gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags)
 {
-	struct file *file = kvm_gmem_get_file(slot);
+	struct file *file;
 	int r;
+	unsigned long valid_flags = KVM_GMEM_GET_PFN_SHARED;
+
+	if ((flags & valid_flags) != flags)
+		return -EINVAL;
+	if ((flags & KVM_GMEM_GET_PFN_SHARED) && !kvm_gmem_test_accessible(kvm))
+		return -EPERM;
 
+	file = kvm_gmem_get_file(slot); /* taken late so the early returns cannot leak it */
 	if (!file)
 		return -EFAULT;
 
-	r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true);
+	r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, flags | KVM_GMEM_GET_PFN_PREPARE);
 	fput(file);
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
 
+int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn)
+{
+	struct folio *folio = pfn_folio(pfn);
+
+	if (!kvm_gmem_test_no_direct_map(folio_inode(folio)))
+		return 0;	/* direct map removal not requested for this gmem */
+	return kvm_gmem_folio_set_private(folio);
+}
+EXPORT_SYMBOL_GPL(kvm_gmem_put_shared_pfn);
+
 long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
 		       kvm_gmem_populate_cb post_populate, void *opaque)
 {
-- 
2.46.0