If `KVM_GMEM_NO_DIRECT_MAP` is set, all gmem folios are removed from the
direct map immediately after allocation. Add a flag to
kvm_gmem_get_folio to override this behavior, and expose it via
`kvm_gmem_get_pfn`. Only allow this flag to be set if KVM can actually
access gmem (currently only if the vm type is KVM_X86_SW_PROTECTED_VM).
KVM_GMEM_GET_PFN_SHARED defers the direct map removal for newly
allocated folios until kvm_gmem_put_shared_pfn is called. For existing
folios, the direct map entry is temporarily restored until
kvm_gmem_put_shared_pfn is called.
The folio lock must be held the entire time the folio is present in the
direct map, to prevent races with concurrent calls to
kvm_gmem_folio_set_private that might remove direct map entries while
the folios are being accessed by KVM. As this is currently not possible
(kvm_gmem_get_pfn always unlocks the folio), the next patch will
introduce a KVM_GMEM_GET_PFN_LOCKED flag.
Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
---
arch/x86/kvm/mmu/mmu.c | 2 +-
include/linux/kvm_host.h | 12 +++++++++--
virt/kvm/guest_memfd.c | 46 +++++++++++++++++++++++++++++++---------
3 files changed, 47 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 901be9e420a4c..cb2f111f2cce0 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4349,7 +4349,7 @@ static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu,
}
r = kvm_gmem_get_pfn(vcpu->kvm, fault->slot, fault->gfn, &fault->pfn,
- &max_order);
+ &max_order, 0);
if (r) {
kvm_mmu_prepare_memory_fault_exit(vcpu, fault);
return r;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 689e8be873a75..8a2975674de4b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2432,17 +2432,25 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
}
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
+#define KVM_GMEM_GET_PFN_SHARED BIT(0)
+#define KVM_GMEM_GET_PFN_PREPARE BIT(31) /* internal */
+
#ifdef CONFIG_KVM_PRIVATE_MEM
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
- gfn_t gfn, kvm_pfn_t *pfn, int *max_order);
+ gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags);
+int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn);
#else
static inline int kvm_gmem_get_pfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
- kvm_pfn_t *pfn, int *max_order)
+ kvm_pfn_t *pfn, int *max_order, int flags)
{
KVM_BUG_ON(1, kvm);
return -EIO;
}
+static inline int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn)
+{
+ return -EIO;
+}
#endif /* CONFIG_KVM_PRIVATE_MEM */
#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 2ed27992206f3..492b04f4e5c18 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -55,6 +55,11 @@ static bool kvm_gmem_test_no_direct_map(struct inode *inode)
return ((unsigned long)inode->i_private & KVM_GMEM_NO_DIRECT_MAP) == KVM_GMEM_NO_DIRECT_MAP;
}
+static bool kvm_gmem_test_accessible(struct kvm *kvm)
+{
+ return kvm->arch.vm_type == KVM_X86_SW_PROTECTED_VM;
+}
+
static int kvm_gmem_folio_set_private(struct folio *folio)
{
unsigned long start, npages, i;
@@ -110,10 +115,11 @@ static int kvm_gmem_folio_clear_private(struct folio *folio)
return r;
}
-static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare)
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, unsigned long flags)
{
int r;
struct folio *folio;
+ bool share = flags & KVM_GMEM_GET_PFN_SHARED;
/* TODO: Support huge pages. */
folio = filemap_grab_folio(inode->i_mapping, index);
@@ -139,7 +145,7 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool
folio_mark_uptodate(folio);
}
- if (prepare) {
+ if (flags & KVM_GMEM_GET_PFN_PREPARE) {
r = kvm_gmem_prepare_folio(inode, index, folio);
if (r < 0)
goto out_err;
@@ -148,12 +154,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool
if (!kvm_gmem_test_no_direct_map(inode))
goto out;
- if (!folio_test_private(folio)) {
+ if (folio_test_private(folio) && share) {
+ r = kvm_gmem_folio_clear_private(folio);
+ } else if (!folio_test_private(folio) && !share) {
r = kvm_gmem_folio_set_private(folio);
- if (r)
- goto out_err;
}
+ if (r)
+ goto out_err;
+
out:
/*
* Ignore accessed, referenced, and dirty flags. The memory is
@@ -264,7 +273,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
break;
}
- folio = kvm_gmem_get_folio(inode, index, true);
+ folio = kvm_gmem_get_folio(inode, index, KVM_GMEM_GET_PFN_PREPARE);
if (IS_ERR(folio)) {
r = PTR_ERR(folio);
break;
@@ -624,7 +633,7 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot)
}
static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
- gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare)
+ gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags)
{
pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff;
struct kvm_gmem *gmem = file->private_data;
@@ -643,7 +652,7 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
return -EIO;
}
- folio = kvm_gmem_get_folio(file_inode(file), index, prepare);
+ folio = kvm_gmem_get_folio(file_inode(file), index, flags);
if (IS_ERR(folio))
return PTR_ERR(folio);
@@ -667,20 +676,37 @@ static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot,
}
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
- gfn_t gfn, kvm_pfn_t *pfn, int *max_order)
+ gfn_t gfn, kvm_pfn_t *pfn, int *max_order, unsigned long flags)
{
struct file *file = kvm_gmem_get_file(slot);
int r;
+ int valid_flags = KVM_GMEM_GET_PFN_SHARED;
+
+ if ((flags & valid_flags) != flags)
+ return -EINVAL;
+
+ if ((flags & KVM_GMEM_GET_PFN_SHARED) && !kvm_gmem_test_accessible(kvm))
+ return -EPERM;
if (!file)
return -EFAULT;
- r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true);
+ r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, flags | KVM_GMEM_GET_PFN_PREPARE);
fput(file);
return r;
}
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
+int kvm_gmem_put_shared_pfn(kvm_pfn_t pfn) {
+ struct folio *folio = pfn_folio(pfn);
+
+ if (!kvm_gmem_test_no_direct_map(folio_inode(folio)))
+ return 0;
+
+ return kvm_gmem_folio_set_private(folio);
+}
+EXPORT_SYMBOL_GPL(kvm_gmem_put_shared_pfn);
+
long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
kvm_gmem_populate_cb post_populate, void *opaque)
{
--
2.46.0