The write syscall populates guest_memfd with user-supplied data in a generic
way, i.e. no vendor-specific preparation is performed. It is intended for
use in non-CoCo setups where guest memory is not hardware-encrypted.
The following behaviour is implemented:
- only page-aligned count and offset are allowed
- if the memory is already allocated, the call will populate it
- if the memory is not allocated, the call will both allocate and
populate it
- if the memory is already populated, the call will not repopulate it
(it stops at that page, returning -ENOSPC if nothing was written)
Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
---
virt/kvm/guest_memfd.c | 79 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 79 insertions(+)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 47a9f68f7b24..e80566ef56e9 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -102,6 +102,80 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
return filemap_grab_folio(inode->i_mapping, index);
}
+#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)
+/*
+ * Populate guest_memfd pages with user-supplied data, one page at a time.
+ *
+ * @file:   guest_memfd file being written
+ * @buf:    userspace source buffer
+ * @count:  number of bytes to write; must be page-aligned
+ * @offset: file position; must be page-aligned, advanced by PAGE_SIZE for
+ *          every page that was populated
+ *
+ * Returns the number of bytes written if at least one page was populated.
+ * Otherwise returns 0 for an empty request, or a negative errno:
+ *   -EINVAL  misaligned or out-of-range request, or NULL @buf
+ *   -EINTR   a signal is pending
+ *   -EFAULT  the folio is hwpoisoned, or @buf could not be read
+ *   -ENOSPC  the target folio is already populated (uptodate)
+ *   other    whatever error kvm_gmem_get_folio() reported
+ */
+static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
+				   size_t count, loff_t *offset)
+{
+	pgoff_t start, end, index;
+	ssize_t written = 0;
+	ssize_t ret = 0;
+
+	if (!PAGE_ALIGNED(*offset) || !PAGE_ALIGNED(count))
+		return -EINVAL;
+
+	if (*offset + count > i_size_read(file_inode(file)))
+		return -EINVAL;
+
+	if (!buf)
+		return -EINVAL;
+
+	start = *offset >> PAGE_SHIFT;
+	end = (*offset + count) >> PAGE_SHIFT;
+
+	filemap_invalidate_lock(file->f_mapping);
+
+	for (index = start; index < end; ) {
+		struct folio *folio;
+		void *vaddr;
+		unsigned long uncopied;
+		pgoff_t buf_offset = (index - start) << PAGE_SHIFT;
+
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			break;
+		}
+
+		folio = kvm_gmem_get_folio(file_inode(file), index);
+		if (IS_ERR(folio)) {
+			/* Propagate the real cause instead of a blanket -EFAULT. */
+			ret = PTR_ERR(folio);
+			break;
+		}
+
+		if (folio_test_hwpoison(folio)) {
+			folio_unlock(folio);
+			folio_put(folio);
+			ret = -EFAULT;
+			break;
+		}
+
+		/* Already populated: refuse to overwrite existing contents. */
+		if (folio_test_uptodate(folio)) {
+			folio_unlock(folio);
+			folio_put(folio);
+			ret = -ENOSPC;
+			break;
+		}
+
+		/*
+		 * NOTE(review): the folio is unlocked before the copy, so the
+		 * uptodate check above is not atomic with the population
+		 * below -- confirm nothing else can prepare this folio
+		 * concurrently. The copy also targets offset 0 of the folio,
+		 * which presumably relies on order-0 folios; verify.
+		 */
+		folio_unlock(folio);
+
+		vaddr = kmap_local_folio(folio, 0);
+		uncopied = copy_from_user(vaddr, buf + buf_offset, PAGE_SIZE);
+		kunmap_local(vaddr);
+		if (uncopied) {
+			/*
+			 * Do not mark a partially copied page as prepared and
+			 * do not let a later iteration overwrite the error.
+			 */
+			folio_put(folio);
+			ret = -EFAULT;
+			break;
+		}
+
+		kvm_gmem_mark_prepared(folio);
+
+		/* Read the next index while we still hold our reference. */
+		index = folio_next_index(folio);
+		folio_put(folio);
+
+		*offset += PAGE_SIZE;
+		written += PAGE_SIZE;
+	}
+
+	filemap_invalidate_unlock(file->f_mapping);
+
+	/* Partial progress wins over the error that stopped the loop. */
+	return written ? written : ret;
+}
+#endif
+
static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
pgoff_t end)
{
@@ -308,6 +382,10 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
}
static struct file_operations kvm_gmem_fops = {
+#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)
+ .llseek = default_llseek,
+ .write = kvm_kmem_gmem_write,
+#endif
.open = generic_file_open,
.release = kvm_gmem_release,
.fallocate = kvm_gmem_fallocate,
@@ -423,6 +501,7 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
}
file->f_flags |= O_LARGEFILE;
+ file->f_mode |= FMODE_LSEEK | FMODE_PWRITE;
inode = file->f_inode;
WARN_ON(file->f_mapping != inode->i_mapping);
--
2.40.1
On 11/29/24 06:39, Nikita Kalyazin wrote:
>
> +#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)
Another option is to use the confidential computing (coco) attributes to keep
the write operation limited to clear-text guests. The diff below is against
patch 1/2; I've listed a couple of benefits and shortcomings after it.
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 9aba0ba25276..b7a0c7f2f82d 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/backing-dev.h>
+#include <linux/cc_platform.h>
#include <linux/falloc.h>
#include <linux/kvm_host.h>
#include <linux/pagemap.h>
@@ -274,7 +275,14 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
return filemap_grab_folio(inode->i_mapping, index);
}
-#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)
+/* Report whether the platform has the CC_ATTR_MEM_ENCRYPT attribute set. */
+static bool kvm_has_cc(void)
+{
+	return cc_platform_has(CC_ATTR_MEM_ENCRYPT);
+}
+
+#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM)
static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
size_t count, loff_t *offset)
{
@@ -290,6 +298,9 @@ static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
if (!buf)
return -EINVAL;
+ if (kvm_has_cc())
+ return -EIO;
+
start = *offset >> PAGE_SHIFT;
end = (*offset + count) >> PAGE_SHIFT;
@@ -564,7 +575,7 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
}
static struct file_operations kvm_gmem_fops = {
-#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM) && !defined(CONFIG_KVM_AMD_SEV)
+#if defined(CONFIG_KVM_GENERIC_PRIVATE_MEM)
.llseek = default_llseek,
.write = kvm_kmem_gmem_write,
#endif
Advantages:
* works with multiple architectures (powerpc and x86 so far)
* enumerates specific types of coco attributes
Disadvantages:
* The platform can have an encryption attribute but still be running a guest in clear text
* Some guests could be encrypted while others are clear text
To remedy these disadvantages, the write function would need to check whether
guest encryption is currently active for the specific guest.
Mike
> +static ssize_t kvm_kmem_gmem_write(struct file *file, const char __user *buf,
> + size_t count, loff_t *offset)
> +{
> + pgoff_t start, end, index;
> + ssize_t ret = 0;
© 2016 - 2026 Red Hat, Inc.