[PATCH 1/3] KVM: gmem: allocate private data for the gmem inode

Paolo Bonzini posted 3 patches 2 weeks, 1 day ago
[PATCH 1/3] KVM: gmem: allocate private data for the gmem inode
Posted by Paolo Bonzini 2 weeks, 1 day ago
In preparation for removing the usage of the uptodate flag,
reintroduce the gmem filesystem type.  We need it in order to
free the private inode information.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/uapi/linux/magic.h |   1 +
 virt/kvm/guest_memfd.c     | 117 +++++++++++++++++++++++++++++++++----
 virt/kvm/kvm_main.c        |   7 ++-
 virt/kvm/kvm_mm.h          |   8 ++-
 4 files changed, 119 insertions(+), 14 deletions(-)

diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index bb575f3ab45e..d856dd6a7ed9 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -103,5 +103,6 @@
 #define DEVMEM_MAGIC		0x454d444d	/* "DMEM" */
 #define SECRETMEM_MAGIC		0x5345434d	/* "SECM" */
 #define PID_FS_MAGIC		0x50494446	/* "PIDF" */
+#define KVM_GUEST_MEM_MAGIC	0x474d454d	/* "GMEM" */
 
 #endif /* __LINUX_MAGIC_H__ */
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 8f079a61a56d..3ea5a7597fd4 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -4,9 +4,74 @@
 #include <linux/kvm_host.h>
 #include <linux/pagemap.h>
 #include <linux/anon_inodes.h>
+#include <linux/pseudo_fs.h>
 
 #include "kvm_mm.h"
 
+/* Do all the filesystem crap just for evict_inode... */
+
+static struct vfsmount *kvm_gmem_mnt __read_mostly;
+
+static void gmem_evict_inode(struct inode *inode)
+{
+	kvfree(inode->i_private);
+	truncate_inode_pages_final(&inode->i_data);
+	clear_inode(inode);
+}
+
+static const struct super_operations gmem_super_operations = {
+	.drop_inode	= generic_delete_inode,
+	.evict_inode    = gmem_evict_inode,
+	.statfs         = simple_statfs,
+};
+
+static int gmem_init_fs_context(struct fs_context *fc)
+{
+	struct pseudo_fs_context *ctx = init_pseudo(fc, KVM_GUEST_MEM_MAGIC);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->ops = &gmem_super_operations;
+	return 0;
+}
+
+static struct file_system_type kvm_gmem_fs_type = {
+	.name           = "kvm_gmemfs",
+	.init_fs_context = gmem_init_fs_context,
+	.kill_sb        = kill_anon_super,
+};
+
+static struct file *kvm_gmem_create_file(const char *name, const struct file_operations *fops)
+{
+	struct inode *inode;
+	struct file *file;
+
+	if (fops->owner && !try_module_get(fops->owner))
+		return ERR_PTR(-ENOENT);
+
+	inode = alloc_anon_inode(kvm_gmem_mnt->mnt_sb);
+	if (IS_ERR(inode)) {
+		file = ERR_CAST(inode);
+		goto err;
+	}
+	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, fops);
+	if (IS_ERR(file))
+		goto err_iput;
+
+	return file;
+
+err_iput:
+	iput(inode);
+err:
+	module_put(fops->owner);
+	return file;
+}
+
+
+struct kvm_gmem_inode {
+	unsigned long flags;
+};
+
 struct kvm_gmem {
 	struct kvm *kvm;
 	struct xarray bindings;
@@ -308,9 +373,31 @@ static struct file_operations kvm_gmem_fops = {
 	.fallocate	= kvm_gmem_fallocate,
 };
 
-void kvm_gmem_init(struct module *module)
+int kvm_gmem_init(struct module *module)
 {
+	int ret;
+
+	ret = register_filesystem(&kvm_gmem_fs_type);
+	if (ret) {
+		pr_err("kvm-gmem: cannot register file system (%d)\n", ret);
+		return ret;
+	}
+
+	kvm_gmem_mnt = kern_mount(&kvm_gmem_fs_type);
+	if (IS_ERR(kvm_gmem_mnt)) {
+		pr_err("kvm-gmem: kernel mount failed (%ld)\n", PTR_ERR(kvm_gmem_mnt));
+		return PTR_ERR(kvm_gmem_mnt);
+	}
+
 	kvm_gmem_fops.owner = module;
+
+	return 0;
+}
+
+void kvm_gmem_exit(void)
+{
+	kern_unmount(kvm_gmem_mnt);
+	unregister_filesystem(&kvm_gmem_fs_type);
 }
 
 static int kvm_gmem_migrate_folio(struct address_space *mapping,
@@ -394,15 +481,23 @@ static const struct inode_operations kvm_gmem_iops = {
 
 static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 {
-	const char *anon_name = "[kvm-gmem]";
+	const char *gmem_name = "[kvm-gmem]";
+	struct kvm_gmem_inode *i_gmem;
 	struct kvm_gmem *gmem;
 	struct inode *inode;
 	struct file *file;
 	int fd, err;
 
+	i_gmem = kvzalloc(sizeof(struct kvm_gmem_inode), GFP_KERNEL);
+	if (!i_gmem)
+		return -ENOMEM;
+	i_gmem->flags = flags;
+
 	fd = get_unused_fd_flags(0);
-	if (fd < 0)
-		return fd;
+	if (fd < 0) {
+		err = fd;
+		goto err_i_gmem;
+	}
 
 	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
 	if (!gmem) {
@@ -410,19 +505,19 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 		goto err_fd;
 	}
 
-	file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
-					 O_RDWR, NULL);
+	file = kvm_gmem_create_file(gmem_name, &kvm_gmem_fops);
 	if (IS_ERR(file)) {
 		err = PTR_ERR(file);
 		goto err_gmem;
 	}
 
+	inode = file->f_inode;
+
+	file->f_mapping = inode->i_mapping;
+	file->private_data = gmem;
 	file->f_flags |= O_LARGEFILE;
 
-	inode = file->f_inode;
-	WARN_ON(file->f_mapping != inode->i_mapping);
-
-	inode->i_private = (void *)(unsigned long)flags;
+	inode->i_private = i_gmem;
 	inode->i_op = &kvm_gmem_iops;
 	inode->i_mapping->a_ops = &kvm_gmem_aops;
 	inode->i_mode |= S_IFREG;
@@ -444,6 +539,8 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
 	kfree(gmem);
 err_fd:
 	put_unused_fd(fd);
+err_i_gmem:
+	kvfree(i_gmem);
 	return err;
 }
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 279e03029ce1..8b7b4e0eb639 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -6504,7 +6504,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
 	if (WARN_ON_ONCE(r))
 		goto err_vfio;
 
-	kvm_gmem_init(module);
+	r = kvm_gmem_init(module);
+	if (r)
+		goto err_gmem;
 
 	r = kvm_init_virtualization();
 	if (r)
@@ -6525,6 +6527,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
 err_register:
 	kvm_uninit_virtualization();
 err_virt:
+	kvm_gmem_exit();
+err_gmem:
 	kvm_vfio_ops_exit();
 err_vfio:
 	kvm_async_pf_deinit();
@@ -6556,6 +6560,7 @@ void kvm_exit(void)
 	for_each_possible_cpu(cpu)
 		free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
 	kmem_cache_destroy(kvm_vcpu_cache);
+	kvm_gmem_exit();
 	kvm_vfio_ops_exit();
 	kvm_async_pf_deinit();
 	kvm_irqfd_exit();
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 715f19669d01..91e4202574a8 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -36,15 +36,17 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
 #endif /* HAVE_KVM_PFNCACHE */
 
 #ifdef CONFIG_KVM_PRIVATE_MEM
-void kvm_gmem_init(struct module *module);
+int kvm_gmem_init(struct module *module);
+void kvm_gmem_exit(void);
 int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
 int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
 		  unsigned int fd, loff_t offset);
 void kvm_gmem_unbind(struct kvm_memory_slot *slot);
 #else
-static inline void kvm_gmem_init(struct module *module)
+static inline void kvm_gmem_exit(void) {}
+static inline int kvm_gmem_init(struct module *module)
 {
-
+	return 0;
 }
 
 static inline int kvm_gmem_bind(struct kvm *kvm,
-- 
2.43.5
Re: [PATCH 1/3] KVM: gmem: allocate private data for the gmem inode
Posted by Sean Christopherson 1 week, 3 days ago
+Ackerley, who's also working on resurrecting the file system[*].  At a glance,
there appear to be non-trivial differences, e.g. Ackerley's version has a call
to security_inode_init_security_anon().  I've paged out much of the inode stuff,
so I trust Ackerley's judgment far, far more than my own :-)

[*] https://lore.kernel.org/all/d1940d466fc69472c8b6dda95df2e0522b2d8744.1726009989.git.ackerleytng@google.com

On Fri, Nov 08, 2024, Paolo Bonzini wrote:
> In preparation for removing the usage of the uptodate flag,
> reintroduce the gmem filesystem type.  We need it in order to
> free the private inode information.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  include/uapi/linux/magic.h |   1 +
>  virt/kvm/guest_memfd.c     | 117 +++++++++++++++++++++++++++++++++----
>  virt/kvm/kvm_main.c        |   7 ++-
>  virt/kvm/kvm_mm.h          |   8 ++-
>  4 files changed, 119 insertions(+), 14 deletions(-)
> 
> diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
> index bb575f3ab45e..d856dd6a7ed9 100644
> --- a/include/uapi/linux/magic.h
> +++ b/include/uapi/linux/magic.h
> @@ -103,5 +103,6 @@
>  #define DEVMEM_MAGIC		0x454d444d	/* "DMEM" */
>  #define SECRETMEM_MAGIC		0x5345434d	/* "SECM" */
>  #define PID_FS_MAGIC		0x50494446	/* "PIDF" */
> +#define KVM_GUEST_MEM_MAGIC	0x474d454d	/* "GMEM" */
>  
>  #endif /* __LINUX_MAGIC_H__ */
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 8f079a61a56d..3ea5a7597fd4 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -4,9 +4,74 @@
>  #include <linux/kvm_host.h>
>  #include <linux/pagemap.h>
>  #include <linux/anon_inodes.h>
> +#include <linux/pseudo_fs.h>
>  
>  #include "kvm_mm.h"
>  
> +/* Do all the filesystem crap just for evict_inode... */
> +
> +static struct vfsmount *kvm_gmem_mnt __read_mostly;
> +
> +static void gmem_evict_inode(struct inode *inode)
> +{
> +	kvfree(inode->i_private);
> +	truncate_inode_pages_final(&inode->i_data);
> +	clear_inode(inode);
> +}
> +
> +static const struct super_operations gmem_super_operations = {
> +	.drop_inode	= generic_delete_inode,
> +	.evict_inode    = gmem_evict_inode,
> +	.statfs         = simple_statfs,
> +};
> +
> +static int gmem_init_fs_context(struct fs_context *fc)
> +{
> +	struct pseudo_fs_context *ctx = init_pseudo(fc, KVM_GUEST_MEM_MAGIC);
> +	if (!ctx)
> +		return -ENOMEM;
> +
> +	ctx->ops = &gmem_super_operations;
> +	return 0;
> +}
> +
> +static struct file_system_type kvm_gmem_fs_type = {
> +	.name           = "kvm_gmemfs",
> +	.init_fs_context = gmem_init_fs_context,
> +	.kill_sb        = kill_anon_super,
> +};
> +
> +static struct file *kvm_gmem_create_file(const char *name, const struct file_operations *fops)
> +{
> +	struct inode *inode;
> +	struct file *file;
> +
> +	if (fops->owner && !try_module_get(fops->owner))
> +		return ERR_PTR(-ENOENT);
> +
> +	inode = alloc_anon_inode(kvm_gmem_mnt->mnt_sb);
> +	if (IS_ERR(inode)) {
> +		file = ERR_CAST(inode);
> +		goto err;
> +	}
> +	file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, fops);
> +	if (IS_ERR(file))
> +		goto err_iput;
> +
> +	return file;
> +
> +err_iput:
> +	iput(inode);
> +err:
> +	module_put(fops->owner);
> +	return file;
> +}
> +
> +
> +struct kvm_gmem_inode {
> +	unsigned long flags;
> +};
> +
>  struct kvm_gmem {
>  	struct kvm *kvm;
>  	struct xarray bindings;
> @@ -308,9 +373,31 @@ static struct file_operations kvm_gmem_fops = {
>  	.fallocate	= kvm_gmem_fallocate,
>  };
>  
> -void kvm_gmem_init(struct module *module)
> +int kvm_gmem_init(struct module *module)
>  {
> +	int ret;
> +
> +	ret = register_filesystem(&kvm_gmem_fs_type);
> +	if (ret) {
> +		pr_err("kvm-gmem: cannot register file system (%d)\n", ret);
> +		return ret;
> +	}
> +
> +	kvm_gmem_mnt = kern_mount(&kvm_gmem_fs_type);
> +	if (IS_ERR(kvm_gmem_mnt)) {
> +		pr_err("kvm-gmem: kernel mount failed (%ld)\n", PTR_ERR(kvm_gmem_mnt));
> +		return PTR_ERR(kvm_gmem_mnt);
> +	}
> +
>  	kvm_gmem_fops.owner = module;
> +
> +	return 0;
> +}
> +
> +void kvm_gmem_exit(void)
> +{
> +	kern_unmount(kvm_gmem_mnt);
> +	unregister_filesystem(&kvm_gmem_fs_type);
>  }
>  
>  static int kvm_gmem_migrate_folio(struct address_space *mapping,
> @@ -394,15 +481,23 @@ static const struct inode_operations kvm_gmem_iops = {
>  
>  static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  {
> -	const char *anon_name = "[kvm-gmem]";
> +	const char *gmem_name = "[kvm-gmem]";
> +	struct kvm_gmem_inode *i_gmem;
>  	struct kvm_gmem *gmem;
>  	struct inode *inode;
>  	struct file *file;
>  	int fd, err;
>  
> +	i_gmem = kvzalloc(sizeof(struct kvm_gmem_inode), GFP_KERNEL);
> +	if (!i_gmem)
> +		return -ENOMEM;
> +	i_gmem->flags = flags;
> +
>  	fd = get_unused_fd_flags(0);
> -	if (fd < 0)
> -		return fd;
> +	if (fd < 0) {
> +		err = fd;
> +		goto err_i_gmem;
> +	}
>  
>  	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
>  	if (!gmem) {
> @@ -410,19 +505,19 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  		goto err_fd;
>  	}
>  
> -	file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
> -					 O_RDWR, NULL);
> +	file = kvm_gmem_create_file(gmem_name, &kvm_gmem_fops);
>  	if (IS_ERR(file)) {
>  		err = PTR_ERR(file);
>  		goto err_gmem;
>  	}
>  
> +	inode = file->f_inode;
> +
> +	file->f_mapping = inode->i_mapping;
> +	file->private_data = gmem;
>  	file->f_flags |= O_LARGEFILE;
>  
> -	inode = file->f_inode;
> -	WARN_ON(file->f_mapping != inode->i_mapping);
> -
> -	inode->i_private = (void *)(unsigned long)flags;
> +	inode->i_private = i_gmem;
>  	inode->i_op = &kvm_gmem_iops;
>  	inode->i_mapping->a_ops = &kvm_gmem_aops;
>  	inode->i_mode |= S_IFREG;
> @@ -444,6 +539,8 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
>  	kfree(gmem);
>  err_fd:
>  	put_unused_fd(fd);
> +err_i_gmem:
> +	kvfree(i_gmem);
>  	return err;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 279e03029ce1..8b7b4e0eb639 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -6504,7 +6504,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
>  	if (WARN_ON_ONCE(r))
>  		goto err_vfio;
>  
> -	kvm_gmem_init(module);
> +	r = kvm_gmem_init(module);
> +	if (r)
> +		goto err_gmem;
>  
>  	r = kvm_init_virtualization();
>  	if (r)
> @@ -6525,6 +6527,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
>  err_register:
>  	kvm_uninit_virtualization();
>  err_virt:
> +	kvm_gmem_exit();
> +err_gmem:
>  	kvm_vfio_ops_exit();
>  err_vfio:
>  	kvm_async_pf_deinit();
> @@ -6556,6 +6560,7 @@ void kvm_exit(void)
>  	for_each_possible_cpu(cpu)
>  		free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
>  	kmem_cache_destroy(kvm_vcpu_cache);
> +	kvm_gmem_exit();
>  	kvm_vfio_ops_exit();
>  	kvm_async_pf_deinit();
>  	kvm_irqfd_exit();
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index 715f19669d01..91e4202574a8 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -36,15 +36,17 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
>  #endif /* HAVE_KVM_PFNCACHE */
>  
>  #ifdef CONFIG_KVM_PRIVATE_MEM
> -void kvm_gmem_init(struct module *module);
> +int kvm_gmem_init(struct module *module);
> +void kvm_gmem_exit(void);
>  int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
>  int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
>  		  unsigned int fd, loff_t offset);
>  void kvm_gmem_unbind(struct kvm_memory_slot *slot);
>  #else
> -static inline void kvm_gmem_init(struct module *module)
> +static inline void kvm_gmem_exit(void) {}
> +static inline int kvm_gmem_init(struct module *module)
>  {
> -
> +	return 0;
>  }
>  
>  static inline int kvm_gmem_bind(struct kvm *kvm,
> -- 
> 2.43.5
> 
>