In preparation for removing the usage of the uptodate flag,
reintroduce the gmem filesystem type. We need it in order to
free the private inode information.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
include/uapi/linux/magic.h | 1 +
virt/kvm/guest_memfd.c | 117 +++++++++++++++++++++++++++++++++----
virt/kvm/kvm_main.c | 7 ++-
virt/kvm/kvm_mm.h | 8 ++-
4 files changed, 119 insertions(+), 14 deletions(-)
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index bb575f3ab45e..d856dd6a7ed9 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -103,5 +103,6 @@
#define DEVMEM_MAGIC 0x454d444d /* "DMEM" */
#define SECRETMEM_MAGIC 0x5345434d /* "SECM" */
#define PID_FS_MAGIC 0x50494446 /* "PIDF" */
+#define KVM_GUEST_MEM_MAGIC 0x474d454d /* "GMEM" */
#endif /* __LINUX_MAGIC_H__ */
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 8f079a61a56d..3ea5a7597fd4 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -4,9 +4,74 @@
#include <linux/kvm_host.h>
#include <linux/pagemap.h>
#include <linux/anon_inodes.h>
+#include <linux/pseudo_fs.h>
#include "kvm_mm.h"
+/*
+ * guest_memfd uses its own pseudo filesystem, rather than plain anonymous
+ * inodes, solely so that it can supply an evict_inode callback; that
+ * callback frees the per-inode data stored in inode->i_private.
+ */
+
+static struct vfsmount *kvm_gmem_mnt __read_mostly; /* assigned from kern_mount() in kvm_gmem_init() */
+
+/*
+ * Tear down a guest_memfd inode when it is evicted.
+ *
+ * The page cache is truncated and the inode cleared before the per-inode
+ * data hanging off i_private is released, so that nothing running as part
+ * of the truncation can observe an already-freed i_private.
+ */
+static void gmem_evict_inode(struct inode *inode)
+{
+	void *i_gmem = inode->i_private;
+
+	truncate_inode_pages_final(&inode->i_data);
+	clear_inode(inode);
+
+	/* May still be NULL if file creation failed before i_private was set. */
+	inode->i_private = NULL;
+	kvfree(i_gmem);
+}
+
+/*
+ * Superblock operations for the guest_memfd pseudo filesystem.  The
+ * evict_inode hook is what releases the data stored in inode->i_private.
+ */
+static const struct super_operations gmem_super_operations = {
+	.statfs		= simple_statfs,
+	.evict_inode	= gmem_evict_inode,
+	.drop_inode	= generic_delete_inode,
+};
+
+/*
+ * fs_context initialiser: set up a pseudo filesystem context tagged with
+ * KVM_GUEST_MEM_MAGIC and point it at the guest_memfd super operations.
+ *
+ * Returns 0 on success, -ENOMEM if init_pseudo() yields no context.
+ */
+static int gmem_init_fs_context(struct fs_context *fc)
+{
+	struct pseudo_fs_context *pfc;
+
+	pfc = init_pseudo(fc, KVM_GUEST_MEM_MAGIC);
+	if (pfc) {
+		pfc->ops = &gmem_super_operations;
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
+/*
+ * Filesystem type backing guest_memfd files.  It is registered and mounted
+ * internally (kern_mount()) by kvm_gmem_init().
+ */
+static struct file_system_type kvm_gmem_fs_type = {
+	.kill_sb		= kill_anon_super,
+	.init_fs_context	= gmem_init_fs_context,
+	.name			= "kvm_gmemfs",
+};
+
+/*
+ * Allocate an inode on the guest_memfd mount and wrap it in an O_RDWR file
+ * that uses @fops.
+ *
+ * A reference on @fops->owner is taken up front and dropped again if either
+ * allocation fails.  Returns the new file, or an ERR_PTR() on failure.
+ *
+ * NOTE(review): no security_inode_init_security_anon() call is made for the
+ * new inode here -- confirm that this is intended.
+ */
+static struct file *kvm_gmem_create_file(const char *name,
+					 const struct file_operations *fops)
+{
+	struct file *filp;
+	struct inode *ino;
+
+	if (fops->owner && !try_module_get(fops->owner))
+		return ERR_PTR(-ENOENT);
+
+	ino = alloc_anon_inode(kvm_gmem_mnt->mnt_sb);
+	if (IS_ERR(ino)) {
+		module_put(fops->owner);
+		return ERR_CAST(ino);
+	}
+
+	filp = alloc_file_pseudo(ino, kvm_gmem_mnt, name, O_RDWR, fops);
+	if (IS_ERR(filp)) {
+		/* Unwind in reverse order: inode first, then the module ref. */
+		iput(ino);
+		module_put(fops->owner);
+	}
+
+	return filp;
+}
+
+
+struct kvm_gmem_inode { /* per-inode guest_memfd data; stored in inode->i_private, freed in gmem_evict_inode() */
+ unsigned long flags; /* creation flags, copied from the flags argument of __kvm_gmem_create() */
+};
+
struct kvm_gmem {
struct kvm *kvm;
struct xarray bindings;
@@ -308,9 +373,31 @@ static struct file_operations kvm_gmem_fops = {
.fallocate = kvm_gmem_fallocate,
};
-void kvm_gmem_init(struct module *module)
+/*
+ * Register the guest_memfd pseudo filesystem and create its internal mount.
+ *
+ * @module: owner recorded in kvm_gmem_fops once setup has succeeded.
+ *
+ * Returns 0 on success or a negative errno.  On failure nothing is left
+ * registered, so the caller must not call kvm_gmem_exit().
+ */
+int kvm_gmem_init(struct module *module)
+{
+	int ret;
+
+	ret = register_filesystem(&kvm_gmem_fs_type);
+	if (ret) {
+		pr_err("kvm-gmem: cannot register file system (%d)\n", ret);
+		return ret;
+	}
+
+	kvm_gmem_mnt = kern_mount(&kvm_gmem_fs_type);
+	if (IS_ERR(kvm_gmem_mnt)) {
+		ret = PTR_ERR(kvm_gmem_mnt);
+		pr_err("kvm-gmem: kernel mount failed (%ld)\n", PTR_ERR(kvm_gmem_mnt));
+		/*
+		 * Undo the registration above: kvm_init() jumps past
+		 * kvm_gmem_exit() when this function fails, so nobody else
+		 * would unregister the filesystem type.
+		 */
+		unregister_filesystem(&kvm_gmem_fs_type);
+		return ret;
+	}
+
+	kvm_gmem_fops.owner = module;
+
+	return 0;
+}
+
+void kvm_gmem_exit(void) /* undoes kvm_gmem_init(), in reverse order */
+{
+ kern_unmount(kvm_gmem_mnt); /* drop the internal mount created by kern_mount() */
+ unregister_filesystem(&kvm_gmem_fs_type); /* then remove the filesystem type registered at init */
}
static int kvm_gmem_migrate_folio(struct address_space *mapping,
@@ -394,15 +481,23 @@ static const struct inode_operations kvm_gmem_iops = {
static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
{
- const char *anon_name = "[kvm-gmem]";
+ const char *gmem_name = "[kvm-gmem]";
+ struct kvm_gmem_inode *i_gmem;
struct kvm_gmem *gmem;
struct inode *inode;
struct file *file;
int fd, err;
+ i_gmem = kvzalloc(sizeof(struct kvm_gmem_inode), GFP_KERNEL);
+ if (!i_gmem)
+ return -ENOMEM;
+ i_gmem->flags = flags;
+
fd = get_unused_fd_flags(0);
- if (fd < 0)
- return fd;
+ if (fd < 0) {
+ err = fd;
+ goto err_i_gmem;
+ }
gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
if (!gmem) {
@@ -410,19 +505,19 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
goto err_fd;
}
- file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem,
- O_RDWR, NULL);
+ file = kvm_gmem_create_file(gmem_name, &kvm_gmem_fops);
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto err_gmem;
}
+ inode = file->f_inode;
+
+ file->f_mapping = inode->i_mapping;
+ file->private_data = gmem;
file->f_flags |= O_LARGEFILE;
- inode = file->f_inode;
- WARN_ON(file->f_mapping != inode->i_mapping);
-
- inode->i_private = (void *)(unsigned long)flags;
+ inode->i_private = i_gmem;
inode->i_op = &kvm_gmem_iops;
inode->i_mapping->a_ops = &kvm_gmem_aops;
inode->i_mode |= S_IFREG;
@@ -444,6 +539,8 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags)
kfree(gmem);
err_fd:
put_unused_fd(fd);
+err_i_gmem:
+ kvfree(i_gmem);
return err;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 279e03029ce1..8b7b4e0eb639 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -6504,7 +6504,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
if (WARN_ON_ONCE(r))
goto err_vfio;
- kvm_gmem_init(module);
+ r = kvm_gmem_init(module);
+ if (r)
+ goto err_gmem;
r = kvm_init_virtualization();
if (r)
@@ -6525,6 +6527,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module)
err_register:
kvm_uninit_virtualization();
err_virt:
+ kvm_gmem_exit();
+err_gmem:
kvm_vfio_ops_exit();
err_vfio:
kvm_async_pf_deinit();
@@ -6556,6 +6560,7 @@ void kvm_exit(void)
for_each_possible_cpu(cpu)
free_cpumask_var(per_cpu(cpu_kick_mask, cpu));
kmem_cache_destroy(kvm_vcpu_cache);
+ kvm_gmem_exit();
kvm_vfio_ops_exit();
kvm_async_pf_deinit();
kvm_irqfd_exit();
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 715f19669d01..91e4202574a8 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -36,15 +36,17 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
#endif /* HAVE_KVM_PFNCACHE */
#ifdef CONFIG_KVM_PRIVATE_MEM
-void kvm_gmem_init(struct module *module);
+int kvm_gmem_init(struct module *module);
+void kvm_gmem_exit(void);
int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned int fd, loff_t offset);
void kvm_gmem_unbind(struct kvm_memory_slot *slot);
#else
-static inline void kvm_gmem_init(struct module *module)
+static inline void kvm_gmem_exit(void) {}
+static inline int kvm_gmem_init(struct module *module)
{
-
+ return 0;
}
static inline int kvm_gmem_bind(struct kvm *kvm,
--
2.43.5
+Ackerley, who's also working on resurrecting the file system[*]. At a glance, there appear to be non-trivial differences, e.g. Ackerley's version has a call to security_inode_init_security_anon(). I've paged out much of the inode stuff, so I trust Ackerley's judgment far, far more than my own :-) [*] https://lore.kernel.org/all/d1940d466fc69472c8b6dda95df2e0522b2d8744.1726009989.git.ackerleytng@google.com On Fri, Nov 08, 2024, Paolo Bonzini wrote: > In preparation for removing the usage of the uptodate flag, > reintroduce the gmem filesystem type. We need it in order to > free the private inode information. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > include/uapi/linux/magic.h | 1 + > virt/kvm/guest_memfd.c | 117 +++++++++++++++++++++++++++++++++---- > virt/kvm/kvm_main.c | 7 ++- > virt/kvm/kvm_mm.h | 8 ++- > 4 files changed, 119 insertions(+), 14 deletions(-) > > diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h > index bb575f3ab45e..d856dd6a7ed9 100644 > --- a/include/uapi/linux/magic.h > +++ b/include/uapi/linux/magic.h > @@ -103,5 +103,6 @@ > #define DEVMEM_MAGIC 0x454d444d /* "DMEM" */ > #define SECRETMEM_MAGIC 0x5345434d /* "SECM" */ > #define PID_FS_MAGIC 0x50494446 /* "PIDF" */ > +#define KVM_GUEST_MEM_MAGIC 0x474d454d /* "GMEM" */ > > #endif /* __LINUX_MAGIC_H__ */ > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > index 8f079a61a56d..3ea5a7597fd4 100644 > --- a/virt/kvm/guest_memfd.c > +++ b/virt/kvm/guest_memfd.c > @@ -4,9 +4,74 @@ > #include <linux/kvm_host.h> > #include <linux/pagemap.h> > #include <linux/anon_inodes.h> > +#include <linux/pseudo_fs.h> > > #include "kvm_mm.h" > > +/* Do all the filesystem crap just for evict_inode... 
*/ > + > +static struct vfsmount *kvm_gmem_mnt __read_mostly; > + > +static void gmem_evict_inode(struct inode *inode) > +{ > + kvfree(inode->i_private); > + truncate_inode_pages_final(&inode->i_data); > + clear_inode(inode); > +} > + > +static const struct super_operations gmem_super_operations = { > + .drop_inode = generic_delete_inode, > + .evict_inode = gmem_evict_inode, > + .statfs = simple_statfs, > +}; > + > +static int gmem_init_fs_context(struct fs_context *fc) > +{ > + struct pseudo_fs_context *ctx = init_pseudo(fc, KVM_GUEST_MEM_MAGIC); > + if (!ctx) > + return -ENOMEM; > + > + ctx->ops = &gmem_super_operations; > + return 0; > +} > + > +static struct file_system_type kvm_gmem_fs_type = { > + .name = "kvm_gmemfs", > + .init_fs_context = gmem_init_fs_context, > + .kill_sb = kill_anon_super, > +}; > + > +static struct file *kvm_gmem_create_file(const char *name, const struct file_operations *fops) > +{ > + struct inode *inode; > + struct file *file; > + > + if (fops->owner && !try_module_get(fops->owner)) > + return ERR_PTR(-ENOENT); > + > + inode = alloc_anon_inode(kvm_gmem_mnt->mnt_sb); > + if (IS_ERR(inode)) { > + file = ERR_CAST(inode); > + goto err; > + } > + file = alloc_file_pseudo(inode, kvm_gmem_mnt, name, O_RDWR, fops); > + if (IS_ERR(file)) > + goto err_iput; > + > + return file; > + > +err_iput: > + iput(inode); > +err: > + module_put(fops->owner); > + return file; > +} > + > + > +struct kvm_gmem_inode { > + unsigned long flags; > +}; > + > struct kvm_gmem { > struct kvm *kvm; > struct xarray bindings; > @@ -308,9 +373,31 @@ static struct file_operations kvm_gmem_fops = { > .fallocate = kvm_gmem_fallocate, > }; > > -void kvm_gmem_init(struct module *module) > +int kvm_gmem_init(struct module *module) > { > + int ret; > + > + ret = register_filesystem(&kvm_gmem_fs_type); > + if (ret) { > + pr_err("kvm-gmem: cannot register file system (%d)\n", ret); > + return ret; > + } > + > + kvm_gmem_mnt = kern_mount(&kvm_gmem_fs_type); > + if 
(IS_ERR(kvm_gmem_mnt)) { > + pr_err("kvm-gmem: kernel mount failed (%ld)\n", PTR_ERR(kvm_gmem_mnt)); > + return PTR_ERR(kvm_gmem_mnt); > + } > + > kvm_gmem_fops.owner = module; > + > + return 0; > +} > + > +void kvm_gmem_exit(void) > +{ > + kern_unmount(kvm_gmem_mnt); > + unregister_filesystem(&kvm_gmem_fs_type); > } > > static int kvm_gmem_migrate_folio(struct address_space *mapping, > @@ -394,15 +481,23 @@ static const struct inode_operations kvm_gmem_iops = { > > static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) > { > - const char *anon_name = "[kvm-gmem]"; > + const char *gmem_name = "[kvm-gmem]"; > + struct kvm_gmem_inode *i_gmem; > struct kvm_gmem *gmem; > struct inode *inode; > struct file *file; > int fd, err; > > + i_gmem = kvzalloc(sizeof(struct kvm_gmem_inode), GFP_KERNEL); > + if (!i_gmem) > + return -ENOMEM; > + i_gmem->flags = flags; > + > fd = get_unused_fd_flags(0); > - if (fd < 0) > - return fd; > + if (fd < 0) { > + err = fd; > + goto err_i_gmem; > + } > > gmem = kzalloc(sizeof(*gmem), GFP_KERNEL); > if (!gmem) { > @@ -410,19 +505,19 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) > goto err_fd; > } > > - file = anon_inode_create_getfile(anon_name, &kvm_gmem_fops, gmem, > - O_RDWR, NULL); > + file = kvm_gmem_create_file(gmem_name, &kvm_gmem_fops); > if (IS_ERR(file)) { > err = PTR_ERR(file); > goto err_gmem; > } > > + inode = file->f_inode; > + > + file->f_mapping = inode->i_mapping; > + file->private_data = gmem; > file->f_flags |= O_LARGEFILE; > > - inode = file->f_inode; > - WARN_ON(file->f_mapping != inode->i_mapping); > - > - inode->i_private = (void *)(unsigned long)flags; > + inode->i_private = i_gmem; > inode->i_op = &kvm_gmem_iops; > inode->i_mapping->a_ops = &kvm_gmem_aops; > inode->i_mode |= S_IFREG; > @@ -444,6 +539,8 @@ static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags) > kfree(gmem); > err_fd: > put_unused_fd(fd); > +err_i_gmem: > + kvfree(i_gmem); > return err; > } > > 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 279e03029ce1..8b7b4e0eb639 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -6504,7 +6504,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) > if (WARN_ON_ONCE(r)) > goto err_vfio; > > - kvm_gmem_init(module); > + r = kvm_gmem_init(module); > + if (r) > + goto err_gmem; > > r = kvm_init_virtualization(); > if (r) > @@ -6525,6 +6527,8 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) > err_register: > kvm_uninit_virtualization(); > err_virt: > + kvm_gmem_exit(); > +err_gmem: > kvm_vfio_ops_exit(); > err_vfio: > kvm_async_pf_deinit(); > @@ -6556,6 +6560,7 @@ void kvm_exit(void) > for_each_possible_cpu(cpu) > free_cpumask_var(per_cpu(cpu_kick_mask, cpu)); > kmem_cache_destroy(kvm_vcpu_cache); > + kvm_gmem_exit(); > kvm_vfio_ops_exit(); > kvm_async_pf_deinit(); > kvm_irqfd_exit(); > diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h > index 715f19669d01..91e4202574a8 100644 > --- a/virt/kvm/kvm_mm.h > +++ b/virt/kvm/kvm_mm.h > @@ -36,15 +36,17 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, > #endif /* HAVE_KVM_PFNCACHE */ > > #ifdef CONFIG_KVM_PRIVATE_MEM > -void kvm_gmem_init(struct module *module); > +int kvm_gmem_init(struct module *module); > +void kvm_gmem_exit(void); > int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args); > int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot, > unsigned int fd, loff_t offset); > void kvm_gmem_unbind(struct kvm_memory_slot *slot); > #else > -static inline void kvm_gmem_init(struct module *module) > +static inline void kvm_gmem_exit(void) {} > +static inline int kvm_gmem_init(struct module *module) > { > - > + return 0; > } > > static inline int kvm_gmem_bind(struct kvm *kvm, > -- > 2.43.5 > >
© 2016 - 2024 Red Hat, Inc.