[RFC PATCH 02/11] KVM: guest_mem: Add ioctl KVM_LINK_GUEST_MEMFD

Ackerley Tng posted 11 patches 2 years, 6 months ago
[RFC PATCH 02/11] KVM: guest_mem: Add ioctl KVM_LINK_GUEST_MEMFD
Posted by Ackerley Tng 2 years, 6 months ago
KVM_LINK_GUEST_MEMFD will link a gmem fd's underlying inode to a new
file (and fd).

Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
 include/uapi/linux/kvm.h |  8 +++++
 virt/kvm/guest_mem.c     | 73 ++++++++++++++++++++++++++++++++++++++++
 virt/kvm/kvm_main.c      | 10 ++++++
 virt/kvm/kvm_mm.h        |  7 ++++
 4 files changed, 98 insertions(+)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index eb900344a054..d0e2a2ce0df2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -2299,4 +2299,12 @@ struct kvm_create_guest_memfd {
 	__u64 reserved[6];
 };
 
+#define KVM_LINK_GUEST_MEMFD	_IOWR(KVMIO,  0xd5, struct kvm_link_guest_memfd)
+
+struct kvm_link_guest_memfd {
+	__u64 fd;
+	__u64 flags;
+	__u64 reserved[6];
+};
+
 #endif /* __LINUX_KVM_H */
diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
index 30d0ab8745ee..1b3df273f785 100644
--- a/virt/kvm/guest_mem.c
+++ b/virt/kvm/guest_mem.c
@@ -477,6 +477,79 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
 	return __kvm_gmem_create(kvm, size, flags, kvm_gmem_mnt);
 }
 
+static inline void __kvm_gmem_do_link(struct inode *inode)
+{
+	/* Refer to simple_link() */
+
+	inode->i_ctime = current_time(inode);
+	inc_nlink(inode);
+
+	/*
+	 * ihold() to add additional reference to inode for reference in dentry,
+	 * created in kvm_gmem_alloc_file() -> alloc_file_pseudo(). This is not
+	 * necessary when creating a new file because alloc_inode() creates
+	 * inodes with i_count = 1, which is the refcount for the dentry in the
+	 * file.
+	 */
+	ihold(inode);
+
+	/*
+	 * dget() and d_instantiate() complete the setup of a dentry, but those
+	 * have already been done in kvm_gmem_alloc_file() ->
+	 * alloc_file_pseudo()
+	 */
+}
+
+int kvm_gmem_link(struct kvm *kvm, struct kvm_link_guest_memfd *args)
+{
+	int ret;
+	int fd;
+	struct fd f;
+	struct kvm_gmem *gmem;
+	u64 flags = args->flags;
+	u64 valid_flags = 0;
+	struct inode *inode;
+	struct file *dst_file;
+
+	if (flags & ~valid_flags)
+		return -EINVAL;
+
+	f = fdget(args->fd);
+	if (!f.file)
+		return -EINVAL;
+
+	ret = -EINVAL;
+	if (f.file->f_op != &kvm_gmem_fops)
+		goto out;
+
+	/* Cannot link a gmem file with the same vm again */
+	gmem = f.file->private_data;
+	if (gmem->kvm == kvm)
+		goto out;
+
+	ret = fd = get_unused_fd_flags(0);
+	if (fd < 0)
+		goto out;
+
+	inode = file_inode(f.file);
+	dst_file = kvm_gmem_alloc_file(inode, kvm_gmem_mnt);
+	if (IS_ERR(dst_file)) {
+		ret = PTR_ERR(dst_file);
+		goto out_fd;
+	}
+
+	__kvm_gmem_do_link(inode);
+
+	fd_install(fd, dst_file);
+	return fd;
+
+out_fd:
+	put_unused_fd(fd);
+out:
+	fdput(f);
+	return ret;
+}
+
 int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
 		  unsigned int fd, loff_t offset)
 {
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ee331cf8ba54..51cc8b80ebe0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5177,6 +5177,16 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_gmem_create(kvm, &guest_memfd);
 		break;
 	}
+	case KVM_LINK_GUEST_MEMFD: {
+		struct kvm_link_guest_memfd params;
+
+		r = -EFAULT;
+		if (copy_from_user(&params, argp, sizeof(params)))
+			goto out;
+
+		r = kvm_gmem_link(kvm, &params);
+		break;
+	}
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
 	}
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 798f20d612bb..f85f452133b3 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -41,6 +41,7 @@ static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
 int kvm_gmem_init(void);
 void kvm_gmem_exit(void);
 int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args);
+int kvm_gmem_link(struct kvm *kvm, struct kvm_link_guest_memfd *args);
 int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
 		  unsigned int fd, loff_t offset);
 void kvm_gmem_unbind(struct kvm_memory_slot *slot);
@@ -61,6 +62,12 @@ static inline int kvm_gmem_create(struct kvm *kvm,
 	return -EOPNOTSUPP;
 }
 
+static inline int kvm_gmem_link(struct kvm *kvm,
+				struct kvm_link_guest_memfd *args)
+{
+	return -EOPNOTSUPP;
+}
+
 static inline int kvm_gmem_bind(struct kvm *kvm,
 					 struct kvm_memory_slot *slot,
 					 unsigned int fd, loff_t offset)
-- 
2.41.0.640.ga95def55d0-goog
Re: [RFC PATCH 02/11] KVM: guest_mem: Add ioctl KVM_LINK_GUEST_MEMFD
Posted by Sean Christopherson 2 years, 5 months ago
On Mon, Aug 07, 2023, Ackerley Tng wrote:
> KVM_LINK_GUEST_MEMFD will link a gmem fd's underlying inode to a new
> file (and fd).
> 
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> ---
>  include/uapi/linux/kvm.h |  8 +++++
>  virt/kvm/guest_mem.c     | 73 ++++++++++++++++++++++++++++++++++++++++
>  virt/kvm/kvm_main.c      | 10 ++++++
>  virt/kvm/kvm_mm.h        |  7 ++++
>  4 files changed, 98 insertions(+)
> 
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index eb900344a054..d0e2a2ce0df2 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -2299,4 +2299,12 @@ struct kvm_create_guest_memfd {
>  	__u64 reserved[6];
>  };
>  
> +#define KVM_LINK_GUEST_MEMFD	_IOWR(KVMIO,  0xd5, struct kvm_link_guest_memfd)
> +
> +struct kvm_link_guest_memfd {
> +	__u64 fd;
> +	__u64 flags;
> +	__u64 reserved[6];
> +};
> +
>  #endif /* __LINUX_KVM_H */
> diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
> index 30d0ab8745ee..1b3df273f785 100644
> --- a/virt/kvm/guest_mem.c
> +++ b/virt/kvm/guest_mem.c
> @@ -477,6 +477,79 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
>  	return __kvm_gmem_create(kvm, size, flags, kvm_gmem_mnt);
>  }
>  
> +static inline void __kvm_gmem_do_link(struct inode *inode)
> +{
> +	/* Refer to simple_link() */
> +
> +	inode->i_ctime = current_time(inode);
> +	inc_nlink(inode);
> +
> +	/*
> +	 * ihold() to add additional reference to inode for reference in dentry,
> +	 * created in kvm_gmem_alloc_file() -> alloc_file_pseudo(). This is not
> +	 * necessary when creating a new file because alloc_inode() creates
> +	 * inodes with i_count = 1, which is the refcount for the dentry in the
> +	 * file.
> +	 */
> +	ihold(inode);
> +
> +	/*
> +	 * dget() and d_instantiate() complete the setup of a dentry, but those
> +	 * have already been done in kvm_gmem_alloc_file() ->
> +	 * alloc_file_pseudo()
> +	 */
> +}

Does this have to be done before the fd is exposed to userspace, or can it be
done after?  If it can be done after, I'd prefer to have the allocation helper
also install the fd, and also rename it to something that better conveys that
it's allocating more than just the file, e.g. that it allocates and initialize
kvm_gmem too.

Completely untested, but this is what I'm thinkin/hoping.

static int kvm_gmem_alloc_view(struct kvm *kvm, struct inode *inode,
			       struct vfsmount *mnt)
{
	struct file *file;
	struct kvm_gmem *gmem;

	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
	if (!gmem)
		return -ENOMEM;

	fd = get_unused_fd_flags(0);
	if (fd < 0) {
		r = fd;
		goto err_fd;
	}

	file = alloc_file_pseudo(inode, mnt, "kvm-gmem", O_RDWR, &kvm_gmem_fops);
	if (IS_ERR(file)) {
		r = PTR_ERR(file);
		goto err_file;
	}

	file->f_flags |= O_LARGEFILE;
	file->f_mapping = inode->i_mapping;

	kvm_get_kvm(kvm);
	gmem->kvm = kvm;
	xa_init(&gmem->bindings);

	file->private_data = gmem;

	list_add(&gmem->entry, &inode->i_mapping->private_list);

	fd_install(fd, file);

	return 0;
err:
	put_unused_fd(fd);
err_fd:
	kfree(gmem);
	return r;
}

static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags,
			     struct vfsmount *mnt)
{
	const char *anon_name = "[kvm-gmem]";
	const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
	struct inode *inode;
	struct file *file;
	int fd, err;

	inode = alloc_anon_inode(mnt->mnt_sb);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	err = security_inode_init_security_anon(inode, &qname, NULL);
	if (err)
		goto err;

	inode->i_private = (void *)(unsigned long)flags;
	inode->i_op = &kvm_gmem_iops;
	inode->i_mapping->a_ops = &kvm_gmem_aops;
	inode->i_mode |= S_IFREG;
	inode->i_size = size;
	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
	mapping_set_large_folios(inode->i_mapping);
	mapping_set_unevictable(inode->i_mapping);
	mapping_set_unmovable(inode->i_mapping);

	fd = kvm_gmem_alloc_view(kvm, inode, mnt);
	if (fd < 0) {
		err = fd;
		goto err;
	}
	return fd;
err:
	iput(inode);
	return err;
}
Re: [RFC PATCH 02/11] KVM: guest_mem: Add ioctl KVM_LINK_GUEST_MEMFD
Posted by Ackerley Tng 9 months ago
Sean Christopherson <seanjc@google.com> writes:

> On Mon, Aug 07, 2023, Ackerley Tng wrote:
>> KVM_LINK_GUEST_MEMFD will link a gmem fd's underlying inode to a new
>> file (and fd).
>>
>> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
>> ---
>>  include/uapi/linux/kvm.h |  8 +++++
>>  virt/kvm/guest_mem.c     | 73 ++++++++++++++++++++++++++++++++++++++++
>>  virt/kvm/kvm_main.c      | 10 ++++++
>>  virt/kvm/kvm_mm.h        |  7 ++++
>>  4 files changed, 98 insertions(+)
>>
>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>> index eb900344a054..d0e2a2ce0df2 100644
>> --- a/include/uapi/linux/kvm.h
>> +++ b/include/uapi/linux/kvm.h
>> @@ -2299,4 +2299,12 @@ struct kvm_create_guest_memfd {
>>  	__u64 reserved[6];
>>  };
>>
>> +#define KVM_LINK_GUEST_MEMFD	_IOWR(KVMIO,  0xd5, struct kvm_link_guest_memfd)
>> +
>> +struct kvm_link_guest_memfd {
>> +	__u64 fd;
>> +	__u64 flags;
>> +	__u64 reserved[6];
>> +};
>> +
>>  #endif /* __LINUX_KVM_H */
>> diff --git a/virt/kvm/guest_mem.c b/virt/kvm/guest_mem.c
>> index 30d0ab8745ee..1b3df273f785 100644
>> --- a/virt/kvm/guest_mem.c
>> +++ b/virt/kvm/guest_mem.c
>> @@ -477,6 +477,79 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
>>  	return __kvm_gmem_create(kvm, size, flags, kvm_gmem_mnt);
>>  }
>>
>> +static inline void __kvm_gmem_do_link(struct inode *inode)
>> +{
>> +	/* Refer to simple_link() */
>> +
>> +	inode->i_ctime = current_time(inode);
>> +	inc_nlink(inode);
>> +
>> +	/*
>> +	 * ihold() to add additional reference to inode for reference in dentry,
>> +	 * created in kvm_gmem_alloc_file() -> alloc_file_pseudo(). This is not
>> +	 * necessary when creating a new file because alloc_inode() creates
>> +	 * inodes with i_count = 1, which is the refcount for the dentry in the
>> +	 * file.
>> +	 */
>> +	ihold(inode);
>> +
>> +	/*
>> +	 * dget() and d_instantiate() complete the setup of a dentry, but those
>> +	 * have already been done in kvm_gmem_alloc_file() ->
>> +	 * alloc_file_pseudo()
>> +	 */
>> +}

Thanks Sean, we're just circling back to this series, working on a next
revision.

>
> Does this have to be done before the fd is exposed to userspace, or can it be
> done after?

Does "exposed to userspace" mean the call to get_unused_fd_flags(),
where an fd is reserved?

Do you mean to make this reservation as late as possible?

> If it can be done after, I'd prefer to have the allocation helper
> also install the fd, and also rename it to something that better conveys that
> it's allocating more than just the file, e.g. that it allocates and initialize
> kvm_gmem too.
>
> Completely untested, but this is what I'm thinkin/hoping.
>
> static int kvm_gmem_alloc_view(struct kvm *kvm, struct inode *inode,
> 			       struct vfsmount *mnt)

Will rename this kvm_gmem_alloc_view(), that naming totally makes
sense, and attaches a meaning to the struct file as a view into the
memory.

> {
> 	struct file *file;
> 	struct kvm_gmem *gmem;
>
> 	gmem = kzalloc(sizeof(*gmem), GFP_KERNEL);
> 	if (!gmem)
> 		return -ENOMEM;
>
> 	fd = get_unused_fd_flags(0);
> 	if (fd < 0) {
> 		r = fd;
> 		goto err_fd;
> 	}

Do you see the fd as part of the view? I thought the fd is just a handle
to the view (struct file).

>
> 	file = alloc_file_pseudo(inode, mnt, "kvm-gmem", O_RDWR, &kvm_gmem_fops);
> 	if (IS_ERR(file)) {
> 		r = PTR_ERR(file);
> 		goto err_file;
> 	}
>
> 	file->f_flags |= O_LARGEFILE;
> 	file->f_mapping = inode->i_mapping;
>
> 	kvm_get_kvm(kvm);
> 	gmem->kvm = kvm;
> 	xa_init(&gmem->bindings);
>
> 	file->private_data = gmem;
>
> 	list_add(&gmem->entry, &inode->i_mapping->private_list);
>
> 	fd_install(fd, file);
>
> 	return 0;
> err:
> 	put_unused_fd(fd);
> err_fd:
> 	kfree(gmem);
> 	return r;
> }
>
> static int __kvm_gmem_create(struct kvm *kvm, loff_t size, u64 flags,
> 			     struct vfsmount *mnt)
> {
> 	const char *anon_name = "[kvm-gmem]";
> 	const struct qstr qname = QSTR_INIT(anon_name, strlen(anon_name));
> 	struct inode *inode;
> 	struct file *file;
> 	int fd, err;
>
> 	inode = alloc_anon_inode(mnt->mnt_sb);
> 	if (IS_ERR(inode))
> 		return PTR_ERR(inode);
>
> 	err = security_inode_init_security_anon(inode, &qname, NULL);
> 	if (err)
> 		goto err;
>
> 	inode->i_private = (void *)(unsigned long)flags;
> 	inode->i_op = &kvm_gmem_iops;
> 	inode->i_mapping->a_ops = &kvm_gmem_aops;
> 	inode->i_mode |= S_IFREG;
> 	inode->i_size = size;
> 	mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
> 	mapping_set_large_folios(inode->i_mapping);
> 	mapping_set_unevictable(inode->i_mapping);
> 	mapping_set_unmovable(inode->i_mapping);
>
> 	fd = kvm_gmem_alloc_view(kvm, inode, mnt);
> 	if (fd < 0) {
> 		err = fd;
> 		goto err;
> 	}
> 	return fd;
> err:
> 	iput(inode);
> 	return err;
> }