When launching a virtual machine, Gunyah userspace allocates memory for
the guest and informs Gunyah about these memory regions through the
SET_USER_MEMORY_REGION ioctl.
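
For example, a VMM might register a shared memory region like so
(illustrative sketch only; the label and guest physical address below
are arbitrary, and error handling is minimal):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <linux/gunyah.h>

	/* vm_fd is a Gunyah VM fd obtained via GH_CREATE_VM */
	static int set_guest_mem(int vm_fd)
	{
		size_t size = 0x1000000; /* 16 MiB, page-aligned */
		struct gh_userspace_memory_region region;
		void *mem;

		mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
			   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
		if (mem == MAP_FAILED)
			return -1;

		region = (struct gh_userspace_memory_region) {
			.label = 0,
			.flags = GH_MEM_ALLOW_READ | GH_MEM_ALLOW_WRITE,
			.guest_phys_addr = 0x80000000, /* example guest address */
			.memory_size = size,
			.userspace_addr = (uintptr_t)mem,
		};

		return ioctl(vm_fd, GH_VM_SET_USER_MEM_REGION, &region);
	}

Calling the ioctl again with the same label and memory_size == 0
reclaims the region.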
Co-developed-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Prakruthi Deepak Heragu <quic_pheragu@quicinc.com>
Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
drivers/virt/gunyah/Makefile | 2 +-
drivers/virt/gunyah/vm_mgr.c | 46 +++++++
drivers/virt/gunyah/vm_mgr.h | 28 +++-
drivers/virt/gunyah/vm_mgr_mm.c | 223 ++++++++++++++++++++++++++++++++
include/uapi/linux/gunyah.h | 22 ++++
5 files changed, 319 insertions(+), 2 deletions(-)
create mode 100644 drivers/virt/gunyah/vm_mgr_mm.c
diff --git a/drivers/virt/gunyah/Makefile b/drivers/virt/gunyah/Makefile
index 03951cf82023..ff8bc4925392 100644
--- a/drivers/virt/gunyah/Makefile
+++ b/drivers/virt/gunyah/Makefile
@@ -2,5 +2,5 @@
obj-$(CONFIG_GUNYAH) += gunyah.o
-gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o
+gunyah_rsc_mgr-y += rsc_mgr.o rsc_mgr_rpc.o vm_mgr.o vm_mgr_mm.o
obj-$(CONFIG_GUNYAH) += gunyah_rsc_mgr.o
diff --git a/drivers/virt/gunyah/vm_mgr.c b/drivers/virt/gunyah/vm_mgr.c
index 0864dbd77e28..b847fde63333 100644
--- a/drivers/virt/gunyah/vm_mgr.c
+++ b/drivers/virt/gunyah/vm_mgr.c
@@ -35,14 +35,55 @@ static __must_check struct gunyah_vm *gunyah_vm_alloc(struct gh_rm_rpc *rm)
ghvm->vmid = vmid;
ghvm->rm = rm;
+ mutex_init(&ghvm->mm_lock);
+ INIT_LIST_HEAD(&ghvm->memory_mappings);
+
return ghvm;
}
static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
+ struct gunyah_vm *ghvm = filp->private_data;
+ void __user *argp = (void __user *)arg;
long r;
switch (cmd) {
+ case GH_VM_SET_USER_MEM_REGION: {
+ struct gunyah_vm_memory_mapping *mapping;
+ struct gh_userspace_memory_region region;
+
+ r = -EFAULT;
+ if (copy_from_user(&region, argp, sizeof(region)))
+ break;
+
+ r = -EINVAL;
+ /* All other flag bits are reserved for future use */
+ if (region.flags & ~(GH_MEM_ALLOW_READ | GH_MEM_ALLOW_WRITE | GH_MEM_ALLOW_EXEC |
+ GH_MEM_LENT))
+ break;
+
+
+ if (region.memory_size) {
+ r = 0;
+ mapping = gh_vm_mem_mapping_alloc(ghvm, &region);
+ if (IS_ERR(mapping)) {
+ r = PTR_ERR(mapping);
+ break;
+ }
+ } else {
+ mapping = gh_vm_mem_mapping_find(ghvm, region.label);
+ if (IS_ERR(mapping)) {
+ r = PTR_ERR(mapping);
+ break;
+ }
+ r = 0;
+ if (!mapping)
+ break;
+ gh_vm_mem_mapping_reclaim(ghvm, mapping);
+ kfree(mapping);
+ }
+ break;
+ }
default:
r = -ENOTTY;
break;
@@ -54,7 +95,12 @@ static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static int gh_vm_release(struct inode *inode, struct file *filp)
{
struct gunyah_vm *ghvm = filp->private_data;
+ struct gunyah_vm_memory_mapping *mapping, *tmp;
+ list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
+ gh_vm_mem_mapping_reclaim(ghvm, mapping);
+ kfree(mapping);
+ }
put_gh_rm(ghvm->rm);
kfree(ghvm);
return 0;
diff --git a/drivers/virt/gunyah/vm_mgr.h b/drivers/virt/gunyah/vm_mgr.h
index e47f34de7f9e..6b38bf780f76 100644
--- a/drivers/virt/gunyah/vm_mgr.h
+++ b/drivers/virt/gunyah/vm_mgr.h
@@ -7,14 +7,40 @@
#define _GH_PRIV_VM_MGR_H
#include <linux/gunyah_rsc_mgr.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
#include <uapi/linux/gunyah.h>
long gh_dev_vm_mgr_ioctl(struct gh_rm *rm, unsigned int cmd, unsigned long arg);
+enum gunyah_vm_mem_share_type {
+ VM_MEM_SHARE,
+ VM_MEM_LEND,
+};
+
+struct gunyah_vm_memory_mapping {
+ struct list_head list;
+ enum gunyah_vm_mem_share_type share_type;
+ struct gh_rm_mem_parcel parcel;
+
+ __u64 guest_phys_addr;
+ struct page **pages;
+ unsigned long npages;
+};
+
struct gunyah_vm {
u16 vmid;
- struct gh_rm_rpc *rm;
+ struct gh_rm *rm;
+
+ struct mutex mm_lock;
+ struct list_head memory_mappings;
};
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
+ struct gh_userspace_memory_region *region);
+void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping);
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label);
+
#endif
diff --git a/drivers/virt/gunyah/vm_mgr_mm.c b/drivers/virt/gunyah/vm_mgr_mm.c
new file mode 100644
index 000000000000..f2dbdb4ee8ab
--- /dev/null
+++ b/drivers/virt/gunyah/vm_mgr_mm.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022-2023 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#define pr_fmt(fmt) "gh_vm_mgr: " fmt
+
+#include <linux/gunyah_rsc_mgr.h>
+#include <linux/mm.h>
+
+#include <uapi/linux/gunyah.h>
+
+#include "vm_mgr.h"
+
+static inline bool page_contiguous(phys_addr_t p, phys_addr_t t)
+{
+ return t - p == PAGE_SIZE;
+}
+
+static struct gunyah_vm_memory_mapping *__gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
+{
+ struct gunyah_vm_memory_mapping *mapping;
+
+
+ list_for_each_entry(mapping, &ghvm->memory_mappings, list)
+ if (mapping->parcel.label == label)
+ return mapping;
+
+ return NULL;
+}
+
+void gh_vm_mem_mapping_reclaim(struct gunyah_vm *ghvm, struct gunyah_vm_memory_mapping *mapping)
+{
+ int i, ret = 0;
+
+ if (mapping->parcel.mem_handle != GH_MEM_HANDLE_INVAL) {
+ ret = gh_rm_mem_reclaim(ghvm->rm, &mapping->parcel);
+ if (ret)
+ pr_warn("Failed to reclaim memory parcel for label %d: %d\n",
+ mapping->parcel.label, ret);
+ }
+
+ if (!ret)
+ for (i = 0; i < mapping->npages; i++)
+ unpin_user_page(mapping->pages[i]);
+
+ kfree(mapping->pages);
+ kfree(mapping->parcel.acl_entries);
+ kfree(mapping->parcel.mem_entries);
+
+ mutex_lock(&ghvm->mm_lock);
+ list_del(&mapping->list);
+ mutex_unlock(&ghvm->mm_lock);
+}
+
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
+{
+ struct gunyah_vm_memory_mapping *mapping;
+ int ret;
+
+ ret = mutex_lock_interruptible(&ghvm->mm_lock);
+ if (ret)
+ return ERR_PTR(ret);
+ mapping = __gh_vm_mem_mapping_find(ghvm, label);
+ mutex_unlock(&ghvm->mm_lock);
+ return mapping ? : ERR_PTR(-ENODEV);
+}
+
+struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
+ struct gh_userspace_memory_region *region)
+{
+ phys_addr_t curr_page, prev_page;
+ struct gunyah_vm_memory_mapping *mapping, *tmp_mapping;
+ struct gh_rm_mem_entry *mem_entries;
+ int i, j, pinned, ret = 0;
+ struct gh_rm_mem_parcel *parcel;
+ size_t entry_size;
+ u16 vmid;
+
+ if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
+ !PAGE_ALIGNED(region->userspace_addr))
+ return ERR_PTR(-EINVAL);
+
+ if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ret = mutex_lock_interruptible(&ghvm->mm_lock);
+ if (ret)
+ return ERR_PTR(ret);
+ mapping = __gh_vm_mem_mapping_find(ghvm, region->label);
+ if (mapping) {
+ mutex_unlock(&ghvm->mm_lock);
+ return ERR_PTR(-EEXIST);
+ }
+
+ mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
+ if (!mapping) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ mapping->parcel.label = region->label;
+ mapping->guest_phys_addr = region->guest_phys_addr;
+ mapping->npages = region->memory_size >> PAGE_SHIFT;
+ parcel = &mapping->parcel;
+ parcel->mem_handle = GH_MEM_HANDLE_INVAL; /* to be filled later by mem_share/mem_lend */
+ parcel->mem_type = GH_RM_MEM_TYPE_NORMAL;
+
+ /* Check for overlap */
+ list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
+ if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
+ tmp_mapping->guest_phys_addr) ||
+ (mapping->guest_phys_addr >=
+ tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
+ ret = -EEXIST;
+ goto unlock;
+ }
+ }
+
+ list_add(&mapping->list, &ghvm->memory_mappings);
+unlock:
+ mutex_unlock(&ghvm->mm_lock);
+ if (ret)
+ goto free_mapping;
+
+ mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
+ if (!mapping->pages) {
+ ret = -ENOMEM;
+ goto reclaim;
+ }
+
+ pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
+ FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
+ if (pinned < 0) {
+ ret = pinned;
+ goto reclaim;
+ } else if (pinned != mapping->npages) {
+ ret = -EFAULT;
+ mapping->npages = pinned; /* update npages for reclaim */
+ goto reclaim;
+ }
+
+ if (region->flags & GH_MEM_LENT) {
+ parcel->n_acl_entries = 1;
+ mapping->share_type = VM_MEM_LEND;
+ } else {
+ parcel->n_acl_entries = 2;
+ mapping->share_type = VM_MEM_SHARE;
+ }
+ parcel->acl_entries = kcalloc(parcel->n_acl_entries,
+ sizeof(*parcel->acl_entries),
+ GFP_KERNEL);
+ if (!parcel->acl_entries) {
+ ret = -ENOMEM;
+ goto reclaim;
+ }
+
+ parcel->acl_entries[0].vmid = cpu_to_le16(ghvm->vmid);
+ if (region->flags & GH_MEM_ALLOW_READ)
+ parcel->acl_entries[0].perms |= GH_RM_ACL_R;
+ if (region->flags & GH_MEM_ALLOW_WRITE)
+ parcel->acl_entries[0].perms |= GH_RM_ACL_W;
+ if (region->flags & GH_MEM_ALLOW_EXEC)
+ parcel->acl_entries[0].perms |= GH_RM_ACL_X;
+
+ if (mapping->share_type == VM_MEM_SHARE) {
+ ret = gh_rm_get_vmid(ghvm->rm, &vmid);
+ if (ret) {
+ if (ret > 0) {
+ pr_warn("RM failed to get this VM's VMID: %d", ret);
+ ret = -EINVAL;
+ }
+ goto reclaim;
+ }
+
+ parcel->acl_entries[1].vmid = cpu_to_le16(vmid);
+ /* Host assumed to have all these permissions. Gunyah will not
+ * grant new permissions if host actually had less than RWX
+ */
+ parcel->acl_entries[1].perms |= GH_RM_ACL_R | GH_RM_ACL_W | GH_RM_ACL_X;
+ }
+
+ mem_entries = kcalloc(mapping->npages, sizeof(*mem_entries), GFP_KERNEL);
+ if (!mem_entries) {
+ ret = -ENOMEM;
+ goto reclaim;
+ }
+
+ /* reduce number of entries by combining contiguous pages into single memory entry */
+ prev_page = page_to_phys(mapping->pages[0]);
+ mem_entries[0].ipa_base = cpu_to_le64(prev_page);
+ entry_size = PAGE_SIZE;
+ for (i = 1, j = 0; i < mapping->npages; i++) {
+ curr_page = page_to_phys(mapping->pages[i]);
+ if (page_contiguous(prev_page, curr_page)) {
+ entry_size += PAGE_SIZE;
+ } else {
+ mem_entries[j].size = cpu_to_le64(entry_size);
+ j++;
+ mem_entries[j].ipa_base = cpu_to_le64(curr_page);
+ entry_size = PAGE_SIZE;
+ }
+
+ prev_page = curr_page;
+ }
+ mem_entries[j].size = cpu_to_le64(entry_size);
+
+ parcel->n_mem_entries = j + 1;
+ parcel->mem_entries = kmemdup(mem_entries, sizeof(*mem_entries) * parcel->n_mem_entries,
+ GFP_KERNEL);
+ kfree(mem_entries);
+ if (!parcel->mem_entries) {
+ ret = -ENOMEM;
+ goto reclaim;
+ }
+
+ return mapping;
+reclaim:
+ gh_vm_mem_mapping_reclaim(ghvm, mapping);
+free_mapping:
+ kfree(mapping);
+ return ERR_PTR(ret);
+}
diff --git a/include/uapi/linux/gunyah.h b/include/uapi/linux/gunyah.h
index 88a40d6e0b96..574f33b198d0 100644
--- a/include/uapi/linux/gunyah.h
+++ b/include/uapi/linux/gunyah.h
@@ -20,4 +20,26 @@
*/
#define GH_CREATE_VM _IO(GH_IOCTL_TYPE, 0x0) /* Returns a Gunyah VM fd */
+/*
+ * ioctls for VM fds
+ */
+struct gh_userspace_memory_region {
+ __u32 label;
+#define GH_MEM_ALLOW_READ (1UL << 0)
+#define GH_MEM_ALLOW_WRITE (1UL << 1)
+#define GH_MEM_ALLOW_EXEC (1UL << 2)
+/*
+ * The guest will be lent the memory instead of shared.
+ * In other words, the guest has exclusive access to the memory region and the host loses access.
+ */
+#define GH_MEM_LENT (1UL << 3)
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size;
+ __u64 userspace_addr;
+};
+
+#define GH_VM_SET_USER_MEM_REGION _IOW(GH_IOCTL_TYPE, 0x1, \
+ struct gh_userspace_memory_region)
+
#endif
--
2.39.0
On 20/01/2023 22:46, Elliot Berman wrote:
> When launching a virtual machine, Gunyah userspace allocates memory for
> the guest and informs Gunyah about these memory regions through the
> SET_USER_MEMORY_REGION ioctl.

[...]

> +	case GH_VM_SET_USER_MEM_REGION: {
> +		struct gunyah_vm_memory_mapping *mapping;
> +		struct gh_userspace_memory_region region;
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&region, argp, sizeof(region)))
> +			break;

Why not be explicit about the error codes? Do something like:

	if (copy_from_user(&region, argp, sizeof(region)))
		return -EFAULT;

Setting the r value every time before starting any code makes the code
more reader-unfriendly.

> +		if (region.memory_size) {

This behaviour (allocating memory when a valid memory_size is given and
looking up the mapping for reclaim when the size is zero) needs to be
described properly in the uapi so that users are aware of it.

> @@ -54,7 +95,12 @@ static long gh_vm_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
>  static int gh_vm_release(struct inode *inode, struct file *filp)
>  {
>  	struct gunyah_vm *ghvm = filp->private_data;
> +	struct gunyah_vm_memory_mapping *mapping, *tmp;

Locking?

> +	list_for_each_entry_safe(mapping, tmp, &ghvm->memory_mappings, list) {
> +		gh_vm_mem_mapping_reclaim(ghvm, mapping);
> +		kfree(mapping);
> +	}

[...]

> +static struct gunyah_vm_memory_mapping *__gh_vm_mem_mapping_find(struct gunyah_vm *ghvm, u32 label)
> +{
> +	struct gunyah_vm_memory_mapping *mapping;
> +
> +

Only one blank line here.

> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
> +	struct gh_userspace_memory_region *region)
> +{

Is this a static function or an exported symbol?

> +	phys_addr_t curr_page, prev_page;
> +	struct gunyah_vm_memory_mapping *mapping, *tmp_mapping;
> +	struct gh_rm_mem_entry *mem_entries;
> +	int i, j, pinned, ret = 0;
> +	struct gh_rm_mem_parcel *parcel;
> +	size_t entry_size;
> +	u16 vmid;

Reverse christmas tree to sort local variables would be nice.

> +	if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
> +	    !PAGE_ALIGNED(region->userspace_addr))

Even this alignment needs some documentation. Or why not just let the
user pass a number of pages instead of a size?

> +	if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
> +		return ERR_PTR(-EOPNOTSUPP);

We should probably move this to be the very first check while handling
this ioctl.

> +	mapping = __gh_vm_mem_mapping_find(ghvm, region->label);

So the label is unique and userspace is presumably aware of this?
Can we have more than one userspace doing this? And if so, how can it
ensure that each label is unique?

> +	/* Check for overlap */
> +	list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
> +		if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
> +			tmp_mapping->guest_phys_addr) ||
> +			(mapping->guest_phys_addr >=
> +			tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
> +			ret = -EEXIST;
> +			goto unlock;
> +		}
> +	}

This looks like we will loop over every mapping for each allocation,
giving us O(n). How frequent is this, and how many mappings can there be
in the system at most?

[...]

> +struct gh_userspace_memory_region {

This struct needs some kerneldoc.

> +	__u64 memory_size;

If we are only expecting pages, this should probably be made explicit by
using nr_pages instead of size.
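
For reference, a kerneldoc sketch along the lines requested might look
like this (the descriptions are inferred from the patch's behavior, not
the author's eventual wording):

	/**
	 * struct gh_userspace_memory_region - Memory region for the guest
	 * @label: Unique identifier for the region, chosen by userspace
	 * @flags: Access permissions (GH_MEM_ALLOW_*) and GH_MEM_LENT
	 * @guest_phys_addr: Base of the region in the guest's address
	 *                   space; must be page-aligned
	 * @memory_size: Size of the region in bytes; must be a multiple
	 *               of PAGE_SIZE. Zero means "reclaim the region
	 *               previously registered with @label".
	 * @userspace_addr: Base of the region in the caller's address
	 *                  space; must be page-aligned
	 */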
On 2/6/2023 8:12 AM, Srinivas Kandagatla wrote:
>> +		r = -EFAULT;
>> +		if (copy_from_user(&region, argp, sizeof(region)))
>> +			break;
>
> Why not be explicit about the error codes? Do something like:
>
> 	if (copy_from_user(&region, argp, sizeof(region)))
> 		return -EFAULT;
>
> Setting the r value every time before starting any code makes the code
> more reader-unfriendly.

Done.

>> +		if (region.memory_size) {
>
> This behaviour (allocating memory when a valid memory_size is given and
> looking up the mapping for reclaim when the size is zero) needs to be
> described properly in the uapi so that users are aware of it.

This behavior is described in "Document Gunyah VM Manager":
https://lore.kernel.org/all/20230120224627.4053418-20-quic_eberman@quicinc.com/

>> +	struct gunyah_vm_memory_mapping *mapping;
>> +
>> +
>
> Only one blank line here.

Done.

>> +struct gunyah_vm_memory_mapping *gh_vm_mem_mapping_alloc(struct gunyah_vm *ghvm,
>> +	struct gh_userspace_memory_region *region)
>> +{
>
> Is this a static function or an exported symbol?

Neither, it's used in vm_mgr.c.

>> +	size_t entry_size;
>> +	u16 vmid;
>
> Reverse christmas tree to sort local variables would be nice.

Done, and checked throughout the series as well.

>> +	if (!region->memory_size || !PAGE_ALIGNED(region->memory_size) ||
>> +	    !PAGE_ALIGNED(region->userspace_addr))
>
> Even this alignment needs some documentation.

Documented now.

> Or why not just let the user pass a number of pages instead of a size?

KVM, Nitro enclaves, and ACRN all give size directly and not as # of
pages.

>> +	if (!gh_api_has_feature(GH_API_FEATURE_MEMEXTENT))
>> +		return ERR_PTR(-EOPNOTSUPP);
>
> We should probably move this to be the very first check while handling
> this ioctl.

Done.

>> +	mapping = __gh_vm_mem_mapping_find(ghvm, region->label);
>
> So the label is unique and userspace is presumably aware of this?
> Can we have more than one userspace doing this? And if so, how can it
> ensure that each label is unique?

The label is unique and userspace is aware of this. One userspace owns
the VM FD.

>> +	/* Check for overlap */
>> +	list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
>> [...]
>
> This looks like we will loop over every mapping for each allocation,
> giving us O(n). How frequent is this, and how many mappings can there
> be in the system at most?

In all our use cases so far, only a few (max 3) mappings are used.
Gunyah and Linux would be similarly bounded by heap-based memory
constraints when adding more memory mappings if you don't hit U32_MAX
first. This could be reworked into an rb tree, but I thought it better
to use a simpler list-based implementation since it was easier to
review and the benefits are not seen when only a few mappings are used.

[snip]

>> +struct gh_userspace_memory_region {
>
> This struct needs some kerneldoc.

Done, as well for the other UAPI structs added later in the series.
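
If the list walk ever became a concern, one hypothetical direction is
the kernel's interval tree; a rough sketch (the field and helper names
here are invented for illustration, not part of the series):

	#include <linux/interval_tree.h>

	/*
	 * Assumes struct gunyah_vm grows a
	 *	struct rb_root_cached mappings_itree;
	 * and each mapping embeds a struct interval_tree_node spanning
	 * [guest_phys_addr, guest_phys_addr + size - 1].
	 */
	static bool gh_vm_mem_overlaps(struct gunyah_vm *ghvm, u64 gpa,
				       u64 size)
	{
		/* interval tree ranges are inclusive [start, last] */
		return interval_tree_iter_first(&ghvm->mappings_itree,
						gpa, gpa + size - 1) != NULL;
	}

This makes the overlap check O(log n), though as noted above the list
is simpler to review and adequate for a handful of mappings.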
* Elliot Berman <quic_eberman@quicinc.com> [2023-01-20 14:46:12]:

> +	/* Check for overlap */
> +	list_for_each_entry(tmp_mapping, &ghvm->memory_mappings, list) {
> +		if (!((mapping->guest_phys_addr + (mapping->npages << PAGE_SHIFT) <=
> +			tmp_mapping->guest_phys_addr) ||
> +			(mapping->guest_phys_addr >=
> +			tmp_mapping->guest_phys_addr + (tmp_mapping->npages << PAGE_SHIFT)))) {
> +			ret = -EEXIST;
> +			goto unlock;
> +		}
> +	}
> +
> +	list_add(&mapping->list, &ghvm->memory_mappings);

I think the potential race condition described last time is still
possible. Pls check.

> +	mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
> +	if (!mapping->pages) {
> +		ret = -ENOMEM;
> +		goto reclaim;

Same comment as last time. Can you check this error path? We seem to
call unpin_user_page() in this path, which is not correct.

> +	pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
> +				     FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
> +	if (pinned < 0) {
> +		ret = pinned;
> +		goto reclaim;

Same comment as above.
On 1/25/2023 5:34 AM, Srivatsa Vaddagiri wrote:
> * Elliot Berman <quic_eberman@quicinc.com> [2023-01-20 14:46:12]:
>
>> +	list_add(&mapping->list, &ghvm->memory_mappings);
>
> I think the potential race condition described last time is still
> possible. Pls check.
>
>> +	mapping->pages = kcalloc(mapping->npages, sizeof(*mapping->pages), GFP_KERNEL);
>> +	if (!mapping->pages) {
>> +		ret = -ENOMEM;
>> +		goto reclaim;
>
> Same comment as last time. Can you check this error path? We seem to
> call unpin_user_page() in this path, which is not correct.
>
>> +	pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
>> +				     FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
>> +	if (pinned < 0) {
>> +		ret = pinned;
>> +		goto reclaim;
>
> Same comment as above.

Caught the race condition you are describing between adding and
removing the memory parcels. Should be fixed for v10.

- Elliot
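
For completeness, one possible shape for the error-path fix (a guess;
the actual v10 change may differ) is to make the reclaim path safe no
matter how far gh_vm_mem_mapping_alloc() got before failing:

	/* In gh_vm_mem_mapping_alloc(): on total pin failure, record
	 * that nothing was pinned before jumping to reclaim.
	 */
	pinned = pin_user_pages_fast(region->userspace_addr, mapping->npages,
				     FOLL_WRITE | FOLL_LONGTERM, mapping->pages);
	if (pinned < 0) {
		ret = pinned;
		mapping->npages = 0; /* nothing to unpin */
		goto reclaim;
	}

	/* ...and in gh_vm_mem_mapping_reclaim(): only unpin pages if the
	 * pages array was actually allocated, so the kcalloc() failure
	 * path no longer dereferences a NULL mapping->pages.
	 */
	if (!ret && mapping->pages)
		for (i = 0; i < mapping->npages; i++)
			unpin_user_page(mapping->pages[i]);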