From: Jason Gunthorpe <jgg@mellanox.com>
gntdev simply wants to monitor a specific VMA for any notifier events;
this can be done straightforwardly using mmu_range_notifier_insert() over
the VMA's VA range.
The notifier should be attached until the original VMA is destroyed.
It is unclear if any of this is even sane, but at least a lot of duplicate
code is removed.
Cc: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: xen-devel@lists.xenproject.org
Cc: Juergen Gross <jgross@suse.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
drivers/xen/gntdev-common.h | 8 +-
drivers/xen/gntdev.c | 180 ++++++++++--------------------------
2 files changed, 49 insertions(+), 139 deletions(-)
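
For reviewers unfamiliar with the new API, the lifecycle this conversion
relies on looks roughly like the sketch below. This is a minimal
illustration pieced together from how the functions are used in this
patch: my_map, my_invalidate and my_ops are made-up names, and the
signatures come from the mmu_range_notifier series this patch depends on,
not from mainline.

	#include <linux/kernel.h>
	#include <linux/mm.h>
	#include <linux/mmu_notifier.h>

	/* Embed one notifier per tracked object (here: per mapped VMA). */
	struct my_map {
		struct mmu_range_notifier notifier;
		/* ... driver state ... */
	};

	/* Called for any invalidation overlapping the inserted VA range. */
	static bool my_invalidate(struct mmu_range_notifier *mn,
				  const struct mmu_notifier_range *range,
				  unsigned long cur_seq)
	{
		struct my_map *map = container_of(mn, struct my_map, notifier);

		/* Non-blockable contexts are asked to retry in a blockable one. */
		if (!mmu_notifier_range_blockable(range))
			return false;

		/* Tear down device mappings covering [range->start, range->end). */
		return true;
	}

	static const struct mmu_range_notifier_ops my_ops = {
		.invalidate = my_invalidate,
	};

	/*
	 * In ->mmap(): attach to the VMA's VA range. The _locked variant is
	 * for callers already holding mmap_sem, as ->mmap() does. The ops
	 * pointer must be set before inserting.
	 */
	static int my_attach(struct my_map *map, struct vm_area_struct *vma)
	{
		map->notifier.ops = &my_ops;
		return mmu_range_notifier_insert_locked(&map->notifier,
				vma->vm_start, vma->vm_end - vma->vm_start,
				vma->vm_mm);
	}

	/* In ->close(): detach before dropping the last reference to the map. */
	static void my_detach(struct my_map *map)
	{
		mmu_range_notifier_remove(&map->notifier);
	}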
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 2f8b949c3eeb14..b201fdd20b667b 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -21,15 +21,8 @@ struct gntdev_dmabuf_priv;
struct gntdev_priv {
/* Maps with visible offsets in the file descriptor. */
struct list_head maps;
- /*
- * Maps that are not visible; will be freed on munmap.
- * Only populated if populate_freeable_maps == 1
- */
- struct list_head freeable_maps;
/* lock protects maps and freeable_maps. */
struct mutex lock;
- struct mm_struct *mm;
- struct mmu_notifier mn;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
/* Device for which DMA memory is allocated. */
@@ -49,6 +42,7 @@ struct gntdev_unmap_notify {
};
struct gntdev_grant_map {
+ struct mmu_range_notifier notifier;
struct list_head next;
struct vm_area_struct *vma;
int index;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index a446a7221e13e9..12d626670bebbc 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -65,7 +65,6 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be mapped by "
static atomic_t pages_mapped = ATOMIC_INIT(0);
static int use_ptemod;
-#define populate_freeable_maps use_ptemod
static int unmap_grant_pages(struct gntdev_grant_map *map,
int offset, int pages);
@@ -251,12 +250,6 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
evtchn_put(map->notify.event);
}
- if (populate_freeable_maps && priv) {
- mutex_lock(&priv->lock);
- list_del(&map->next);
- mutex_unlock(&priv->lock);
- }
-
if (map->pages && !use_ptemod)
unmap_grant_pages(map, 0, map->count);
gntdev_free_map(map);
@@ -445,17 +438,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
- if (use_ptemod) {
- /* It is possible that an mmu notifier could be running
- * concurrently, so take priv->lock to ensure that the vma won't
- * vanishing during the unmap_grant_pages call, since we will
- * spin here until that completes. Such a concurrent call will
- * not do any unmapping, since that has been done prior to
- * closing the vma, but it may still iterate the unmap_ops list.
- */
- mutex_lock(&priv->lock);
+ if (use_ptemod && map->vma == vma) {
+ mmu_range_notifier_remove(&map->notifier);
map->vma = NULL;
- mutex_unlock(&priv->lock);
}
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
@@ -477,109 +462,44 @@ static const struct vm_operations_struct gntdev_vmops = {
/* ------------------------------------------------------------------ */
-static bool in_range(struct gntdev_grant_map *map,
- unsigned long start, unsigned long end)
-{
- if (!map->vma)
- return false;
- if (map->vma->vm_start >= end)
- return false;
- if (map->vma->vm_end <= start)
- return false;
-
- return true;
-}
-
-static int unmap_if_in_range(struct gntdev_grant_map *map,
- unsigned long start, unsigned long end,
- bool blockable)
+static bool gntdev_invalidate(struct mmu_range_notifier *mn,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
{
+ struct gntdev_grant_map *map =
+ container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
int err;
- if (!in_range(map, start, end))
- return 0;
+ if (!mmu_notifier_range_blockable(range))
+ return false;
- if (!blockable)
- return -EAGAIN;
+ /*
+ * If the VMA is split or otherwise changed the notifier is not
+ * updated, but we don't want to process VA's outside the modified
+ * VMA. FIXME: It would be much more understandable to just prevent
+ * modifying the VMA in the first place.
+ */
+ if (map->vma->vm_start >= range->end ||
+ map->vma->vm_end <= range->start)
+ return true;
- mstart = max(start, map->vma->vm_start);
- mend = min(end, map->vma->vm_end);
+ mstart = max(range->start, map->vma->vm_start);
+ mend = min(range->end, map->vma->vm_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
- start, end, mstart, mend);
+ range->start, range->end, mstart, mend);
err = unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
WARN_ON(err);
- return 0;
-}
-
-static int mn_invl_range_start(struct mmu_notifier *mn,
- const struct mmu_notifier_range *range)
-{
- struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
- struct gntdev_grant_map *map;
- int ret = 0;
-
- if (mmu_notifier_range_blockable(range))
- mutex_lock(&priv->lock);
- else if (!mutex_trylock(&priv->lock))
- return -EAGAIN;
-
- list_for_each_entry(map, &priv->maps, next) {
- ret = unmap_if_in_range(map, range->start, range->end,
- mmu_notifier_range_blockable(range));
- if (ret)
- goto out_unlock;
- }
- list_for_each_entry(map, &priv->freeable_maps, next) {
- ret = unmap_if_in_range(map, range->start, range->end,
- mmu_notifier_range_blockable(range));
- if (ret)
- goto out_unlock;
- }
-
-out_unlock:
- mutex_unlock(&priv->lock);
-
- return ret;
-}
-
-static void mn_release(struct mmu_notifier *mn,
- struct mm_struct *mm)
-{
- struct gntdev_priv *priv = container_of(mn, struct gntdev_priv, mn);
- struct gntdev_grant_map *map;
- int err;
-
- mutex_lock(&priv->lock);
- list_for_each_entry(map, &priv->maps, next) {
- if (!map->vma)
- continue;
- pr_debug("map %d+%d (%lx %lx)\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
- }
- list_for_each_entry(map, &priv->freeable_maps, next) {
- if (!map->vma)
- continue;
- pr_debug("map %d+%d (%lx %lx)\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end);
- err = unmap_grant_pages(map, /* offset */ 0, map->count);
- WARN_ON(err);
- }
- mutex_unlock(&priv->lock);
+ return true;
}
-static const struct mmu_notifier_ops gntdev_mmu_ops = {
- .release = mn_release,
- .invalidate_range_start = mn_invl_range_start,
+static const struct mmu_range_notifier_ops gntdev_mmu_ops = {
+ .invalidate = gntdev_invalidate,
};
/* ------------------------------------------------------------------ */
@@ -594,7 +514,6 @@ static int gntdev_open(struct inode *inode, struct file *flip)
return -ENOMEM;
INIT_LIST_HEAD(&priv->maps);
- INIT_LIST_HEAD(&priv->freeable_maps);
mutex_init(&priv->lock);
#ifdef CONFIG_XEN_GNTDEV_DMABUF
@@ -606,17 +525,6 @@ static int gntdev_open(struct inode *inode, struct file *flip)
}
#endif
- if (use_ptemod) {
- priv->mm = get_task_mm(current);
- if (!priv->mm) {
- kfree(priv);
- return -ENOMEM;
- }
- priv->mn.ops = &gntdev_mmu_ops;
- ret = mmu_notifier_register(&priv->mn, priv->mm);
- mmput(priv->mm);
- }
-
if (ret) {
kfree(priv);
return ret;
@@ -653,16 +561,12 @@ static int gntdev_release(struct inode *inode, struct file *flip)
list_del(&map->next);
gntdev_put_map(NULL /* already removed */, map);
}
- WARN_ON(!list_empty(&priv->freeable_maps));
mutex_unlock(&priv->lock);
#ifdef CONFIG_XEN_GNTDEV_DMABUF
gntdev_dmabuf_fini(priv->dmabuf_priv);
#endif
- if (use_ptemod)
- mmu_notifier_unregister(&priv->mn, priv->mm);
-
kfree(priv);
return 0;
}
@@ -723,8 +627,6 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
if (map) {
list_del(&map->next);
- if (populate_freeable_maps)
- list_add_tail(&map->next, &priv->freeable_maps);
err = 0;
}
mutex_unlock(&priv->lock);
@@ -1096,11 +998,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
goto unlock_out;
if (use_ptemod && map->vma)
goto unlock_out;
- if (use_ptemod && priv->mm != vma->vm_mm) {
- pr_warn("Huh? Other mm?\n");
- goto unlock_out;
- }
-
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1111,10 +1008,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
vma->vm_flags |= VM_DONTCOPY;
vma->vm_private_data = map;
-
- if (use_ptemod)
- map->vma = vma;
-
if (map->flags) {
if ((vma->vm_flags & VM_WRITE) &&
(map->flags & GNTMAP_readonly))
@@ -1125,8 +1018,28 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
+ if (use_ptemod) {
+ map->vma = vma;
+ err = mmu_range_notifier_insert_locked(
+ &map->notifier, vma->vm_start,
+ vma->vm_end - vma->vm_start, vma->vm_mm);
+ if (err)
+ goto out_unlock_put;
+ }
mutex_unlock(&priv->lock);
+ /*
+ * gntdev takes the address of the PTE in find_grant_ptes() and passes
+ * it to the hypervisor in gntdev_map_grant_pages(). The purpose of
+ * the notifier is to prevent the hypervisor pointer to the PTE from
+ * going stale.
+ *
+ * Since this vma's mappings can't be touched without the mmap_sem,
+ * and we are holding it now, there is no need for the notifier_range
+ * locking pattern.
+ */
+ mmu_range_read_begin(&map->notifier);
+
if (use_ptemod) {
map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
@@ -1175,8 +1088,11 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
mutex_unlock(&priv->lock);
out_put_map:
if (use_ptemod) {
- map->vma = NULL;
unmap_grant_pages(map, 0, map->count);
+ if (map->vma) {
+ mmu_range_notifier_remove(&map->notifier);
+ map->vma = NULL;
+ }
}
gntdev_put_map(priv, map);
return err;
--
2.23.0
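
For context, the "notifier_range locking pattern" that the comment added to
gntdev_mmap() declares unnecessary is, roughly, the begin/retry sequence
below. This is only a sketch: mmu_range_read_retry() is assumed here to be
the companion of mmu_range_read_begin() from the same notifier series.

	unsigned long seq;

	do {
		seq = mmu_range_read_begin(&map->notifier);
		/*
		 * (Re)build the device mapping for the range. The retry
		 * check normally runs under a driver lock that the
		 * invalidate callback also takes, so the two cannot
		 * interleave.
		 */
	} while (mmu_range_read_retry(&map->notifier, seq));

	/*
	 * gntdev can call mmu_range_read_begin() without the retry loop
	 * because it holds mmap_sem, which already prevents the VMA's
	 * mappings from changing underneath it.
	 */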
On 10/28/19 4:10 PM, Jason Gunthorpe wrote:
> From: Jason Gunthorpe <jgg@mellanox.com>
>
> gntdev simply wants to monitor a specific VMA for any notifier events;
> this can be done straightforwardly using mmu_range_notifier_insert() over
> the VMA's VA range.
>
> The notifier should be attached until the original VMA is destroyed.
>
> It is unclear if any of this is even sane, but at least a lot of duplicate
> code is removed.
I didn't have a chance to look at the patch itself yet but as a heads-up
--- it crashes dom0.
-boris
On Wed, Oct 30, 2019 at 12:55:37PM -0400, Boris Ostrovsky wrote:
> On 10/28/19 4:10 PM, Jason Gunthorpe wrote:
> > From: Jason Gunthorpe <jgg@mellanox.com>
> >
> > gntdev simply wants to monitor a specific VMA for any notifier events;
> > this can be done straightforwardly using mmu_range_notifier_insert() over
> > the VMA's VA range.
> >
> > The notifier should be attached until the original VMA is destroyed.
> >
> > It is unclear if any of this is even sane, but at least a lot of duplicate
> > code is removed.
>
> I didn't have a chance to look at the patch itself yet but as a heads-up
> --- it crashes dom0.

Thanks Boris. I spent a bit of time and got a VM running with a xen
4.9 hypervisor and a kernel with this patch series. It is a ubuntu bionic
VM with the distro's xen stuff.

Can you give some guidance on how you made it crash? I see the VM
autoloaded gntdev:

Module                  Size  Used by
xen_gntdev             24576  2
xen_evtchn             16384  1
xenfs                  16384  1
xen_privcmd            24576  16 xenfs

And lsof says several xen processes have the chardev open:

xenstored  819      root  13u  CHR  10,53  0t0  19595  /dev/xen/gntdev
xenconsol  857      root   8u  CHR  10,53  0t0  19595  /dev/xen/gntdev
xenconsol  857 860  root   8u  CHR  10,53  0t0  19595  /dev/xen/gntdev

But no crashing..

However, I wasn't able to get my usual debug kernel .config to boot
with the xen hypervisor; it crashes on early boot with:

(XEN) Dom0 has maximum 8 VCPUs
(XEN) Scrubbing Free RAM on 1 nodes using 8 CPUs
(XEN) .done.
(XEN) Initial low memory virq threshold set at 0x1000 pages.
(XEN) Std. Loglevel: All
(XEN) Guest Loglevel: All
(XEN) *** Serial input -> DOM0 (type 'CTRL-a' three times to switch input to Xen)
(XEN) Freed 468kB init memory
(XEN) d0v0 Unhandled page fault fault/trap [#14, ec=0002]
(XEN) Pagetable walk from fffffbfff0480fbe:
(XEN)  L4[0x1f7] = 0000000000000000 ffffffffffffffff
(XEN) domain_crash_sync called from entry.S: fault at ffff82d080348a06 entry.o#create_bounce_frame+0x135/0x15f
(XEN) Domain 0 (vcpu#0) crashed on cpu#0:
(XEN) ----[ Xen-4.9.2  x86_64  debug=n  Not tainted ]----
(XEN) CPU:    0
(XEN) RIP:    e033:[<ffffffff82b9f731>]
(XEN) RFLAGS: 0000000000000296   EM: 1   CONTEXT: pv guest (d0v0)
(XEN) rax: fffffbfff0480fbe   rbx: 0000000000000000   rcx: 00000000c0000101
(XEN) rdx: 00000000ffffffff   rsi: ffffffff84026000   rdi: ffffffff82cb4a20
(XEN) rbp: ffffffff82407ff8   rsp: ffffffff82407da0   r8:  0000000000000000
(XEN) r9:  0000000000000000   r10: 0000000000000000   r11: 0000000000000000
(XEN) r12: 0000000000000000   r13: 1ffffffff0480fbe   r14: 0000000000000000
(XEN) r15: 0000000000000000   cr0: 000000008005003b   cr4: 00000000003506e0
(XEN) cr3: 0000000034027000   cr2: fffffbfff0480fbe
(XEN) fsb: 0000000000000000   gsb: ffffffff82b61000   gss: 0000000000000000
(XEN) ds: 0000   es: 0000   fs: 0000   gs: 0000   ss: e02b   cs: e033

Which is surely some .config issue, but I didn't figure out what.

Jason
On 11/1/19 1:48 PM, Jason Gunthorpe wrote:
> On Wed, Oct 30, 2019 at 12:55:37PM -0400, Boris Ostrovsky wrote:
>> On 10/28/19 4:10 PM, Jason Gunthorpe wrote:
>>> From: Jason Gunthorpe <jgg@mellanox.com>
>>>
>>> gntdev simply wants to monitor a specific VMA for any notifier events;
>>> this can be done straightforwardly using mmu_range_notifier_insert() over
>>> the VMA's VA range.
>>>
>>> The notifier should be attached until the original VMA is destroyed.
>>>
>>> It is unclear if any of this is even sane, but at least a lot of duplicate
>>> code is removed.
>> I didn't have a chance to look at the patch itself yet but as a heads-up
>> --- it crashes dom0.
> Thanks Boris. I spent a bit of time and got a VM running with a xen
> 4.9 hypervisor and a kernel with this patch series. It is a ubuntu bionic
> VM with the distro's xen stuff.
>
> Can you give some guidance on how you made it crash?

It crashes trying to dereference mrn->ops->invalidate in
mn_itree_invalidate() when a guest exits.

I don't think you've initialized notifier ops. I don't see you using
gntdev_mmu_ops anywhere.

-boris
On Fri, Nov 01, 2019 at 02:51:46PM -0400, Boris Ostrovsky wrote:
> On 11/1/19 1:48 PM, Jason Gunthorpe wrote:
> > On Wed, Oct 30, 2019 at 12:55:37PM -0400, Boris Ostrovsky wrote:
> >> On 10/28/19 4:10 PM, Jason Gunthorpe wrote:
> >>> From: Jason Gunthorpe <jgg@mellanox.com>
> >>>
> >>> gntdev simply wants to monitor a specific VMA for any notifier events;
> >>> this can be done straightforwardly using mmu_range_notifier_insert() over
> >>> the VMA's VA range.
> >>>
> >>> The notifier should be attached until the original VMA is destroyed.
> >>>
> >>> It is unclear if any of this is even sane, but at least a lot of duplicate
> >>> code is removed.
> >> I didn't have a chance to look at the patch itself yet but as a heads-up
> > Thanks Boris. I spent a bit of time and got a VM running with a xen
> > 4.9 hypervisor and a kernel with this patch series. It a ubuntu bionic
> > VM with the distro's xen stuff.
> >
> > Can you give some guidance how you made it crash?
>
> It crashes trying to dereference mrn->ops->invalidate in
> mn_itree_invalidate() when a guest exits.
>
> I don't think you've initialized notifier ops. I don't see you using
> gntdev_mmu_ops anywhere.
So weird the compiler didn't complain about an unused static...
But yes, this is a mistake, it should be:
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 37b278857ad807..0ca35485fd3865 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -1011,6 +1011,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
if (use_ptemod) {
map->vma = vma;
+ map->notifier.ops = &gntdev_mmu_ops;
err = mmu_range_notifier_insert_locked(
&map->notifier, vma->vm_start,
vma->vm_end - vma->vm_start, vma->vm_mm);
Thanks,
Jason
On 10/28/19 4:10 PM, Jason Gunthorpe wrote:
> @@ -445,17 +438,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
> struct gntdev_priv *priv = file->private_data;
>
> pr_debug("gntdev_vma_close %p\n", vma);
> - if (use_ptemod) {
> - /* It is possible that an mmu notifier could be running
> - * concurrently, so take priv->lock to ensure that the vma won't
> - * vanishing during the unmap_grant_pages call, since we will
> - * spin here until that completes. Such a concurrent call will
> - * not do any unmapping, since that has been done prior to
> - * closing the vma, but it may still iterate the unmap_ops list.
> - */
> - mutex_lock(&priv->lock);
> + if (use_ptemod && map->vma == vma) {
Is it possible for map->vma not to be equal to vma?
-boris
> + mmu_range_notifier_remove(&map->notifier);
> map->vma = NULL;
> - mutex_unlock(&priv->lock);
> }
> vma->vm_private_data = NULL;
> gntdev_put_map(priv, map);
>
On Mon, Nov 04, 2019 at 05:03:31PM -0500, Boris Ostrovsky wrote:
> On 10/28/19 4:10 PM, Jason Gunthorpe wrote:
> > @@ -445,17 +438,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
> > struct gntdev_priv *priv = file->private_data;
> >
> > pr_debug("gntdev_vma_close %p\n", vma);
> > - if (use_ptemod) {
> > - /* It is possible that an mmu notifier could be running
> > - * concurrently, so take priv->lock to ensure that the vma won't
> > - * vanishing during the unmap_grant_pages call, since we will
> > - * spin here until that completes. Such a concurrent call will
> > - * not do any unmapping, since that has been done prior to
> > - * closing the vma, but it may still iterate the unmap_ops list.
> > - */
> > - mutex_lock(&priv->lock);
> > + if (use_ptemod && map->vma == vma) {
>
>
> Is it possible for map->vma not to be equal to vma?
It could be NULL at least if use_ptemod is not set.
Otherwise, I'm not sure; the confusing bit is that the map comes from
here:
map = gntdev_find_map_index(priv, index, count);
It looks like the intent is that the map->vma is always set to the
only vma that has the map as private_data.
So, I suppose it can be relaxed to a null test and a WARN_ON that it
hasn't changed?
Jason
On 11/4/19 9:31 PM, Jason Gunthorpe wrote:
> On Mon, Nov 04, 2019 at 05:03:31PM -0500, Boris Ostrovsky wrote:
>> On 10/28/19 4:10 PM, Jason Gunthorpe wrote:
>>> @@ -445,17 +438,9 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
>>> struct gntdev_priv *priv = file->private_data;
>>>
>>> pr_debug("gntdev_vma_close %p\n", vma);
>>> - if (use_ptemod) {
>>> - /* It is possible that an mmu notifier could be running
>>> - * concurrently, so take priv->lock to ensure that the vma won't
>>> - * vanishing during the unmap_grant_pages call, since we will
>>> - * spin here until that completes. Such a concurrent call will
>>> - * not do any unmapping, since that has been done prior to
>>> - * closing the vma, but it may still iterate the unmap_ops list.
>>> - */
>>> - mutex_lock(&priv->lock);
>>> + if (use_ptemod && map->vma == vma) {
>>
>> Is it possible for map->vma not to be equal to vma?
> It could be NULL at least if use_ptemod is not set.
>
> Otherwise, I'm not sure, the confusing bit is that the map comes from
> here:
>
> map = gntdev_find_map_index(priv, index, count);
>
> It looks like the intent is that the map->vma is always set to the
> only vma that has the map as private_data.
I am not sure how this can work otherwise. We stash the map pointer in the
vma's vm_private_data, and vice versa (for use_ptemod), in gntdev_mmap(), so
they have to match.
That's why I was asking you to see if you had something particular in
mind when you added this test.
> So, I suppose it can be relaxed to a null test and a WARN_ON that it
> hasn't changed?
You mean
if (use_ptemod) {
WARN_ON(map->vma != vma);
...
Yes, that sounds good.
-boris
On Tue, Nov 05, 2019 at 10:16:46AM -0500, Boris Ostrovsky wrote:
> > So, I suppose it can be relaxed to a null test and a WARN_ON that it
> > hasn't changed?
>
> You mean
>
> if (use_ptemod) {
> WARN_ON(map->vma != vma);
> ...
>
>
> Yes, that sounds good.
I amended my copy of the patch with the above, has this rework shown
signs of working?
@@ -436,7 +436,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
- if (use_ptemod && map->vma == vma) {
+ if (use_ptemod) {
+ WARN_ON(map->vma != vma);
mmu_range_notifier_remove(&map->notifier);
map->vma = NULL;
}
Jason
On 11/7/19 3:36 PM, Jason Gunthorpe wrote:
> On Tue, Nov 05, 2019 at 10:16:46AM -0500, Boris Ostrovsky wrote:
>
>>> So, I suppose it can be relaxed to a null test and a WARN_ON that it
>>> hasn't changed?
>> You mean
>>
>> if (use_ptemod) {
>> WARN_ON(map->vma != vma);
>> ...
>>
>>
>> Yes, that sounds good.
> I amended my copy of the patch with the above, has this rework shown
> signs of working?
Yes, it works fine.
But please don't forget notifier ops initialization.
With those two changes,
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
>
> @@ -436,7 +436,8 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
> struct gntdev_priv *priv = file->private_data;
>
> pr_debug("gntdev_vma_close %p\n", vma);
> - if (use_ptemod && map->vma == vma) {
> + if (use_ptemod) {
> + WARN_ON(map->vma != vma);
> mmu_range_notifier_remove(&map->notifier);
> map->vma = NULL;
> }
>
> Jason
On Thu, Nov 07, 2019 at 05:54:52PM -0500, Boris Ostrovsky wrote:
> On 11/7/19 3:36 PM, Jason Gunthorpe wrote:
> > On Tue, Nov 05, 2019 at 10:16:46AM -0500, Boris Ostrovsky wrote:
> >
> >>> So, I suppose it can be relaxed to a null test and a WARN_ON that it
> >>> hasn't changed?
> >> You mean
> >>
> >> if (use_ptemod) {
> >> WARN_ON(map->vma != vma);
> >> ...
> >>
> >>
> >> Yes, that sounds good.
> > I amended my copy of the patch with the above, has this rework shown
> > signs of working?
>
> Yes, it works fine.
>
> But please don't forget notifier ops initialization.
>
> With those two changes,
>
> Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Thanks, I got both things. I'll forward this toward linux-next and
repost a v3
Jason