[PATCH v6 13/25] iommufd: Add mmap interface

Nicolin Chen posted 25 patches 3 months, 4 weeks ago
There is a newer version of this series
[PATCH v6 13/25] iommufd: Add mmap interface
Posted by Nicolin Chen 3 months, 4 weeks ago
For vIOMMU passing through HW resources to user space (VMs), allowing a VM
to control the passed through HW directly by accessing hardware registers,
add an mmap infrastructure to map the physical MMIO pages to user space.

Maintain a maple tree per ictx as a translation table managing mmappable
regions, from an allocated for-user mmap offset to an iommufd_mmap struct,
where it stores the real PFN range for an io_remap_pfn_range call.

Keep track of the lifecycle of the mmappable region by taking refcount of
its owner, so as to enforce user space to unmap the region first before it
can destroy its owner object.

To allow an IOMMU driver to add and delete mmappable regions onto/from the
maple tree, add iommufd_viommu_alloc/destroy_mmap helpers.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
 drivers/iommu/iommufd/iommufd_private.h | 14 ++++++
 include/linux/iommufd.h                 | 42 ++++++++++++++++
 drivers/iommu/iommufd/driver.c          | 51 ++++++++++++++++++++
 drivers/iommu/iommufd/main.c            | 64 +++++++++++++++++++++++++
 4 files changed, 171 insertions(+)

diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 1bb1c0764bc2..e8192f79fe42 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -7,6 +7,7 @@
 #include <linux/iommu.h>
 #include <linux/iommufd.h>
 #include <linux/iova_bitmap.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/uaccess.h>
 #include <linux/xarray.h>
@@ -44,6 +45,7 @@ struct iommufd_ctx {
 	struct xarray groups;
 	wait_queue_head_t destroy_wait;
 	struct rw_semaphore ioas_creation_lock;
+	struct maple_tree mt_mmap;
 
 	struct mutex sw_msi_lock;
 	struct list_head sw_msi_list;
@@ -55,6 +57,18 @@ struct iommufd_ctx {
 	struct iommufd_ioas *vfio_ioas;
 };
 
+/* Entry for iommufd_ctx::mt_mmap */
+struct iommufd_mmap {
+	struct iommufd_object *owner;
+
+	/* Allocated start position in mt_mmap tree */
+	unsigned long startp;
+
+	/* Physical range for io_remap_pfn_range() */
+	unsigned long mmio_pfn;
+	unsigned long num_pfns;
+};
+
 /*
  * The IOVA to PFN map. The map automatically copies the PFNs into multiple
  * domains and permits sharing of PFNs between io_pagetable instances. This
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index acf0e8f0c630..0da9bc8f94f3 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -251,6 +251,11 @@ int _iommufd_object_depend(struct iommufd_object *obj_dependent,
 			   struct iommufd_object *obj_depended);
 void _iommufd_object_undepend(struct iommufd_object *obj_dependent,
 			      struct iommufd_object *obj_depended);
+int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
+			phys_addr_t mmio_addr, size_t length,
+			unsigned long *offset);
+void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+			   struct iommufd_object *owner, unsigned long offset);
 struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
 				       unsigned long vdev_id);
 int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
@@ -271,6 +276,20 @@ _iommufd_object_undepend(struct iommufd_object *obj_dependent,
 {
 }
 
+static inline int _iommufd_alloc_mmap(struct iommufd_ctx *ictx,
+				      struct iommufd_object *owner,
+				      phys_addr_t mmio_addr, size_t length,
+				      unsigned long *offset)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+					 struct iommufd_object *owner,
+					 unsigned long offset)
+{
+}
+
 static inline struct device *
 iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id)
 {
@@ -338,4 +357,27 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
 		_iommufd_object_undepend(&dependent->member.obj,               \
 					 &depended->member.obj);               \
 	})
+
+/*
+ * Helpers for IOMMU driver to alloc/destroy an mmapable area for a structure.
+ *
+ * To support an mmappable MMIO region, kernel driver must first register it to
+ * iommufd core to allocate an @offset, during a driver-structure initialization
+ * (e.g. viommu_init op). Then, it should report to user space this @offset and
+ * the @length of the MMIO region for mmap syscall.
+ */
+static inline int iommufd_viommu_alloc_mmap(struct iommufd_viommu *viommu,
+					    phys_addr_t mmio_addr,
+					    size_t length,
+					    unsigned long *offset)
+{
+	return _iommufd_alloc_mmap(viommu->ictx, &viommu->obj, mmio_addr,
+				   length, offset);
+}
+
+static inline void iommufd_viommu_destroy_mmap(struct iommufd_viommu *viommu,
+					       unsigned long offset)
+{
+	_iommufd_destroy_mmap(viommu->ictx, &viommu->obj, offset);
+}
 #endif
diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index 70b7917da0cb..8220b61d8c8d 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -31,6 +31,57 @@ void _iommufd_object_undepend(struct iommufd_object *obj_dependent,
 }
 EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD");
 
+/*
+ * Allocate an @offset to return to user space to use for an mmap() syscall
+ *
+ * Driver should use a per-structure helper in include/linux/iommufd.h
+ */
+int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
+			phys_addr_t mmio_addr, size_t length,
+			unsigned long *offset)
+{
+	struct iommufd_mmap *immap;
+	unsigned long startp;
+	int rc;
+
+	if (!PAGE_ALIGNED(mmio_addr))
+		return -EINVAL;
+	if (!length || !PAGE_ALIGNED(length))
+		return -EINVAL;
+
+	immap = kzalloc(sizeof(*immap), GFP_KERNEL);
+	if (!immap)
+		return -ENOMEM;
+	immap->owner = owner;
+	immap->num_pfns = length >> PAGE_SHIFT;
+	immap->mmio_pfn = mmio_addr >> PAGE_SHIFT;
+
+	rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->num_pfns,
+			       0, U32_MAX >> PAGE_SHIFT, GFP_KERNEL);
+	if (rc < 0) {
+		kfree(immap);
+		return rc;
+	}
+
+	immap->startp = startp;
+	/* mmap() syscall will right-shift the offset in vma->vm_pgoff */
+	*offset = startp << PAGE_SHIFT;
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD");
+
+/* Driver should use a per-structure helper in include/linux/iommufd.h */
+void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
+			   struct iommufd_object *owner, unsigned long offset)
+{
+	struct iommufd_mmap *immap;
+
+	immap = mtree_erase(&ictx->mt_mmap, offset >> PAGE_SHIFT);
+	WARN_ON_ONCE(!immap || immap->owner != owner);
+	kfree(immap);
+}
+EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD");
+
 /* Caller should xa_lock(&viommu->vdevs) to protect the return value */
 struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
 				       unsigned long vdev_id)
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 4e8dbbfac890..339a269ebbc8 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -275,6 +275,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
 	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
 	xa_init(&ictx->groups);
 	ictx->file = filp;
+	mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE);
 	init_waitqueue_head(&ictx->destroy_wait);
 	mutex_init(&ictx->sw_msi_lock);
 	INIT_LIST_HEAD(&ictx->sw_msi_list);
@@ -479,11 +480,74 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
 	return ret;
 }
 
+static void iommufd_fops_vma_open(struct vm_area_struct *vma)
+{
+	struct iommufd_mmap *immap = vma->vm_private_data;
+
+	refcount_inc(&immap->owner->users);
+}
+
+static void iommufd_fops_vma_close(struct vm_area_struct *vma)
+{
+	struct iommufd_mmap *immap = vma->vm_private_data;
+
+	refcount_dec(&immap->owner->users);
+}
+
+static const struct vm_operations_struct iommufd_vma_ops = {
+	.open = iommufd_fops_vma_open,
+	.close = iommufd_fops_vma_close,
+};
+
+/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */
+static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct iommufd_ctx *ictx = filp->private_data;
+	size_t length = vma->vm_end - vma->vm_start;
+	struct iommufd_mmap *immap;
+	int rc;
+
+	if (!PAGE_ALIGNED(length))
+		return -EINVAL;
+	if (!(vma->vm_flags & VM_SHARED))
+		return -EINVAL;
+	if (vma->vm_flags & VM_EXEC)
+		return -EPERM;
+
+	/* vma->vm_pgoff carries an index to an mtree entry (immap) */
+	immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff);
+	if (!immap)
+		return -ENXIO;
+	/*
+	 * mtree_load() returns the immap for any contained pgoff, only allow
+	 * the immap thing to be mapped
+	 */
+	if (vma->vm_pgoff != immap->startp)
+		return -ENXIO;
+	if (length != immap->num_pfns << PAGE_SHIFT)
+		return -ENXIO;
+
+	vma->vm_pgoff = 0;
+	vma->vm_private_data = immap;
+	vma->vm_ops = &iommufd_vma_ops;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+	rc = io_remap_pfn_range(vma, vma->vm_start, immap->mmio_pfn, length,
+				vma->vm_page_prot);
+	if (rc)
+		return rc;
+
+	/* vm_ops.open won't be called for mmap itself. */
+	refcount_inc(&immap->owner->users);
+	return rc;
+}
+
 static const struct file_operations iommufd_fops = {
 	.owner = THIS_MODULE,
 	.open = iommufd_fops_open,
 	.release = iommufd_fops_release,
 	.unlocked_ioctl = iommufd_fops_ioctl,
+	.mmap = iommufd_fops_mmap,
 };
 
 /**
-- 
2.43.0
Re: [PATCH v6 13/25] iommufd: Add mmap interface
Posted by Pranjal Shrivastava 3 months, 3 weeks ago
On Sat, Jun 14, 2025 at 12:14:38AM -0700, Nicolin Chen wrote:
> For vIOMMU passing through HW resources to user space (VMs), allowing a VM
> to control the passed through HW directly by accessing hardware registers,
> add an mmap infrastructure to map the physical MMIO pages to user space.
> 
> Maintain a maple tree per ictx as a translation table managing mmappable
> regions, from an allocated for-user mmap offset to an iommufd_mmap struct,
> where it stores the real PFN range for an io_remap_pfn_range call.
> 
> Keep track of the lifecycle of the mmappable region by taking refcount of
> its owner, so as to enforce user space to unmap the region first before it
> can destroy its owner object.
> 
> To allow an IOMMU driver to add and delete mmappable regions onto/from the
> maple tree, add iommufd_viommu_alloc/destroy_mmap helpers.
>

The usage of mtree seems fine now, storing pfns ranges as compared to
pointers in v3. Input validation checks, vma checks and destroy op look
good.

Reviewed-by: Pranjal Shrivastava <praan@google.com>

> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
>  drivers/iommu/iommufd/iommufd_private.h | 14 ++++++
>  include/linux/iommufd.h                 | 42 ++++++++++++++++
>  drivers/iommu/iommufd/driver.c          | 51 ++++++++++++++++++++
>  drivers/iommu/iommufd/main.c            | 64 +++++++++++++++++++++++++
>  4 files changed, 171 insertions(+)
> 
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index 1bb1c0764bc2..e8192f79fe42 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -7,6 +7,7 @@
>  #include <linux/iommu.h>
>  #include <linux/iommufd.h>
>  #include <linux/iova_bitmap.h>
> +#include <linux/maple_tree.h>
>  #include <linux/rwsem.h>
>  #include <linux/uaccess.h>
>  #include <linux/xarray.h>
> @@ -44,6 +45,7 @@ struct iommufd_ctx {
>  	struct xarray groups;
>  	wait_queue_head_t destroy_wait;
>  	struct rw_semaphore ioas_creation_lock;
> +	struct maple_tree mt_mmap;
>  
>  	struct mutex sw_msi_lock;
>  	struct list_head sw_msi_list;
> @@ -55,6 +57,18 @@ struct iommufd_ctx {
>  	struct iommufd_ioas *vfio_ioas;
>  };
>  
> +/* Entry for iommufd_ctx::mt_mmap */
> +struct iommufd_mmap {
> +	struct iommufd_object *owner;
> +
> +	/* Allocated start position in mt_mmap tree */
> +	unsigned long startp;
> +
> +	/* Physical range for io_remap_pfn_range() */
> +	unsigned long mmio_pfn;
> +	unsigned long num_pfns;
> +};
> +
>  /*
>   * The IOVA to PFN map. The map automatically copies the PFNs into multiple
>   * domains and permits sharing of PFNs between io_pagetable instances. This
> diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
> index acf0e8f0c630..0da9bc8f94f3 100644
> --- a/include/linux/iommufd.h
> +++ b/include/linux/iommufd.h
> @@ -251,6 +251,11 @@ int _iommufd_object_depend(struct iommufd_object *obj_dependent,
>  			   struct iommufd_object *obj_depended);
>  void _iommufd_object_undepend(struct iommufd_object *obj_dependent,
>  			      struct iommufd_object *obj_depended);
> +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
> +			phys_addr_t mmio_addr, size_t length,
> +			unsigned long *offset);
> +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
> +			   struct iommufd_object *owner, unsigned long offset);
>  struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
>  				       unsigned long vdev_id);
>  int iommufd_viommu_get_vdev_id(struct iommufd_viommu *viommu,
> @@ -271,6 +276,20 @@ _iommufd_object_undepend(struct iommufd_object *obj_dependent,
>  {
>  }
>  
> +static inline int _iommufd_alloc_mmap(struct iommufd_ctx *ictx,
> +				      struct iommufd_object *owner,
> +				      phys_addr_t mmio_addr, size_t length,
> +				      unsigned long *offset)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static inline void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
> +					 struct iommufd_object *owner,
> +					 unsigned long offset)
> +{
> +}
> +
>  static inline struct device *
>  iommufd_viommu_find_dev(struct iommufd_viommu *viommu, unsigned long vdev_id)
>  {
> @@ -338,4 +357,27 @@ static inline int iommufd_viommu_report_event(struct iommufd_viommu *viommu,
>  		_iommufd_object_undepend(&dependent->member.obj,               \
>  					 &depended->member.obj);               \
>  	})
> +
> +/*
> + * Helpers for IOMMU driver to alloc/destroy an mmapable area for a structure.
> + *
> + * To support an mmappable MMIO region, kernel driver must first register it to
> + * iommufd core to allocate an @offset, during a driver-structure initialization
> + * (e.g. viommu_init op). Then, it should report to user space this @offset and
> + * the @length of the MMIO region for mmap syscall.
> + */
> +static inline int iommufd_viommu_alloc_mmap(struct iommufd_viommu *viommu,
> +					    phys_addr_t mmio_addr,
> +					    size_t length,
> +					    unsigned long *offset)
> +{
> +	return _iommufd_alloc_mmap(viommu->ictx, &viommu->obj, mmio_addr,
> +				   length, offset);
> +}
> +
> +static inline void iommufd_viommu_destroy_mmap(struct iommufd_viommu *viommu,
> +					       unsigned long offset)
> +{
> +	_iommufd_destroy_mmap(viommu->ictx, &viommu->obj, offset);
> +}
>  #endif
> diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
> index 70b7917da0cb..8220b61d8c8d 100644
> --- a/drivers/iommu/iommufd/driver.c
> +++ b/drivers/iommu/iommufd/driver.c
> @@ -31,6 +31,57 @@ void _iommufd_object_undepend(struct iommufd_object *obj_dependent,
>  }
>  EXPORT_SYMBOL_NS_GPL(_iommufd_object_undepend, "IOMMUFD");
>  
> +/*
> + * Allocate an @offset to return to user space to use for an mmap() syscall
> + *
> + * Driver should use a per-structure helper in include/linux/iommufd.h
> + */
> +int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
> +			phys_addr_t mmio_addr, size_t length,
> +			unsigned long *offset)
> +{
> +	struct iommufd_mmap *immap;
> +	unsigned long startp;
> +	int rc;
> +
> +	if (!PAGE_ALIGNED(mmio_addr))
> +		return -EINVAL;
> +	if (!length || !PAGE_ALIGNED(length))
> +		return -EINVAL;
> +
> +	immap = kzalloc(sizeof(*immap), GFP_KERNEL);
> +	if (!immap)
> +		return -ENOMEM;
> +	immap->owner = owner;
> +	immap->num_pfns = length >> PAGE_SHIFT;
> +	immap->mmio_pfn = mmio_addr >> PAGE_SHIFT;
> +
> +	rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->num_pfns,
> +			       0, U32_MAX >> PAGE_SHIFT, GFP_KERNEL);
> +	if (rc < 0) {
> +		kfree(immap);
> +		return rc;
> +	}
> +
> +	immap->startp = startp;
> +	/* mmap() syscall will right-shift the offset in vma->vm_pgoff */
> +	*offset = startp << PAGE_SHIFT;
> +	return 0;
> +}
> +EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD");
> +
> +/* Driver should use a per-structure helper in include/linux/iommufd.h */
> +void _iommufd_destroy_mmap(struct iommufd_ctx *ictx,
> +			   struct iommufd_object *owner, unsigned long offset)
> +{
> +	struct iommufd_mmap *immap;
> +
> +	immap = mtree_erase(&ictx->mt_mmap, offset >> PAGE_SHIFT);
> +	WARN_ON_ONCE(!immap || immap->owner != owner);
> +	kfree(immap);
> +}
> +EXPORT_SYMBOL_NS_GPL(_iommufd_destroy_mmap, "IOMMUFD");
> +
>  /* Caller should xa_lock(&viommu->vdevs) to protect the return value */
>  struct device *iommufd_viommu_find_dev(struct iommufd_viommu *viommu,
>  				       unsigned long vdev_id)
> diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
> index 4e8dbbfac890..339a269ebbc8 100644
> --- a/drivers/iommu/iommufd/main.c
> +++ b/drivers/iommu/iommufd/main.c
> @@ -275,6 +275,7 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
>  	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
>  	xa_init(&ictx->groups);
>  	ictx->file = filp;
> +	mt_init_flags(&ictx->mt_mmap, MT_FLAGS_ALLOC_RANGE);
>  	init_waitqueue_head(&ictx->destroy_wait);
>  	mutex_init(&ictx->sw_msi_lock);
>  	INIT_LIST_HEAD(&ictx->sw_msi_list);
> @@ -479,11 +480,74 @@ static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
>  	return ret;
>  }
>  
> +static void iommufd_fops_vma_open(struct vm_area_struct *vma)
> +{
> +	struct iommufd_mmap *immap = vma->vm_private_data;
> +
> +	refcount_inc(&immap->owner->users);
> +}
> +
> +static void iommufd_fops_vma_close(struct vm_area_struct *vma)
> +{
> +	struct iommufd_mmap *immap = vma->vm_private_data;
> +
> +	refcount_dec(&immap->owner->users);
> +}
> +
> +static const struct vm_operations_struct iommufd_vma_ops = {
> +	.open = iommufd_fops_vma_open,
> +	.close = iommufd_fops_vma_close,
> +};
> +
> +/* The vm_pgoff must be pre-allocated from mt_mmap, and given to user space */
> +static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +	struct iommufd_ctx *ictx = filp->private_data;
> +	size_t length = vma->vm_end - vma->vm_start;
> +	struct iommufd_mmap *immap;
> +	int rc;
> +
> +	if (!PAGE_ALIGNED(length))
> +		return -EINVAL;
> +	if (!(vma->vm_flags & VM_SHARED))
> +		return -EINVAL;
> +	if (vma->vm_flags & VM_EXEC)
> +		return -EPERM;
> +
> +	/* vma->vm_pgoff carries an index to an mtree entry (immap) */
> +	immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff);
> +	if (!immap)
> +		return -ENXIO;
> +	/*
> +	 * mtree_load() returns the immap for any contained pgoff, only allow
> +	 * the immap thing to be mapped
> +	 */
> +	if (vma->vm_pgoff != immap->startp)
> +		return -ENXIO;
> +	if (length != immap->num_pfns << PAGE_SHIFT)
> +		return -ENXIO;
> +
> +	vma->vm_pgoff = 0;
> +	vma->vm_private_data = immap;
> +	vma->vm_ops = &iommufd_vma_ops;
> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +
> +	rc = io_remap_pfn_range(vma, vma->vm_start, immap->mmio_pfn, length,
> +				vma->vm_page_prot);
> +	if (rc)
> +		return rc;
> +
> +	/* vm_ops.open won't be called for mmap itself. */
> +	refcount_inc(&immap->owner->users);
> +	return rc;
> +}
> +
>  static const struct file_operations iommufd_fops = {
>  	.owner = THIS_MODULE,
>  	.open = iommufd_fops_open,
>  	.release = iommufd_fops_release,
>  	.unlocked_ioctl = iommufd_fops_ioctl,
> +	.mmap = iommufd_fops_mmap,
>  };
>  
>  /**
> -- 
> 2.43.0
>
Re: [PATCH v6 13/25] iommufd: Add mmap interface
Posted by Jason Gunthorpe 3 months, 3 weeks ago
On Sat, Jun 14, 2025 at 12:14:38AM -0700, Nicolin Chen wrote:

> +/* Entry for iommufd_ctx::mt_mmap */
> +struct iommufd_mmap {
> +	struct iommufd_object *owner;
> +
> +	/* Allocated start position in mt_mmap tree */
> +	unsigned long startp;

pgoff_t, looks like this is already in PAGE_SIZE units.

> +	/* Physical range for io_remap_pfn_range() */
> +	unsigned long mmio_pfn;

physaddr_t and maybe don't use pfn?

> +	unsigned long num_pfns;

size_t

Rest looks OK

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

Jason
Re: [PATCH v6 13/25] iommufd: Add mmap interface
Posted by Nicolin Chen 3 months, 2 weeks ago
On Mon, Jun 16, 2025 at 11:13:19AM -0300, Jason Gunthorpe wrote:
> On Sat, Jun 14, 2025 at 12:14:38AM -0700, Nicolin Chen wrote:
> 
> > +/* Entry for iommufd_ctx::mt_mmap */
> > +struct iommufd_mmap {
> > +	struct iommufd_object *owner;
> > +
> > +	/* Allocated start position in mt_mmap tree */
> > +	unsigned long startp;
> 
> pgoff_t, looks like this is already in PAGE_SIZE units.
> 
> > +	/* Physical range for io_remap_pfn_range() */
> > +	unsigned long mmio_pfn;
> 
> physaddr_t and maybe don't use pfn?
> 
> > +	unsigned long num_pfns;
> 
> size_t

I made the following changes, though I kept "unsigned long" for
"vm_pgoff" aligning with vma->vm_pgoff:

diff --git a/drivers/iommu/iommufd/driver.c b/drivers/iommu/iommufd/driver.c
index d797b77e878c6..73fcf3a89bf82 100644
--- a/drivers/iommu/iommufd/driver.c
+++ b/drivers/iommu/iommufd/driver.c
@@ -53,19 +53,19 @@ int _iommufd_alloc_mmap(struct iommufd_ctx *ictx, struct iommufd_object *owner,
 	if (!immap)
 		return -ENOMEM;
 	immap->owner = owner;
-	immap->num_pfns = length >> PAGE_SHIFT;
-	immap->mmio_pfn = mmio_addr >> PAGE_SHIFT;
+	immap->length = length;
+	immap->mmio_addr = mmio_addr;
 
-	rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->num_pfns,
-			       0, U32_MAX >> PAGE_SHIFT, GFP_KERNEL);
+	rc = mtree_alloc_range(&ictx->mt_mmap, &startp, immap, immap->length,
+			       0, PHYS_ADDR_MAX, GFP_KERNEL);
 	if (rc < 0) {
 		kfree(immap);
 		return rc;
 	}
 
-	immap->startp = startp;
-	/* mmap() syscall will right-shift the offset in vma->vm_pgoff */
-	*offset = startp << PAGE_SHIFT;
+	/* mmap() syscall will right-shift the offset in vma->vm_pgoff too */
+	immap->vm_pgoff = startp >> PAGE_SHIFT;
+	*offset = startp;
 	return 0;
 }
 EXPORT_SYMBOL_NS_GPL(_iommufd_alloc_mmap, "IOMMUFD");
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index cc9088ba5c812..ebac6a4b3538c 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -61,12 +61,12 @@ struct iommufd_ctx {
 struct iommufd_mmap {
 	struct iommufd_object *owner;
 
-	/* Allocated start position in mt_mmap tree */
-	unsigned long startp;
+	/* Page-shifted start position in mt_mmap to validate vma->vm_pgoff */
+	unsigned long vm_pgoff;
 
 	/* Physical range for io_remap_pfn_range() */
-	unsigned long mmio_pfn;
-	unsigned long num_pfns;
+	phys_addr_t mmio_addr;
+	size_t length;
 };
 
 /*
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 339a269ebbc81..0fb81a905cb13 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -514,17 +514,15 @@ static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
 	if (vma->vm_flags & VM_EXEC)
 		return -EPERM;
 
-	/* vma->vm_pgoff carries an index to an mtree entry (immap) */
-	immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff);
+	/* vma->vm_pgoff carries a page-shifted start position to an immap */
+	immap = mtree_load(&ictx->mt_mmap, vma->vm_pgoff << PAGE_SHIFT);
 	if (!immap)
 		return -ENXIO;
 	/*
-	 * mtree_load() returns the immap for any contained pgoff, only allow
-	 * the immap thing to be mapped
+	 * mtree_load() returns the immap for any contained mmio_addr, so only
+	 * allow the exact immap thing to be mapped
 	 */
-	if (vma->vm_pgoff != immap->startp)
-		return -ENXIO;
-	if (length != immap->num_pfns << PAGE_SHIFT)
+	if (vma->vm_pgoff != immap->vm_pgoff || length != immap->length)
 		return -ENXIO;
 
 	vma->vm_pgoff = 0;
@@ -532,7 +530,8 @@ static int iommufd_fops_mmap(struct file *filp, struct vm_area_struct *vma)
 	vma->vm_ops = &iommufd_vma_ops;
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	rc = io_remap_pfn_range(vma, vma->vm_start, immap->mmio_pfn, length,
+	rc = io_remap_pfn_range(vma, vma->vm_start,
+				immap->mmio_addr >> PAGE_SHIFT, length,
 				vma->vm_page_prot);
 	if (rc)
 		return rc;
Re: [PATCH v6 13/25] iommufd: Add mmap interface
Posted by Nicolin Chen 3 months, 3 weeks ago
On Mon, Jun 16, 2025 at 11:13:19AM -0300, Jason Gunthorpe wrote:
> On Sat, Jun 14, 2025 at 12:14:38AM -0700, Nicolin Chen wrote:
> 
> > +/* Entry for iommufd_ctx::mt_mmap */
> > +struct iommufd_mmap {
> > +	struct iommufd_object *owner;
> > +
> > +	/* Allocated start position in mt_mmap tree */
> > +	unsigned long startp;
> 
> pgoff_t, looks like this is already in PAGE_SIZE units.
> 
> > +	/* Physical range for io_remap_pfn_range() */
> > +	unsigned long mmio_pfn;
> 
> physaddr_t and maybe don't use pfn?
> 
> > +	unsigned long num_pfns;
> 
> size_t

The mtree stores in pfn units. Looks like you prefer doing in
PAGE_SIZE units? I can give that a try and update them as you
suggested.

Thanks
Nicolin
Re: [PATCH v6 13/25] iommufd: Add mmap interface
Posted by Jason Gunthorpe 3 months, 3 weeks ago
On Mon, Jun 16, 2025 at 07:37:26PM -0700, Nicolin Chen wrote:
> On Mon, Jun 16, 2025 at 11:13:19AM -0300, Jason Gunthorpe wrote:
> > On Sat, Jun 14, 2025 at 12:14:38AM -0700, Nicolin Chen wrote:
> > 
> > > +/* Entry for iommufd_ctx::mt_mmap */
> > > +struct iommufd_mmap {
> > > +	struct iommufd_object *owner;
> > > +
> > > +	/* Allocated start position in mt_mmap tree */
> > > +	unsigned long startp;
> > 
> > pgoff_t, looks like this is already in PAGE_SIZE units.
> > 
> > > +	/* Physical range for io_remap_pfn_range() */
> > > +	unsigned long mmio_pfn;
> > 
> > physaddr_t and maybe don't use pfn?
> > 
> > > +	unsigned long num_pfns;
> > 
> > size_t
> 
> The mtree stores in pfn units. Looks like you prefer doing in
> PAGE_SIZE units? I can give that a try and update them as you
> suggested.

PFN and PAGE_SIZE units are the same thing

Jason
Re: [PATCH v6 13/25] iommufd: Add mmap interface
Posted by Baolu Lu 3 months, 3 weeks ago
On 6/14/2025 3:14 PM, Nicolin Chen wrote:
> For vIOMMU passing through HW resources to user space (VMs), allowing a VM
> to control the passed through HW directly by accessing hardware registers,
> add an mmap infrastructure to map the physical MMIO pages to user space.
> 
> Maintain a maple tree per ictx as a translation table managing mmappable
> regions, from an allocated for-user mmap offset to an iommufd_mmap struct,
> where it stores the real PFN range for an io_remap_pfn_range call.
> 
> Keep track of the lifecycle of the mmappable region by taking refcount of
> its owner, so as to enforce user space to unmap the region first before it
> can destroy its owner object.
> 
> To allow an IOMMU driver to add and delete mmappable regions onto/from the
> maple tree, add iommufd_viommu_alloc/destroy_mmap helpers.
> 
> Reviewed-by: Kevin Tian<kevin.tian@intel.com>
> Signed-off-by: Nicolin Chen<nicolinc@nvidia.com>

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>