[RFC PATCH v2 55/58] drivers/iommu: Add deferred map_sg operations

Posted by Mostafa Saleh 1 year ago
The pKVM SMMUv3 driver, which para-virtualizes the IOMMU in the
hypervisor, has extra overhead in map_sg, as it loops over
iommu_map, and each map requires a context switch, disabling
interrupts...

Instead, add new domain operations:
- alloc_cookie_sg: Allocate a new sg deferred cookie
- add_deferred_map_sg: Add a mapping to the cookie
- consume_deferred_map_sg: Consume and release the cookie

Alternatively, we could pass the sg list as is. However, this would
duplicate some of the logic, and it makes more sense to
consolidate all the sg list parsing for IOMMU drivers in one place.

virtio-iommu is another IOMMU that could benefit from this, but it
would need a new operation that standardizes passing
an sglist based on these ops.
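
For illustration, a para-virtualized driver could implement these ops
roughly as below. This is only a sketch: the pkvm_* names, the cookie
layout and the pkvm_hyp_map_sg() hypercall wrapper are all invented here.

  struct pkvm_sg_ent {
          phys_addr_t     paddr;
          size_t          pgsize;
          size_t          pgcount;
  };

  struct pkvm_map_cookie_sg {
          struct iommu_map_cookie_sg      core;   /* embeds the generic cookie */
          unsigned long                   iova;
          int                             prot;
          unsigned int                    nents;  /* capacity */
          unsigned int                    count;  /* entries added so far */
          struct pkvm_sg_ent              *ents;  /* array shared with the hyp */
  };

  static struct iommu_map_cookie_sg *
  pkvm_alloc_cookie_sg(unsigned long iova, int prot, unsigned int nents,
                       gfp_t gfp)
  {
          struct pkvm_map_cookie_sg *c = kzalloc(sizeof(*c), gfp);

          if (!c)
                  return NULL;
          c->ents = kcalloc(nents, sizeof(*c->ents), gfp);
          if (!c->ents) {
                  kfree(c);
                  return NULL;
          }
          c->iova = iova;
          c->prot = prot;
          c->nents = nents;
          return &c->core;
  }

  static int pkvm_add_deferred_map_sg(struct iommu_map_cookie_sg *cookie,
                                      phys_addr_t paddr, size_t pgsize,
                                      size_t pgcount)
  {
          struct pkvm_map_cookie_sg *c =
                  container_of(cookie, struct pkvm_map_cookie_sg, core);

          if (WARN_ON(c->count == c->nents))
                  return -ENOSPC;
          c->ents[c->count++] = (struct pkvm_sg_ent) {
                  .paddr = paddr, .pgsize = pgsize, .pgcount = pgcount,
          };
          return 0;
  }

  static int pkvm_consume_deferred_map_sg(struct iommu_map_cookie_sg *cookie)
  {
          struct pkvm_map_cookie_sg *c =
                  container_of(cookie, struct pkvm_map_cookie_sg, core);
          /* A single hypercall maps the whole list. */
          int ret = pkvm_hyp_map_sg(c->iova, c->prot, c->ents, c->count);

          kfree(c->ents);
          kfree(c);
          return ret;
  }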

Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
 drivers/iommu/iommu.c | 53 +++++++++++++++++++++++++++++++++++++++++--
 include/linux/iommu.h | 19 ++++++++++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 83c8e617a2c5..3a3c48631dd6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2608,6 +2608,37 @@ size_t iommu_unmap_fast(struct iommu_domain *domain,
 }
 EXPORT_SYMBOL_GPL(iommu_unmap_fast);
 
+static int __iommu_add_sg(struct iommu_map_cookie_sg *cookie_sg,
+			  unsigned long iova, phys_addr_t paddr, size_t size)
+{
+	struct iommu_domain *domain = cookie_sg->domain;
+	const struct iommu_domain_ops *ops = domain->ops;
+	unsigned int min_pagesz;
+	size_t pgsize, count;
+
+	if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
+		return -EINVAL;
+
+	if (WARN_ON(domain->pgsize_bitmap == 0UL))
+		return -ENODEV;
+
+	/* find out the minimum page size supported */
+	min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
+
+	/*
+	 * both the virtual address and the physical one, as well as
+	 * the size of the mapping, must be aligned (at least) to the
+	 * size of the smallest page supported by the hardware
+	 */
+	if (!IS_ALIGNED(iova | paddr | size, min_pagesz)) {
+		pr_err("unaligned: iova 0x%lx pa %pa size 0x%zx min_pagesz 0x%x\n",
+		       iova, &paddr, size, min_pagesz);
+		return -EINVAL;
+	}
+	pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
+	return ops->add_deferred_map_sg(cookie_sg, paddr, pgsize, count);
+}
+
 ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 		     struct scatterlist *sg, unsigned int nents, int prot,
 		     gfp_t gfp)
@@ -2617,6 +2648,9 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 	phys_addr_t start;
 	unsigned int i = 0;
 	int ret;
+	bool deferred_sg = ops->alloc_cookie_sg && ops->add_deferred_map_sg &&
+			   ops->consume_deferred_map_sg;
+	struct iommu_map_cookie_sg *cookie_sg;
 
 	might_sleep_if(gfpflags_allow_blocking(gfp));
 
@@ -2625,12 +2659,24 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 				__GFP_HIGHMEM)))
 		return -EINVAL;
 
+	if (deferred_sg) {
+		cookie_sg = ops->alloc_cookie_sg(iova, prot, nents, gfp);
+		if (!cookie_sg) {
+			pr_err("iommu: failed alloc cookie\n");
+			return -ENOMEM;
+		}
+		cookie_sg->domain = domain;
+	}
+
 	while (i <= nents) {
 		phys_addr_t s_phys = sg_phys(sg);
 
 		if (len && s_phys != start + len) {
-			ret = __iommu_map(domain, iova + mapped, start,
-					len, prot, gfp);
+			if (deferred_sg)
+				ret = __iommu_add_sg(cookie_sg, iova + mapped, start, len);
+			else
+				ret = __iommu_map(domain, iova + mapped, start,
+						  len, prot, gfp);
 
 			if (ret)
 				goto out_err;
@@ -2654,6 +2700,9 @@ ssize_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova,
 			sg = sg_next(sg);
 	}
 
+	if (deferred_sg)
+		ops->consume_deferred_map_sg(cookie_sg);
+
 	if (ops->iotlb_sync_map) {
 		ret = ops->iotlb_sync_map(domain, iova, mapped);
 		if (ret)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c75877044185..5e60ac349228 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -601,6 +601,14 @@ struct iommu_ops {
 	u8 user_pasid_table:1;
 };
 
+/**
+ * struct iommu_map_cookie_sg - Cookie for a deferred map sg
+ * @domain: Domain for the sg list
+ */
+struct iommu_map_cookie_sg {
+	struct iommu_domain *domain;
+};
+
 /**
  * struct iommu_domain_ops - domain specific operations
  * @attach_dev: attach an iommu domain to a device
@@ -638,6 +646,11 @@ struct iommu_ops {
  * @enable_nesting: Enable nesting
  * @set_pgtable_quirks: Set io page table quirks (IO_PGTABLE_QUIRK_*)
  * @free: Release the domain after use.
+ * @alloc_cookie_sg: Allocate a cookie that will be used to build
+ *		     an sg list, filled by the following functions
+ * @add_deferred_map_sg: Add a mapping to the cookie of an sg list.
+ * @consume_deferred_map_sg: Consume the sg list once all mappings are added;
+ *			     it should also release the cookie as it's no longer used.
  */
 struct iommu_domain_ops {
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
@@ -668,6 +681,12 @@ struct iommu_domain_ops {
 				  unsigned long quirks);
 
 	void (*free)(struct iommu_domain *domain);
+
+	struct iommu_map_cookie_sg *(*alloc_cookie_sg)(unsigned long iova, int prot,
+						       unsigned int nents, gfp_t gfp);
+	int (*add_deferred_map_sg)(struct iommu_map_cookie_sg *cookie,
+				   phys_addr_t paddr, size_t pgsize, size_t pgcount);
+	int (*consume_deferred_map_sg)(struct iommu_map_cookie_sg *cookie);
 };
 
 /**
-- 
2.47.0.338.g60cca15819-goog
Re: [RFC PATCH v2 55/58] drivers/iommu: Add deferred map_sg operations
Posted by Robin Murphy 12 months ago
On 2024-12-12 6:04 pm, Mostafa Saleh wrote:
> The pKVM SMMUv3 driver, which para-virtualizes the IOMMU in the
> hypervisor, has extra overhead in map_sg, as it loops over
> iommu_map, and each map requires a context switch, disabling
> interrupts...
> 
> Instead, add new domain operations:
> - alloc_cookie_sg: Allocate a new sg deferred cookie
> - add_deferred_map_sg: Add a mapping to the cookie
> - consume_deferred_map_sg: Consume and release the cookie
> 
> Alternatively, we could pass the sg list as is. However, this would
> duplicate some of the logic, and it makes more sense to
> consolidate all the sg list parsing for IOMMU drivers in one place.

But why bother with fiddly overly-specific machinery at all when you can 
already make ->map_pages asynchronous and consolidate the expensive part 
into ->iotlb_sync_map in general, like s390 does?
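
i.e. roughly this shape (a sketch with invented ex_* names, not the
actual s390 code):

  static int ex_map_pages(struct iommu_domain *domain, unsigned long iova,
                          phys_addr_t paddr, size_t pgsize, size_t pgcount,
                          int prot, gfp_t gfp, size_t *mapped)
  {
          /* Cheap: only update the host-side translation table. */
          return ex_pgtable_map(to_ex_domain(domain), iova, paddr,
                                pgsize, pgcount, prot, mapped);
  }

  static int ex_iotlb_sync_map(struct iommu_domain *domain,
                               unsigned long iova, size_t size)
  {
          /* Expensive: one notification covers the whole range just mapped. */
          return ex_notify_hyp(to_ex_domain(domain), iova, size);
  }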

Thanks,
Robin.

Re: [RFC PATCH v2 55/58] drivers/iommu: Add deferred map_sg operations
Posted by Mostafa Saleh 12 months ago
Hi Robin,

On Thu, Dec 19, 2024 at 12:48:27PM +0000, Robin Murphy wrote:
> But why bother with fiddly overly-specific machinery at all when you can
> already make ->map_pages asynchronous and consolidate the expensive part
> into ->iotlb_sync_map in general, like s390 does?

This was my initial idea too. But I believe there is not enough context in
iotlb_sync_map, so we would either have to create a per-domain deferred_map
list which is synced on any iotlb_sync_map, but that would require locking
the map operation, hence impacting concurrency (see the sketch below).

Or we would have to use some complex logic to extract context from
iotlb_sync_map (something like an IOVA range tree populated in map, which
we can then retrieve on sync). That’s why I proposed this approach, where
the IOMMU subsystem by design is aware of the semantics and “helps” by
providing the right data structures/calls.
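
For example, the per-domain list variant would look something like this
(sketch, invented names); the lock has to cover every map until the next
sync, which is what hurts concurrency:

  struct ex_domain {
          struct iommu_domain     domain;
          /* Serializes all mappers on the domain until the next sync. */
          spinlock_t              deferred_lock;
          struct list_head        deferred_maps;  /* of struct ex_deferred_map */
  };

  struct ex_deferred_map {
          struct list_head        list;
          unsigned long           iova;
          phys_addr_t             paddr;
          size_t                  size;
  };

  /*
   * map_pages() appends to deferred_maps under deferred_lock;
   * iotlb_sync_map() drains the list into one hypercall under the
   * same lock.
   */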

I had a quick look now at s390, and it seems a bit different, as they only
notify the hypervisor about the iova range being changed and don’t need
to provide the iova->paddr mapping, which pKVM does.

Thanks,
Mostafa
Re: [RFC PATCH v2 55/58] drivers/iommu: Add deferred map_sg operations
Posted by Jason Gunthorpe 11 months, 2 weeks ago
On Thu, Dec 19, 2024 at 02:24:05PM +0000, Mostafa Saleh wrote:
> I had a quick look now at s390, and it seems a bit different, as they only
> notify the hypervisor about the iova range being changed and don’t need
> to provide the iova->paddr mapping, which pKVM does.

Can you explain this statement some more? It seems strange to me: why
would the pkvm side, which has its own page table, need to be told
about the iova->paddr during range unmapping requests - and why would
the host/guest side have to unnecessarily store this information?

Jason
Re: [RFC PATCH v2 55/58] drivers/iommu: Add deferred map_sg operations
Posted by Mostafa Saleh 11 months, 2 weeks ago
On Thu, Jan 02, 2025 at 04:18:31PM -0400, Jason Gunthorpe wrote:
> On Thu, Dec 19, 2024 at 02:24:05PM +0000, Mostafa Saleh wrote:
> > I had a quick look now at s390, and it seems a bit different, as they only
> > notify the hypervisor about the iova range being changed and don’t need
> > to provide the iova->paddr mapping, which pKVM does.
> 
> Can you explain this statement some more? It seems strange to me: why
> would the pkvm side, which has its own page table, need to be told
> about the iova->paddr during range unmapping requests - and why would
> the host/guest side have to unnecessarily store this information?

No, it doesn’t need to be told about the paddr on unmapping.

The problem is as follows:
- With the current IOMMU API, iommu_map_sg() ends up looping over
  iommu_map(), which is very slow, as each map is a hypercall and there is
  a lot of context switching.
- So, we add a map_sg hypercall instead of consuming the kernel scatterlist
  as is, which would duplicate logic and can be complicated. A new iommu
  operation, add_deferred_map_sg(), is added: at iommu_map_sg(), instead of
  calling iommu_map(), it calls add_deferred_map_sg() to add a mapping, and
  two other new ops are added to create and consume an sg request that
  comes from a single iommu_map_sg() (see the flow sketch below).
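
The flow for one iommu_map_sg() call then becomes roughly:

  iommu_map_sg()
      cookie = ops->alloc_cookie_sg(iova, prot, nents, gfp);
      for each physically contiguous run in the sg list:
          ops->add_deferred_map_sg(cookie, paddr, pgsize, pgcount);
      ops->consume_deferred_map_sg(cookie);   /* the single hypercall */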

The drawback of this approach is that it adds three new iommu_ops with
somewhat niche semantics, which for now are only used by this driver.

An alternative approach, as Robin suggested, is to treat every iommu_map()
as an sg map, and when the driver gets the iotlb_sync_map() call it can
just issue the hypercall. However, that call only provides the IOVA range,
which requires extra work or locking as mentioned in the thread. And as
Robin mentioned that s390 does something similar, I was highlighting that
in their driver this call only notifies the hypervisor about an IOVA range
and not an actual pv mapping as pKVM requires, so it is much simpler in
their case.

Thanks,
Mostafa
Re: [RFC PATCH v2 55/58] drivers/iommu: Add deferred map_sg operations
Posted by Jason Gunthorpe 11 months, 2 weeks ago
On Fri, Jan 03, 2025 at 03:35:20PM +0000, Mostafa Saleh wrote:

> An alternative approach, as Robin suggested, is to treat every iommu_map()
> as an sg map, and when the driver gets the iotlb_sync_map() call it can
> just issue the hypercall. However, that call only provides the IOVA range,
> which requires extra work or locking as mentioned in the thread. And as
> Robin mentioned that s390 does something similar, I was highlighting that
> in their driver this call only notifies the hypervisor about an IOVA range
> and not an actual pv mapping as pKVM requires, so it is much simpler in
> their case.

Oh, that is much clearer, maybe incorporate some of that into the
commit message.

We are going in a general direction of trying to make the fast DMA
mapping path not require sg, so adding sg-specific optimizations to
the low-level driver is not good.

Batching the hypercalls in the gather and then flushing to execute the
batch seems more reasonable.
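
i.e. something like a map-side analogue of iommu_iotlb_gather (sketch,
invented):

  struct ex_map_gather {
          unsigned long   iova;           /* start of the pending batch */
          size_t          len;
          unsigned int    count;
          struct ex_ent {
                  phys_addr_t     paddr;
                  size_t          size;
          } ents[16];
  };

  /*
   * Drivers append to the gather in map_pages() and issue one hypercall
   * when it fills up or when the core flushes it (e.g. iotlb_sync_map).
   */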

You could probably work on advancing this infrastructure separately
via virtio-iommu.

Jason
Re: [RFC PATCH v2 55/58] drivers/iommu: Add deferred map_sg operations
Posted by Mostafa Saleh 11 months, 1 week ago
On Fri, Jan 03, 2025 at 11:47:57AM -0400, Jason Gunthorpe wrote:
> Oh, that is much clearer, maybe incorporate some of that into the
> commit message.

Sure.

> 
> We are going in a general direction of trying to make the fast DMA
> mapping path not require sg, so adding sg-specific optimizations to
> the low-level driver is not good.
> 
> Batching the hypercalls in the gather and then flushing to execute the
> batch seems more reasonable.
> 
> You could probably work on advancing this infrastructure separately
> via virtio-iommu.

That was my impression also; I mentioned it in the commit message,
but for virtio-iommu we need a change in the standard to define this
operation. I can look into that.

Thanks,
Mostafa