From: Leon Romanovsky <leonro@nvidia.com>
Introduce new DMA APIs to perform DMA linkage of buffers
in layers higher than the DMA API.

In the proposed API, callers perform the following steps.
In map path:
if (dma_can_use_iova(...))
dma_iova_alloc()
for (page in range)
dma_iova_link_next(...)
dma_iova_sync(...)
else
/* Fallback to legacy map pages */
for (all pages)
dma_map_page(...)
In unmap path:
if (dma_can_use_iova(...))
dma_iova_destroy()
else
for (all pages)
dma_unmap_page(...)
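
As a rough, illustrative sketch of the above flow using the dma_iova_*
helpers declared below (not part of the patch; hypothetical_map_pages(),
pages[], dma_addrs[] and nr_pages are made-up names, and error handling
is simplified):

static int hypothetical_map_pages(struct device *dev,
		struct dma_iova_state *state, struct page **pages,
		dma_addr_t *dma_addrs, unsigned int nr_pages,
		enum dma_data_direction dir)
{
	size_t total = (size_t)nr_pages * PAGE_SIZE;
	size_t mapped = 0;
	unsigned int i;
	int ret;

	if (dma_iova_try_alloc(dev, state, page_to_phys(pages[0]), total)) {
		/* Link every page into one contiguous IOVA range. */
		for (i = 0; i < nr_pages; i++) {
			ret = dma_iova_link(dev, state,
					    page_to_phys(pages[i]), mapped,
					    PAGE_SIZE, dir, 0);
			if (ret)
				goto err_destroy;
			mapped += PAGE_SIZE;
		}
		/* One IOTLB sync for the whole linked range. */
		ret = dma_iova_sync(dev, state, 0, mapped);
		if (ret)
			goto err_destroy;
		return 0;
	}

	/* Fallback to legacy per-page mapping. */
	for (i = 0; i < nr_pages; i++) {
		dma_addrs[i] = dma_map_page(dev, pages[i], 0, PAGE_SIZE, dir);
		if (dma_mapping_error(dev, dma_addrs[i])) {
			while (i--)
				dma_unmap_page(dev, dma_addrs[i],
					       PAGE_SIZE, dir);
			return -ENOMEM;
		}
	}
	return 0;

err_destroy:
	/* Unlinks whatever was linked and frees the IOVA space. */
	dma_iova_destroy(dev, state, mapped, dir, 0);
	return ret;
}

On the unmap side, the caller would check dma_use_iova(state) and either
call dma_iova_destroy() or walk dma_addrs[] with dma_unmap_page(),
mirroring the pseudo-code above.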
Reviewed-by: Christoph Hellwig <hch@lst.de>
Tested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/iommu/dma-iommu.c | 261 ++++++++++++++++++++++++++++++++++++
include/linux/dma-mapping.h | 32 +++++
2 files changed, 293 insertions(+)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index d2c298083e0a..2e014db5a244 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1818,6 +1818,267 @@ void dma_iova_free(struct device *dev, struct dma_iova_state *state)
}
EXPORT_SYMBOL_GPL(dma_iova_free);
+static int __dma_iova_link(struct device *dev, dma_addr_t addr,
+ phys_addr_t phys, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ bool coherent = dev_is_dma_coherent(dev);
+
+ if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(phys, size, dir);
+
+ return iommu_map_nosync(iommu_get_dma_domain(dev), addr, phys, size,
+ dma_info_to_prot(dir, coherent, attrs), GFP_ATOMIC);
+}
+
+static int iommu_dma_iova_bounce_and_link(struct device *dev, dma_addr_t addr,
+ phys_addr_t phys, size_t bounce_len,
+ enum dma_data_direction dir, unsigned long attrs,
+ size_t iova_start_pad)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iova_domain *iovad = &domain->iova_cookie->iovad;
+ phys_addr_t bounce_phys;
+ int error;
+
+ bounce_phys = iommu_dma_map_swiotlb(dev, phys, bounce_len, dir, attrs);
+ if (bounce_phys == DMA_MAPPING_ERROR)
+ return -ENOMEM;
+
+ error = __dma_iova_link(dev, addr - iova_start_pad,
+ bounce_phys - iova_start_pad,
+ iova_align(iovad, bounce_len), dir, attrs);
+ if (error)
+ swiotlb_tbl_unmap_single(dev, bounce_phys, bounce_len, dir,
+ attrs);
+ return error;
+}
+
+static int iommu_dma_iova_link_swiotlb(struct device *dev,
+ struct dma_iova_state *state, phys_addr_t phys, size_t offset,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, phys);
+ size_t iova_end_pad = iova_offset(iovad, phys + size);
+ dma_addr_t addr = state->addr + offset;
+ size_t mapped = 0;
+ int error;
+
+ if (iova_start_pad) {
+ size_t bounce_len = min(size, iovad->granule - iova_start_pad);
+
+ error = iommu_dma_iova_bounce_and_link(dev, addr, phys,
+ bounce_len, dir, attrs, iova_start_pad);
+ if (error)
+ return error;
+ state->__size |= DMA_IOVA_USE_SWIOTLB;
+
+ mapped += bounce_len;
+ size -= bounce_len;
+ if (!size)
+ return 0;
+ }
+
+ size -= iova_end_pad;
+ error = __dma_iova_link(dev, addr + mapped, phys + mapped, size, dir,
+ attrs);
+ if (error)
+ goto out_unmap;
+ mapped += size;
+
+ if (iova_end_pad) {
+ error = iommu_dma_iova_bounce_and_link(dev, addr + mapped,
+ phys + mapped, iova_end_pad, dir, attrs, 0);
+ if (error)
+ goto out_unmap;
+ state->__size |= DMA_IOVA_USE_SWIOTLB;
+ }
+
+ return 0;
+
+out_unmap:
+ dma_iova_unlink(dev, state, 0, mapped, dir, attrs);
+ return error;
+}
+
+/**
+ * dma_iova_link - Link a range of IOVA space
+ * @dev: DMA device
+ * @state: IOVA state
+ * @phys: physical address to link
+ * @offset: offset into the IOVA state to map into
+ * @size: size of the buffer
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Link a range of IOVA space for the given IOVA state without IOTLB sync.
+ * This function is used to link multiple physical addresses in contiguous
+ * IOVA space without performing costly IOTLB sync.
+ *
+ * The caller is responsible for calling dma_iova_sync() to sync the IOTLB
+ * at the end of linkage.
+ */
+int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, phys);
+
+ if (WARN_ON_ONCE(iova_start_pad && offset > 0))
+ return -EIO;
+
+ if (dev_use_swiotlb(dev, size, dir) && iova_offset(iovad, phys | size))
+ return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
+ size, dir, attrs);
+
+ return __dma_iova_link(dev, state->addr + offset - iova_start_pad,
+ phys - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad), dir, attrs);
+}
+EXPORT_SYMBOL_GPL(dma_iova_link);
+
+/**
+ * dma_iova_sync - Sync IOTLB
+ * @dev: DMA device
+ * @state: IOVA state
+ * @offset: offset into the IOVA state to sync
+ * @size: size of the buffer
+ *
+ * Sync IOTLB for the given IOVA state. This function should be called on
+ * the IOVA-contiguous range created by one or more dma_iova_link() calls
+ * to sync the IOTLB.
+ */
+int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ dma_addr_t addr = state->addr + offset;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+
+ return iommu_sync_map(domain, addr - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad));
+}
+EXPORT_SYMBOL_GPL(dma_iova_sync);
+
+static void iommu_dma_iova_unlink_range_slow(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+ dma_addr_t end = addr + size;
+
+ do {
+ phys_addr_t phys;
+ size_t len;
+
+ phys = iommu_iova_to_phys(domain, addr);
+ if (WARN_ON(!phys))
+ /* Something very horrible happened here */
+ return;
+
+ len = min_t(size_t,
+ end - addr, iovad->granule - iova_start_pad);
+
+ if (!dev_is_dma_coherent(dev) &&
+ !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_cpu(phys, len, dir);
+
+ swiotlb_tbl_unmap_single(dev, phys, len, dir, attrs);
+
+ addr += len;
+ iova_start_pad = 0;
+ } while (addr < end);
+}
+
+static void __iommu_dma_iova_unlink(struct device *dev,
+ struct dma_iova_state *state, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs,
+ bool free_iova)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ dma_addr_t addr = state->addr + offset;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+ struct iommu_iotlb_gather iotlb_gather;
+ size_t unmapped;
+
+ if ((state->__size & DMA_IOVA_USE_SWIOTLB) ||
+ (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)))
+ iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs);
+
+ iommu_iotlb_gather_init(&iotlb_gather);
+ iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain);
+
+ size = iova_align(iovad, size + iova_start_pad);
+ addr -= iova_start_pad;
+ unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather);
+ WARN_ON(unmapped != size);
+
+ if (!iotlb_gather.queued)
+ iommu_iotlb_sync(domain, &iotlb_gather);
+ if (free_iova)
+ iommu_dma_free_iova(domain, addr, size, &iotlb_gather);
+}
+
+/**
+ * dma_iova_unlink - Unlink a range of IOVA space
+ * @dev: DMA device
+ * @state: IOVA state
+ * @offset: offset into the IOVA state to unlink
+ * @size: size of the buffer
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Unlink a range of IOVA space for the given IOVA state.
+ */
+void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ __iommu_dma_iova_unlink(dev, state, offset, size, dir, attrs, false);
+}
+EXPORT_SYMBOL_GPL(dma_iova_unlink);
+
+/**
+ * dma_iova_destroy - Finish a DMA mapping transaction
+ * @dev: DMA device
+ * @state: IOVA state
+ * @mapped_len: number of bytes to unmap
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Unlink the IOVA range up to @mapped_len and free the entire IOVA space. The
+ * range of IOVA from dma_addr to @mapped_len must all be linked, and be the
+ * only linked IOVA in state.
+ */
+void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ if (mapped_len)
+ __iommu_dma_iova_unlink(dev, state, 0, mapped_len, dir, attrs,
+ true);
+ else
+ /*
+ * We can be here if first call to dma_iova_link() failed and
+ * there is nothing to unlink, so let's be more clear.
+ */
+ dma_iova_free(dev, state);
+}
+EXPORT_SYMBOL_GPL(dma_iova_destroy);
+
void iommu_setup_dma_ops(struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index de7f73810d54..a71e110f1e9d 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -309,6 +309,17 @@ static inline bool dma_use_iova(struct dma_iova_state *state)
bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
phys_addr_t phys, size_t size);
void dma_iova_free(struct device *dev, struct dma_iova_state *state);
+void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs);
+int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size);
+int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs);
+void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
#else /* CONFIG_IOMMU_DMA */
static inline bool dma_use_iova(struct dma_iova_state *state)
{
@@ -323,6 +334,27 @@ static inline void dma_iova_free(struct device *dev,
struct dma_iova_state *state)
{
}
+static inline void dma_iova_destroy(struct device *dev,
+ struct dma_iova_state *state, size_t mapped_len,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+}
+static inline int dma_iova_sync(struct device *dev,
+ struct dma_iova_state *state, size_t offset, size_t size)
+{
+ return -EOPNOTSUPP;
+}
+static inline int dma_iova_link(struct device *dev,
+ struct dma_iova_state *state, phys_addr_t phys, size_t offset,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ return -EOPNOTSUPP;
+}
+static inline void dma_iova_unlink(struct device *dev,
+ struct dma_iova_state *state, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+}
#endif /* CONFIG_IOMMU_DMA */
#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
--
2.49.0
On Wed, Apr 23, 2025 at 11:12:58AM +0300, Leon Romanovsky wrote:
> From: Leon Romanovsky <leonro@nvidia.com>
>
> Introduce new DMA APIs to perform DMA linkage of buffers
> in layers higher than DMA.
>
> In proposed API, the callers will perform the following steps.
> In map path:
> if (dma_can_use_iova(...))
> dma_iova_alloc()
> for (page in range)
> dma_iova_link_next(...)
> dma_iova_sync(...)
> else
> /* Fallback to legacy map pages */
> for (all pages)
> dma_map_page(...)
>
> In unmap path:
> if (dma_can_use_iova(...))
> dma_iova_destroy()
> else
> for (all pages)
> dma_unmap_page(...)
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>
> Tested-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
> drivers/iommu/dma-iommu.c | 261 ++++++++++++++++++++++++++++++++++++
> include/linux/dma-mapping.h | 32 +++++
> 2 files changed, 293 insertions(+)
>
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index d2c298083e0a..2e014db5a244 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -1818,6 +1818,267 @@ void dma_iova_free(struct device *dev, struct dma_iova_state *state)
> }
> EXPORT_SYMBOL_GPL(dma_iova_free);
>
> +static int __dma_iova_link(struct device *dev, dma_addr_t addr,
> + phys_addr_t phys, size_t size, enum dma_data_direction dir,
> + unsigned long attrs)
> +{
> + bool coherent = dev_is_dma_coherent(dev);
> +
> + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
> + arch_sync_dma_for_device(phys, size, dir);
So arch_sync_dma_for_device() is a no-op on some architectures, notably x86.
Since you're doing this work, and given the above pattern is common in
the non-iova case, we could save ourselves two branch checks on x86 in
__dma_iova_link() and also generalize the savings for the non-iova case as
well. For the non-iova case we have two use cases, one with the attrs on
the initial mapping, and one without on subsequent sync ops. For the iova
case the attrs are always consistently used.
So we could just have something like this:
#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE
static inline void arch_sync_dma_device(struct device *dev,
					phys_addr_t paddr, size_t size,
					enum dma_data_direction dir)
{
	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(paddr, size, dir);
}

static inline void arch_sync_dma_device_attrs(struct device *dev,
					      phys_addr_t paddr, size_t size,
					      enum dma_data_direction dir,
					      unsigned long attrs)
{
	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(paddr, size, dir);
}
#else
static inline void arch_sync_dma_device(struct device *dev,
					phys_addr_t paddr, size_t size,
					enum dma_data_direction dir)
{
}

static inline void arch_sync_dma_device_attrs(struct device *dev,
					      phys_addr_t paddr, size_t size,
					      enum dma_data_direction dir,
					      unsigned long attrs)
{
}
#endif
> +/**
> + * dma_iova_link - Link a range of IOVA space
> + * @dev: DMA device
> + * @state: IOVA state
> + * @phys: physical address to link
> + * @offset: offset into the IOVA state to map into
> + * @size: size of the buffer
> + * @dir: DMA direction
> + * @attrs: attributes of mapping properties
> + *
> + * Link a range of IOVA space for the given IOVA state without IOTLB sync.
> + * This function is used to link multiple physical addresses in contiguous
> + * IOVA space without performing costly IOTLB sync.
> + *
> + * The caller is responsible to call to dma_iova_sync() to sync IOTLB at
> + * the end of linkage.
> + */
> +int dma_iova_link(struct device *dev, struct dma_iova_state *state,
> + phys_addr_t phys, size_t offset, size_t size,
> + enum dma_data_direction dir, unsigned long attrs)
> +{
> + struct iommu_domain *domain = iommu_get_dma_domain(dev);
> + struct iommu_dma_cookie *cookie = domain->iova_cookie;
> + struct iova_domain *iovad = &cookie->iovad;
> + size_t iova_start_pad = iova_offset(iovad, phys);
> +
> + if (WARN_ON_ONCE(iova_start_pad && offset > 0))
> + return -EIO;
> +
> + if (dev_use_swiotlb(dev, size, dir) && iova_offset(iovad, phys | size))
There is already a similar check for the non-iova case in
iommu_dma_map_page(), with a nice comment about why it is checked;
this seems to be just screaming for a helper:
/*
 * Checks if a physical buffer has unaligned boundaries with respect to
 * the IOMMU granule. Returns non-zero if either the start or end
 * address is not aligned to the granule boundary.
 */
static inline size_t iova_unaligned(struct iova_domain *iovad,
				    phys_addr_t phys,
				    size_t size)
{
	return iova_offset(iovad, phys | size);
}
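
With such a helper, the check in dma_iova_link() above would presumably
become just the following (a sketch of the substitution, not a tested
change):

	if (dev_use_swiotlb(dev, size, dir) &&
	    iova_unaligned(iovad, phys, size))
		return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
						   size, dir, attrs);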
Other than that, looks good.
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Luis
On Sat, Apr 26, 2025 at 03:46:30PM -0700, Luis Chamberlain wrote:
> On Wed, Apr 23, 2025 at 11:12:58AM +0300, Leon Romanovsky wrote:
> > From: Leon Romanovsky <leonro@nvidia.com>
> >
> > Introduce new DMA APIs to perform DMA linkage of buffers
> > in layers higher than DMA.
> >
> > In proposed API, the callers will perform the following steps.
> > In map path:
> > if (dma_can_use_iova(...))
> > dma_iova_alloc()
> > for (page in range)
> > dma_iova_link_next(...)
> > dma_iova_sync(...)
> > else
> > /* Fallback to legacy map pages */
> > for (all pages)
> > dma_map_page(...)
> >
> > In unmap path:
> > if (dma_can_use_iova(...))
> > dma_iova_destroy()
> > else
> > for (all pages)
> > dma_unmap_page(...)
> >
> > Reviewed-by: Christoph Hellwig <hch@lst.de>
> > Tested-by: Jens Axboe <axboe@kernel.dk>
> > Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> > ---
> > drivers/iommu/dma-iommu.c | 261 ++++++++++++++++++++++++++++++++++++
> > include/linux/dma-mapping.h | 32 +++++
> > 2 files changed, 293 insertions(+)
> >
> > diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> > index d2c298083e0a..2e014db5a244 100644
> > --- a/drivers/iommu/dma-iommu.c
> > +++ b/drivers/iommu/dma-iommu.c
> > @@ -1818,6 +1818,267 @@ void dma_iova_free(struct device *dev, struct dma_iova_state *state)
> > }
> > EXPORT_SYMBOL_GPL(dma_iova_free);
> >
> > +static int __dma_iova_link(struct device *dev, dma_addr_t addr,
> > + phys_addr_t phys, size_t size, enum dma_data_direction dir,
> > + unsigned long attrs)
> > +{
> > + bool coherent = dev_is_dma_coherent(dev);
> > +
> > + if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
> > + arch_sync_dma_for_device(phys, size, dir);
>
> So arch_sync_dma_for_device() is a no-op on some architectures, notably x86.
> So since you're doing this work and given the above pattern is common on
> the non iova case, we could save ourselves 2 branches checks on x86 on
> __dma_iova_link() and also generalize savings for the non-iova case as
> well. For the non-iova case we have two use cases, one with the attrs on
> initial mapping, and one without on subsequent sync ops. For the iova
> case the attr is always consistently used.
I want to believe that the compiler will discard this "if (!coherent &&
!(attrs & DMA_ATTR_SKIP_CPU_SYNC))" branch when the callee is empty.
>
> So we could just have something like this:
>
> #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE
> static inline void arch_sync_dma_device(struct device *dev,
> 					phys_addr_t paddr, size_t size,
> 					enum dma_data_direction dir)
> {
> 	if (!dev_is_dma_coherent(dev))
> 		arch_sync_dma_for_device(paddr, size, dir);
> }
>
> static inline void arch_sync_dma_device_attrs(struct device *dev,
> 					      phys_addr_t paddr, size_t size,
> 					      enum dma_data_direction dir,
> 					      unsigned long attrs)
> {
> 	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
> 		arch_sync_dma_for_device(paddr, size, dir);
> }
> #else
> static inline void arch_sync_dma_device(struct device *dev,
> 					phys_addr_t paddr, size_t size,
> 					enum dma_data_direction dir)
> {
> }
>
> static inline void arch_sync_dma_device_attrs(struct device *dev,
> 					      phys_addr_t paddr, size_t size,
> 					      enum dma_data_direction dir,
> 					      unsigned long attrs)
> {
> }
> #endif
The problem is that the dev_is_dma_coherent() and DMA_ATTR_SKIP_CPU_SYNC
checks are scattered all over the dma-iommu.c file in different
combinations. While we can add new static functions for a small number of
use cases, it would be a half-solution.
>
> > +/**
> > + * dma_iova_link - Link a range of IOVA space
> > + * @dev: DMA device
> > + * @state: IOVA state
> > + * @phys: physical address to link
> > + * @offset: offset into the IOVA state to map into
> > + * @size: size of the buffer
> > + * @dir: DMA direction
> > + * @attrs: attributes of mapping properties
> > + *
> > + * Link a range of IOVA space for the given IOVA state without IOTLB sync.
> > + * This function is used to link multiple physical addresses in contiguous
> > + * IOVA space without performing costly IOTLB sync.
> > + *
> > + * The caller is responsible to call to dma_iova_sync() to sync IOTLB at
> > + * the end of linkage.
> > + */
> > +int dma_iova_link(struct device *dev, struct dma_iova_state *state,
> > + phys_addr_t phys, size_t offset, size_t size,
> > + enum dma_data_direction dir, unsigned long attrs)
> > +{
> > + struct iommu_domain *domain = iommu_get_dma_domain(dev);
> > + struct iommu_dma_cookie *cookie = domain->iova_cookie;
> > + struct iova_domain *iovad = &cookie->iovad;
> > + size_t iova_start_pad = iova_offset(iovad, phys);
> > +
> > + if (WARN_ON_ONCE(iova_start_pad && offset > 0))
> > + return -EIO;
> > +
> > + if (dev_use_swiotlb(dev, size, dir) && iova_offset(iovad, phys | size))
>
> There is already a similar check for the non-iova case for this on
> iommu_dma_map_page() and a nice comment about what why this checked,
> this seems to be just screaming for a helper:
>
> /*
>  * Checks if a physical buffer has unaligned boundaries with respect to
>  * the IOMMU granule. Returns non-zero if either the start or end
>  * address is not aligned to the granule boundary.
>  */
> static inline size_t iova_unaligned(struct iova_domain *iovad,
> 				    phys_addr_t phys,
> 				    size_t size)
> {
> 	return iova_offset(iovad, phys | size);
> }
I added this function, thanks.
> Other than that, looks good.
>
> Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
>
> Luis
On Sun, Apr 27, 2025 at 11:13:12AM +0300, Leon Romanovsky wrote:
> > So arch_sync_dma_for_device() is a no-op on some architectures, notably x86.
> > So since you're doing this work and given the above pattern is common on
> > the non iova case, we could save ourselves 2 branches checks on x86 on
> > __dma_iova_link() and also generalize savings for the non-iova case as
> > well. For the non-iova case we have two use cases, one with the attrs on
> > initial mapping, and one without on subsequent sync ops. For the iova
> > case the attr is always consistently used.
>
> I want to believe that compiler will discards these "if (!coherent &&
> !(attrs & DMA_ATTR_SKIP_CPU_SYNC)))" branch if case is empty.

Yes, it is the poster child for dead code elimination using the
IS_ENABLED() helper.

> checks are scattered over all dma-iommu.c file with different
> combinations. While we can do new static functions for small number of
> use cases, it will be half-solution.

Don't bother.
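
The pattern in question is the usual config-gated stub arrangement,
sketched from memory below (not code from this patch): with the config
option disabled the hook is an empty static inline, so the guarded call
compiles down to nothing and the branch test is eliminated as dead code.

#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
		enum dma_data_direction dir);
#else
static inline void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
		enum dma_data_direction dir)
{
	/* no-op: the compiler can drop the guarded call entirely */
}
#endif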
On Sun, Apr 27, 2025 at 11:13:12AM +0300, Leon Romanovsky wrote:
> > So arch_sync_dma_for_device() is a no-op on some architectures, notably x86.
> > So since you're doing this work and given the above pattern is common on
> > the non iova case, we could save ourselves 2 branches checks on x86 on
> > __dma_iova_link() and also generalize savings for the non-iova case as
> > well. For the non-iova case we have two use cases, one with the attrs on
> > initial mapping, and one without on subsequent sync ops. For the iova
> > case the attr is always consistently used.
>
> I want to believe that compiler will discards these "if (!coherent &&
> !(attrs & DMA_ATTR_SKIP_CPU_SYNC)))" branch if case is empty.

Yeah, I'm pretty sure it will

Jason