From: Leon Romanovsky <leonro@nvidia.com>
Introduce new DMA APIs to perform DMA linkage of buffers
in layers higher than DMA.
In the proposed API, the callers will perform the following steps.
In map path:
if (dma_can_use_iova(...))
dma_iova_alloc()
for (page in range)
dma_iova_link_next(...)
dma_iova_sync(...)
else
/* Fallback to legacy map pages */
for (all pages)
dma_map_page(...)
In unmap path:
if (dma_can_use_iova(...))
dma_iova_destroy()
else
for (all pages)
dma_unmap_page(...)
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/iommu/dma-iommu.c | 259 ++++++++++++++++++++++++++++++++++++
include/linux/dma-mapping.h | 32 +++++
2 files changed, 291 insertions(+)
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index e1eaad500d27..4a504a879cc0 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1834,6 +1834,265 @@ void dma_iova_free(struct device *dev, struct dma_iova_state *state)
}
EXPORT_SYMBOL_GPL(dma_iova_free);
+/*
+ * Map @size bytes starting at @phys to the IOVA @addr without issuing an
+ * IOTLB sync; callers batch the sync via dma_iova_sync().  For non-coherent
+ * devices the CPU cache is first made coherent for the device, unless the
+ * caller passed DMA_ATTR_SKIP_CPU_SYNC.
+ */
+static int __dma_iova_link(struct device *dev, dma_addr_t addr,
+ phys_addr_t phys, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ bool coherent = dev_is_dma_coherent(dev);
+
+ if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(phys, size, dir);
+
+ /* GFP_ATOMIC: presumably callers may not sleep — TODO confirm contexts */
+ return iommu_map_nosync(iommu_get_dma_domain(dev), addr, phys, size,
+ dma_info_to_prot(dir, coherent, attrs), GFP_ATOMIC);
+}
+
+/*
+ * Bounce @bounce_len bytes at @phys through swiotlb and link the bounce
+ * buffer into the IOVA range at @addr.  @iova_start_pad is the offset of
+ * @addr/@phys into the IOVA granule; both are rewound by it and the length
+ * rounded up so the link covers a whole, granule-aligned slot.
+ */
+static int iommu_dma_iova_bounce_and_link(struct device *dev, dma_addr_t addr,
+ phys_addr_t phys, size_t bounce_len,
+ enum dma_data_direction dir, unsigned long attrs,
+ size_t iova_start_pad)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iova_domain *iovad = &domain->iova_cookie->iovad;
+ phys_addr_t bounce_phys;
+ int error;
+
+ bounce_phys = iommu_dma_map_swiotlb(dev, phys, bounce_len, dir, attrs);
+ if (bounce_phys == DMA_MAPPING_ERROR)
+ return -ENOMEM;
+
+ error = __dma_iova_link(dev, addr - iova_start_pad,
+ bounce_phys - iova_start_pad,
+ iova_align(iovad, bounce_len), dir, attrs);
+ if (error)
+ /* Linking failed: release the bounce buffer we just grabbed. */
+ swiotlb_tbl_unmap_single(dev, bounce_phys, bounce_len, dir,
+ attrs);
+ return error;
+}
+
+/*
+ * Link a range whose start and/or end is not aligned to the IOVA granule.
+ * The unaligned head and tail are bounced through swiotlb (presumably so
+ * memory sharing the granule is not exposed through the IOMMU — see the
+ * dev_use_swiotlb() gate in dma_iova_link()); the aligned middle, if any,
+ * is linked directly.
+ */
+static int iommu_dma_iova_link_swiotlb(struct device *dev,
+ struct dma_iova_state *state, phys_addr_t phys, size_t offset,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, phys);
+ size_t iova_end_pad = iova_offset(iovad, phys + size);
+ dma_addr_t addr = state->addr + offset;
+ size_t mapped = 0;
+ int error;
+
+ if (iova_start_pad) {
+ /* Unaligned head: bounce up to the next granule boundary. */
+ size_t bounce_len = min(size, iovad->granule - iova_start_pad);
+
+ error = iommu_dma_iova_bounce_and_link(dev, addr, phys,
+ bounce_len, dir, attrs, iova_start_pad);
+ if (error)
+ return error;
+ /* Flag so the unlink path knows to tear down bounce buffers. */
+ state->__size |= DMA_IOVA_USE_SWIOTLB;
+
+ mapped += bounce_len;
+ size -= bounce_len;
+ if (!size)
+ return 0;
+ }
+
+ /* Link the granule-aligned middle directly, excluding the tail. */
+ size -= iova_end_pad;
+ error = __dma_iova_link(dev, addr + mapped, phys + mapped, size, dir,
+ attrs);
+ if (error)
+ goto out_unmap;
+ mapped += size;
+
+ if (iova_end_pad) {
+ /* Unaligned tail: bounce the remainder of the last granule. */
+ error = iommu_dma_iova_bounce_and_link(dev, addr + mapped,
+ phys + mapped, iova_end_pad, dir, attrs, 0);
+ if (error)
+ goto out_unmap;
+ state->__size |= DMA_IOVA_USE_SWIOTLB;
+ }
+
+ return 0;
+
+out_unmap:
+ /* Undo everything linked so far (head and/or middle). */
+ dma_iova_unlink(dev, state, 0, mapped, dir, attrs);
+ return error;
+}
+
+/**
+ * dma_iova_link - Link a range of IOVA space
+ * @dev: DMA device
+ * @state: IOVA state
+ * @phys: physical address to link
+ * @offset: offset into the IOVA state to map into
+ * @size: size of the buffer
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Link a range of IOVA space for the given IOVA state without IOTLB sync.
+ * This function is used to link multiple physical addresses in contiguous
+ * IOVA space without performing costly IOTLB sync.
+ *
+ * The caller is responsible for calling dma_iova_sync() to sync the IOTLB
+ * at the end of linkage.
+ */
+int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, phys);
+
+ /* Only the first linked chunk (offset 0) may start mid-granule. */
+ if (WARN_ON_ONCE(iova_start_pad && offset > 0))
+ return -EIO;
+
+ /* Unaligned start or end on a swiotlb device: take the bounce path. */
+ if (dev_use_swiotlb(dev, size, dir) && iova_offset(iovad, phys | size))
+ return iommu_dma_iova_link_swiotlb(dev, state, phys, offset,
+ size, dir, attrs);
+
+ return __dma_iova_link(dev, state->addr + offset - iova_start_pad,
+ phys - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad), dir, attrs);
+}
+EXPORT_SYMBOL_GPL(dma_iova_link);
+
+/**
+ * dma_iova_sync - Sync IOTLB
+ * @dev: DMA device
+ * @state: IOVA state
+ * @offset: offset into the IOVA state to sync
+ * @size: size of the buffer
+ *
+ * Sync IOTLB for the given IOVA state. This function should be called on
+ * the IOVA-contiguous range created by one or more dma_iova_link() calls
+ * to sync the IOTLB.
+ */
+int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ dma_addr_t addr = state->addr + offset;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+
+ /* Align the range out to granule boundaries before syncing. */
+ return iommu_sync_map(domain, addr - iova_start_pad,
+ iova_align(iovad, size + iova_start_pad));
+}
+EXPORT_SYMBOL_GPL(dma_iova_sync);
+
+/*
+ * Slow-path teardown: walk the IOVA range one granule at a time, syncing
+ * CPU caches for non-coherent devices and releasing any swiotlb bounce
+ * buffers backing the range.
+ */
+static void iommu_dma_iova_unlink_range_slow(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+ dma_addr_t end = addr + size;
+
+ do {
+ phys_addr_t phys;
+ size_t len;
+
+ phys = iommu_iova_to_phys(domain, addr);
+ if (WARN_ON(!phys))
+ /*
+ * An unmapped hole should never happen here.  Bail out
+ * rather than "continue": addr is not advanced on this
+ * path, so continuing would loop forever.
+ */
+ return;
+ len = min_t(size_t,
+ end - addr, iovad->granule - iova_start_pad);
+
+ if (!dev_is_dma_coherent(dev) &&
+ !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_cpu(phys, len, dir);
+
+ swiotlb_tbl_unmap_single(dev, phys, len, dir, attrs);
+
+ addr += len;
+ /* Only the very first chunk can start mid-granule. */
+ iova_start_pad = 0;
+ } while (addr < end);
+}
+
+/*
+ * Common unlink implementation.  @free_iova selects whether the IOVA space
+ * itself is released (dma_iova_destroy()) or kept for re-linking
+ * (dma_iova_unlink()).  The @offset/@size range must match what was
+ * previously linked: iommu_unmap_fast() cannot split a larger mapping, the
+ * same rule as for dma_unmap_sg() — hence the WARN_ON below.
+ */
+static void __iommu_dma_iova_unlink(struct device *dev,
+ struct dma_iova_state *state, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs,
+ bool free_iova)
+{
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
+ struct iommu_dma_cookie *cookie = domain->iova_cookie;
+ struct iova_domain *iovad = &cookie->iovad;
+ dma_addr_t addr = state->addr + offset;
+ size_t iova_start_pad = iova_offset(iovad, addr);
+ struct iommu_iotlb_gather iotlb_gather;
+ size_t unmapped;
+
+ /* Slow path if swiotlb bouncing was used or CPU cache sync is needed. */
+ if ((state->__size & DMA_IOVA_USE_SWIOTLB) ||
+ (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)))
+ iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs);
+
+ iommu_iotlb_gather_init(&iotlb_gather);
+ /* Flush-queue domains defer the IOTLB flush when freeing the IOVA. */
+ iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain);
+
+ size = iova_align(iovad, size + iova_start_pad);
+ addr -= iova_start_pad;
+ unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather);
+ WARN_ON(unmapped != size);
+
+ if (!iotlb_gather.queued)
+ iommu_iotlb_sync(domain, &iotlb_gather);
+ if (free_iova)
+ iommu_dma_free_iova(cookie, addr, size, &iotlb_gather);
+}
+
+/**
+ * dma_iova_unlink - Unlink a range of IOVA space
+ * @dev: DMA device
+ * @state: IOVA state
+ * @offset: offset into the IOVA state to unlink
+ * @size: size of the buffer
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Unlink a range of IOVA space for the given IOVA state.
+ *
+ * The range must correspond to one or more whole ranges previously passed
+ * to dma_iova_link(); partially unlinking a linked range is not supported,
+ * as the IOMMU cannot split an existing mapping (the same rule that applies
+ * to dma_unmap_sg()).
+ */
+void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ __iommu_dma_iova_unlink(dev, state, offset, size, dir, attrs, false);
+}
+EXPORT_SYMBOL_GPL(dma_iova_unlink);
+
+/**
+ * dma_iova_destroy - Finish a DMA mapping transaction
+ * @dev: DMA device
+ * @state: IOVA state
+ * @mapped_len: number of bytes to unmap
+ * @dir: DMA direction
+ * @attrs: attributes of mapping properties
+ *
+ * Unlink the IOVA range up to @mapped_len and free the entire IOVA space. The
+ * range of IOVA from dma_addr to @mapped_len must all be linked, and be the
+ * only linked IOVA in state.
+ */
+void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ if (mapped_len)
+ /* true: also release the IOVA space after unmapping. */
+ __iommu_dma_iova_unlink(dev, state, 0, mapped_len, dir, attrs,
+ true);
+ else
+ /*
+ * We can be here if first call to dma_iova_link() failed and
+ * there is nothing to unlink, so let's be more clear.
+ */
+ dma_iova_free(dev, state);
+}
+EXPORT_SYMBOL_GPL(dma_iova_destroy);
+
void iommu_setup_dma_ops(struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 817f11bce7bc..8074a3b5c807 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -313,6 +313,17 @@ static inline bool dma_use_iova(struct dma_iova_state *state)
bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state,
phys_addr_t phys, size_t size);
void dma_iova_free(struct device *dev, struct dma_iova_state *state);
+void dma_iova_destroy(struct device *dev, struct dma_iova_state *state,
+ size_t mapped_len, enum dma_data_direction dir,
+ unsigned long attrs);
+int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size);
+int dma_iova_link(struct device *dev, struct dma_iova_state *state,
+ phys_addr_t phys, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs);
+void dma_iova_unlink(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
#else /* CONFIG_IOMMU_DMA */
static inline bool dma_use_iova(struct dma_iova_state *state)
{
@@ -327,6 +338,27 @@ static inline void dma_iova_free(struct device *dev,
struct dma_iova_state *state)
{
}
+/* !CONFIG_IOMMU_DMA stubs: teardown is a no-op, link/sync report no support. */
+static inline void dma_iova_destroy(struct device *dev,
+ struct dma_iova_state *state, size_t mapped_len,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+}
+static inline int dma_iova_sync(struct device *dev, struct dma_iova_state *state,
+ size_t offset, size_t size)
+{
+ return -EOPNOTSUPP;
+}
+static inline int dma_iova_link(struct device *dev,
+ struct dma_iova_state *state, phys_addr_t phys, size_t offset,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ return -EOPNOTSUPP;
+}
+static inline void dma_iova_unlink(struct device *dev,
+ struct dma_iova_state *state, size_t offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+}
#endif /* CONFIG_IOMMU_DMA */
#if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC)
--
2.47.0
On Sun, Nov 10, 2024 at 03:46:54PM +0200, Leon Romanovsky wrote: > From: Leon Romanovsky <leonro@nvidia.com> > > Introduce new DMA APIs to perform DMA linkage of buffers > in layers higher than DMA. > > In proposed API, the callers will perform the following steps. > In map path: > if (dma_can_use_iova(...)) > dma_iova_alloc() > for (page in range) > dma_iova_link_next(...) > dma_iova_sync(...) > else > /* Fallback to legacy map pages */ > for (all pages) > dma_map_page(...) > > In unmap path: > if (dma_can_use_iova(...)) > dma_iova_destroy() > else > for (all pages) > dma_unmap_page(...) > > Signed-off-by: Leon Romanovsky <leonro@nvidia.com> > --- > drivers/iommu/dma-iommu.c | 259 ++++++++++++++++++++++++++++++++++++ > include/linux/dma-mapping.h | 32 +++++ > 2 files changed, 291 insertions(+) [...] > +/** > + * dma_iova_link - Link a range of IOVA space > + * @dev: DMA device > + * @state: IOVA state > + * @phys: physical address to link > + * @offset: offset into the IOVA state to map into > + * @size: size of the buffer > + * @dir: DMA direction > + * @attrs: attributes of mapping properties > + * > + * Link a range of IOVA space for the given IOVA state without IOTLB sync. > + * This function is used to link multiple physical addresses in contigueous > + * IOVA space without performing costly IOTLB sync. > + * > + * The caller is responsible to call to dma_iova_sync() to sync IOTLB at > + * the end of linkage. 
> + */ > +int dma_iova_link(struct device *dev, struct dma_iova_state *state, > + phys_addr_t phys, size_t offset, size_t size, > + enum dma_data_direction dir, unsigned long attrs) > +{ > + struct iommu_domain *domain = iommu_get_dma_domain(dev); > + struct iommu_dma_cookie *cookie = domain->iova_cookie; > + struct iova_domain *iovad = &cookie->iovad; > + size_t iova_start_pad = iova_offset(iovad, phys); > + > + if (WARN_ON_ONCE(iova_start_pad && offset > 0)) > + return -EIO; > + > + if (dev_use_swiotlb(dev, size, dir) && iova_offset(iovad, phys | size)) > + return iommu_dma_iova_link_swiotlb(dev, state, phys, offset, > + size, dir, attrs); > + > + return __dma_iova_link(dev, state->addr + offset - iova_start_pad, > + phys - iova_start_pad, > + iova_align(iovad, size + iova_start_pad), dir, attrs); > +} > +EXPORT_SYMBOL_GPL(dma_iova_link); > + > +/** > + * dma_iova_sync - Sync IOTLB > + * @dev: DMA device > + * @state: IOVA state > + * @offset: offset into the IOVA state to sync > + * @size: size of the buffer > + * > + * Sync IOTLB for the given IOVA state. This function should be called on > + * the IOVA-contigous range created by one ore more dma_iova_link() calls > + * to sync the IOTLB. 
> + */ > +int dma_iova_sync(struct device *dev, struct dma_iova_state *state, > + size_t offset, size_t size) > +{ > + struct iommu_domain *domain = iommu_get_dma_domain(dev); > + struct iommu_dma_cookie *cookie = domain->iova_cookie; > + struct iova_domain *iovad = &cookie->iovad; > + dma_addr_t addr = state->addr + offset; > + size_t iova_start_pad = iova_offset(iovad, addr); > + > + return iommu_sync_map(domain, addr - iova_start_pad, > + iova_align(iovad, size + iova_start_pad)); > +} > +EXPORT_SYMBOL_GPL(dma_iova_sync); > + > +static void iommu_dma_iova_unlink_range_slow(struct device *dev, > + dma_addr_t addr, size_t size, enum dma_data_direction dir, > + unsigned long attrs) > +{ > + struct iommu_domain *domain = iommu_get_dma_domain(dev); > + struct iommu_dma_cookie *cookie = domain->iova_cookie; > + struct iova_domain *iovad = &cookie->iovad; > + size_t iova_start_pad = iova_offset(iovad, addr); > + dma_addr_t end = addr + size; > + > + do { > + phys_addr_t phys; > + size_t len; > + > + phys = iommu_iova_to_phys(domain, addr); > + if (WARN_ON(!phys)) > + continue; > + len = min_t(size_t, > + end - addr, iovad->granule - iova_start_pad); > + > + if (!dev_is_dma_coherent(dev) && > + !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) > + arch_sync_dma_for_cpu(phys, len, dir); > + > + swiotlb_tbl_unmap_single(dev, phys, len, dir, attrs); > + > + addr += len; > + iova_start_pad = 0; > + } while (addr < end); > +} > + > +static void __iommu_dma_iova_unlink(struct device *dev, > + struct dma_iova_state *state, size_t offset, size_t size, > + enum dma_data_direction dir, unsigned long attrs, > + bool free_iova) > +{ > + struct iommu_domain *domain = iommu_get_dma_domain(dev); > + struct iommu_dma_cookie *cookie = domain->iova_cookie; > + struct iova_domain *iovad = &cookie->iovad; > + dma_addr_t addr = state->addr + offset; > + size_t iova_start_pad = iova_offset(iovad, addr); > + struct iommu_iotlb_gather iotlb_gather; > + size_t unmapped; > + > + if ((state->__size & 
DMA_IOVA_USE_SWIOTLB) || > + (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))) > + iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs); > + > + iommu_iotlb_gather_init(&iotlb_gather); > + iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain); > + > + size = iova_align(iovad, size + iova_start_pad); > + addr -= iova_start_pad; > + unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather); > + WARN_ON(unmapped != size); Does the new API require that the 'size' passed to dma_iova_unlink() exactly match the 'size' passed to the corresponding call to dma_iova_link()? I ask because the IOMMU page-table code is built around the assumption that partial unmap() operations never occur (i.e. operations which could require splitting a huge mapping). We just removed [1] that code from the Arm IO page-table implementations, so it would be good to avoid adding it back for this. Will [1] https://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git/commit/?h=arm/smmu&id=33729a5fc0caf7a97d20507acbeee6b012e7e519
On Mon, Nov 18, 2024 at 02:59:30PM +0000, Will Deacon wrote: > On Sun, Nov 10, 2024 at 03:46:54PM +0200, Leon Romanovsky wrote: > > From: Leon Romanovsky <leonro@nvidia.com> > > > > Introduce new DMA APIs to perform DMA linkage of buffers > > in layers higher than DMA. > > > > In proposed API, the callers will perform the following steps. > > In map path: > > if (dma_can_use_iova(...)) > > dma_iova_alloc() > > for (page in range) > > dma_iova_link_next(...) > > dma_iova_sync(...) > > else > > /* Fallback to legacy map pages */ > > for (all pages) > > dma_map_page(...) > > > > In unmap path: > > if (dma_can_use_iova(...)) > > dma_iova_destroy() > > else > > for (all pages) > > dma_unmap_page(...) > > > > Signed-off-by: Leon Romanovsky <leonro@nvidia.com> > > --- > > drivers/iommu/dma-iommu.c | 259 ++++++++++++++++++++++++++++++++++++ > > include/linux/dma-mapping.h | 32 +++++ > > 2 files changed, 291 insertions(+) > <...> > > +static void __iommu_dma_iova_unlink(struct device *dev, > > + struct dma_iova_state *state, size_t offset, size_t size, > > + enum dma_data_direction dir, unsigned long attrs, > > + bool free_iova) > > +{ > > + struct iommu_domain *domain = iommu_get_dma_domain(dev); > > + struct iommu_dma_cookie *cookie = domain->iova_cookie; > > + struct iova_domain *iovad = &cookie->iovad; > > + dma_addr_t addr = state->addr + offset; > > + size_t iova_start_pad = iova_offset(iovad, addr); > > + struct iommu_iotlb_gather iotlb_gather; > > + size_t unmapped; > > + > > + if ((state->__size & DMA_IOVA_USE_SWIOTLB) || > > + (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))) > > + iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs); > > + > > + iommu_iotlb_gather_init(&iotlb_gather); > > + iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain); > > + > > + size = iova_align(iovad, size + iova_start_pad); > > + addr -= iova_start_pad; > > + unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather); > > + WARN_ON(unmapped != 
size); > > Does the new API require that the 'size' passed to dma_iova_unlink() > exactly match the 'size' passed to the corresponding call to > dma_iova_link()? I ask because the IOMMU page-table code is built around > the assumption that partial unmap() operations never occur (i.e. > operations which could require splitting a huge mapping). We just > removed [1] that code from the Arm IO page-table implementations, so it > would be good to avoid adding it back for this. dma_iova_link/dma_iova_unlink() don't have any assumptions in addition to already existing for dma_map_sg/dma_unmap_sg(). In reality, it means that all calls to unlink will have same size as for link. Thanks > > Will > > [1] https://git.kernel.org/pub/scm/linux/kernel/git/iommu/linux.git/commit/?h=arm/smmu&id=33729a5fc0caf7a97d20507acbeee6b012e7e519
On Mon, Nov 18, 2024 at 08:55:33PM +0200, Leon Romanovsky wrote: > On Mon, Nov 18, 2024 at 02:59:30PM +0000, Will Deacon wrote: > > On Sun, Nov 10, 2024 at 03:46:54PM +0200, Leon Romanovsky wrote: > > > +static void __iommu_dma_iova_unlink(struct device *dev, > > > + struct dma_iova_state *state, size_t offset, size_t size, > > > + enum dma_data_direction dir, unsigned long attrs, > > > + bool free_iova) > > > +{ > > > + struct iommu_domain *domain = iommu_get_dma_domain(dev); > > > + struct iommu_dma_cookie *cookie = domain->iova_cookie; > > > + struct iova_domain *iovad = &cookie->iovad; > > > + dma_addr_t addr = state->addr + offset; > > > + size_t iova_start_pad = iova_offset(iovad, addr); > > > + struct iommu_iotlb_gather iotlb_gather; > > > + size_t unmapped; > > > + > > > + if ((state->__size & DMA_IOVA_USE_SWIOTLB) || > > > + (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))) > > > + iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs); > > > + > > > + iommu_iotlb_gather_init(&iotlb_gather); > > > + iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain); > > > + > > > + size = iova_align(iovad, size + iova_start_pad); > > > + addr -= iova_start_pad; > > > + unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather); > > > + WARN_ON(unmapped != size); > > > > Does the new API require that the 'size' passed to dma_iova_unlink() > > exactly match the 'size' passed to the corresponding call to > > dma_iova_link()? I ask because the IOMMU page-table code is built around > > the assumption that partial unmap() operations never occur (i.e. > > operations which could require splitting a huge mapping). We just > > removed [1] that code from the Arm IO page-table implementations, so it > > would be good to avoid adding it back for this. > > dma_iova_link/dma_iova_unlink() don't have any assumptions in addition > to already existing for dma_map_sg/dma_unmap_sg(). 
In reality, it means > that all calls to unlink will have same size as for link. Ok, great. Any chance you could call that out in the documentation patch, please? Will
On Tue, Nov 19, 2024 at 09:05:08AM +0000, Will Deacon wrote: > On Mon, Nov 18, 2024 at 08:55:33PM +0200, Leon Romanovsky wrote: > > On Mon, Nov 18, 2024 at 02:59:30PM +0000, Will Deacon wrote: > > > On Sun, Nov 10, 2024 at 03:46:54PM +0200, Leon Romanovsky wrote: > > > > +static void __iommu_dma_iova_unlink(struct device *dev, > > > > + struct dma_iova_state *state, size_t offset, size_t size, > > > > + enum dma_data_direction dir, unsigned long attrs, > > > > + bool free_iova) > > > > +{ > > > > + struct iommu_domain *domain = iommu_get_dma_domain(dev); > > > > + struct iommu_dma_cookie *cookie = domain->iova_cookie; > > > > + struct iova_domain *iovad = &cookie->iovad; > > > > + dma_addr_t addr = state->addr + offset; > > > > + size_t iova_start_pad = iova_offset(iovad, addr); > > > > + struct iommu_iotlb_gather iotlb_gather; > > > > + size_t unmapped; > > > > + > > > > + if ((state->__size & DMA_IOVA_USE_SWIOTLB) || > > > > + (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))) > > > > + iommu_dma_iova_unlink_range_slow(dev, addr, size, dir, attrs); > > > > + > > > > + iommu_iotlb_gather_init(&iotlb_gather); > > > > + iotlb_gather.queued = free_iova && READ_ONCE(cookie->fq_domain); > > > > + > > > > + size = iova_align(iovad, size + iova_start_pad); > > > > + addr -= iova_start_pad; > > > > + unmapped = iommu_unmap_fast(domain, addr, size, &iotlb_gather); > > > > + WARN_ON(unmapped != size); > > > > > > Does the new API require that the 'size' passed to dma_iova_unlink() > > > exactly match the 'size' passed to the corresponding call to > > > dma_iova_link()? I ask because the IOMMU page-table code is built around > > > the assumption that partial unmap() operations never occur (i.e. > > > operations which could require splitting a huge mapping). We just > > > removed [1] that code from the Arm IO page-table implementations, so it > > > would be good to avoid adding it back for this. 
> > > > dma_iova_link/dma_iova_unlink() don't have any assumptions in addition > > to already existing for dma_map_sg/dma_unmap_sg(). In reality, it means > > that all calls to unlink will have same size as for link. > > Ok, great. Any chance you could call that out in the documentation patch, > please? Can you suggest what should I add there, as it is not specific to new API, but general note applicable to all __iommu_unmap() callers? Thanks
On Tue, Nov 19, 2024 at 03:57:43PM +0200, Leon Romanovsky wrote: > > > dma_iova_link/dma_iova_unlink() don't have any assumptions in addition > > > to already existing for dma_map_sg/dma_unmap_sg(). In reality, it means > > > that all calls to unlink will have same size as for link. > > > > Ok, great. Any chance you could call that out in the documentation patch, > > please? > > Can you suggest what should I add there, as it is not specific to new > API, but general note applicable to all __iommu_unmap() callers? This is what I wrote: +/** + * iommu_unmap() - Remove mappings from a range of IOVA + * @domain: Domain to manipulate + * @iova: IO virtual address to start + * @size: Length of the range starting from @iova + * + * iommu_unmap() will remove a translation created by iommu_map(). It cannot + * subdivide a mapping created by iommu_map(), so it should be called with IOVA + * ranges that match what was passed to iommu_map(). The range can aggregate + * contiguous iommu_map() calls so long as no individual range is split. + * + * Returns: Number of bytes of IOVA unmapped. iova + res will be the point + * unmapping stopped. + */ Jason
© 2016 - 2024 Red Hat, Inc.