[net-next v3 02/12] net: tso: Add tso_dma_map helpers

Joe Damato posted 12 patches 2 weeks, 4 days ago
There is a newer version of this series
[net-next v3 02/12] net: tso: Add tso_dma_map helpers
Posted by Joe Damato 2 weeks, 4 days ago
Add skb_frag_phys() to skbuff.h. It returns the physical address of a
paged fragment's data and is used by the tso_dma_map helpers that this
commit introduces, described below:

tso_dma_map_init(): DMA-maps the linear payload region and all frags
upfront. Prefers the DMA IOVA API for a single contiguous mapping with
one IOTLB sync; falls back to per-region dma_map_phys() otherwise.
Returns 0 on success, cleans up partial mappings on failure.

tso_dma_map_cleanup(): Handles both IOVA and fallback teardown paths.

tso_dma_map_count(): counts how many descriptors the next N bytes of
payload will need. Returns 1 if IOVA is used since the mapping is
contiguous.

tso_dma_map_next(): yields the next (dma_addr, chunk_len) pair.
On the IOVA path, each segment is a single contiguous chunk. On the
fallback path, indicates when a chunk starts a new DMA mapping so the
driver can set dma_unmap_len on that descriptor for completion-time
unmapping.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Joe Damato <joe@dama.to>
---
 v3:
   - Added skb_frag_phys helper include/linux/skbuff.h.
   - Added tso_dma_map_use_iova() inline helper in tso.h.
   - Updated the helpers to use the DMA IOVA API, falling back to
     per-region mapping when it is not available.

 include/linux/skbuff.h |  11 ++
 include/net/tso.h      |  21 ++++
 net/core/tso.c         | 274 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 306 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9cc98f850f1d..d8630eb366c5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3758,6 +3758,17 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
 	return ptr + skb_frag_off(frag);
 }
 
+/**
+ * skb_frag_phys - physical address of the data in a paged fragment
+ * @frag: the paged fragment buffer
+ *
+ * Takes the physical address of the fragment's page and adds the
+ * fragment's offset to it.
+ *
+ * Returns: the physical address of the data within @frag.
+ */
+static inline phys_addr_t skb_frag_phys(const skb_frag_t *frag)
+{
+	phys_addr_t page_base = page_to_phys(skb_frag_page(frag));
+
+	return page_base + skb_frag_off(frag);
+}
+
 /**
  * skb_frag_page_copy() - sets the page in a fragment from another fragment
  * @fragto: skb fragment where page is set
diff --git a/include/net/tso.h b/include/net/tso.h
index 8f8d9d74e873..919058b4c9a1 100644
--- a/include/net/tso.h
+++ b/include/net/tso.h
@@ -68,4 +68,25 @@ struct tso_dma_map {
 	} frags[MAX_SKB_FRAGS];
 };
 
+int tso_dma_map_init(struct tso_dma_map *map, struct device *dev,
+		     const struct sk_buff *skb, unsigned int hdr_len);
+void tso_dma_map_cleanup(struct tso_dma_map *map);
+unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len);
+bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr,
+		      unsigned int *chunk_len, unsigned int *mapping_len,
+		      unsigned int seg_remaining);
+
+/**
+ * tso_dma_map_use_iova - report whether a map was set up via the IOVA path
+ * @map: the map to check
+ *
+ * Returns true if the DMA IOVA API was used for this mapping. In that
+ * case the driver must call tso_dma_map_cleanup() at completion time
+ * rather than unmapping each region on its own.
+ */
+static inline bool tso_dma_map_use_iova(struct tso_dma_map *map)
+{
+	struct dma_iova_state *state = &map->iova_state;
+
+	return dma_use_iova(state);
+}
+
 #endif	/* _TSO_H */
diff --git a/net/core/tso.c b/net/core/tso.c
index 6df997b9076e..731d5a5be1f8 100644
--- a/net/core/tso.c
+++ b/net/core/tso.c
@@ -3,6 +3,7 @@
 #include <linux/if_vlan.h>
 #include <net/ip.h>
 #include <net/tso.h>
+#include <linux/dma-mapping.h>
 #include <linux/unaligned.h>
 
 void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
@@ -87,3 +88,275 @@ int tso_start(struct sk_buff *skb, struct tso_t *tso)
 	return hdr_len;
 }
 EXPORT_SYMBOL(tso_start);
+
+/*
+ * Try to map the whole payload (linear part, then every frag) through the
+ * DMA IOVA API as one contiguous range.
+ *
+ * @offset is advanced by the length of every region that was linked.
+ *
+ * Returns 0 when the IOVA mapping is fully set up and synced. Returns 1
+ * when the IOVA path cannot be used; any partial IOVA state has then been
+ * destroyed and the iterator/length fields of @map reset, so the caller
+ * can fall back to per-region mapping.
+ *
+ * NOTE(review): each region is linked with its raw physical address at a
+ * running byte offset -- confirm that dma_iova_link() accepts regions that
+ * are not IOMMU-granule aligned at a non-zero offset.
+ */
+static int tso_dma_iova_try(struct device *dev, struct tso_dma_map *map,
+			    phys_addr_t phys, size_t linear_len, size_t total_len,
+			    size_t *offset)
+{
+	struct dma_iova_state *state = &map->iova_state;
+	const struct skb_shared_info *shinfo;
+	unsigned int frag_count;
+	unsigned int n;
+
+	if (!dma_iova_try_alloc(dev, state, phys, total_len))
+		return 1;
+
+	shinfo = skb_shinfo(map->skb);
+	frag_count = shinfo->nr_frags;
+
+	/* Linear payload goes first, if there is any. */
+	if (linear_len) {
+		if (dma_iova_link(dev, state, phys, *offset, linear_len,
+				  DMA_TO_DEVICE, 0))
+			goto undo;
+		map->linear_len = linear_len;
+		*offset += linear_len;
+	}
+
+	/* Then every frag, back to back in the same IOVA range. */
+	for (n = 0; n < frag_count; n++) {
+		const skb_frag_t *frag = &shinfo->frags[n];
+		unsigned int len = skb_frag_size(frag);
+
+		if (dma_iova_link(dev, state, skb_frag_phys(frag), *offset,
+				  len, DMA_TO_DEVICE, 0))
+			goto undo;
+		map->frags[n].len = len;
+		map->nr_frags = n + 1;
+		*offset += len;
+	}
+
+	if (dma_iova_sync(dev, state, 0, total_len))
+		goto undo;
+
+	return 0;
+
+undo:
+	/* *offset is the number of bytes linked so far */
+	dma_iova_destroy(dev, state, *offset, DMA_TO_DEVICE, 0);
+	memset(state, 0, sizeof(*state));
+
+	/* reset map state */
+	map->frag_idx = -1;
+	map->offset = 0;
+	map->linear_len = 0;
+	map->nr_frags = 0;
+
+	return 1;
+}
+
+/**
+ * tso_dma_map_init - DMA-map GSO payload regions
+ * @map: map struct to initialize
+ * @dev: device for DMA mapping
+ * @skb: the GSO skb
+ * @hdr_len: per-segment header length in bytes
+ *
+ * DMA-maps the linear payload (after headers) and all frags.
+ * Prefers the DMA IOVA API (one contiguous mapping, one IOTLB sync);
+ * falls back to per-region dma_map_phys() when IOVA is not available.
+ * Positions the iterator at byte 0 of the payload.
+ *
+ * @map is always left initialized, including when the skb carries no
+ * payload or when mapping fails, so tso_dma_map_use_iova() and
+ * tso_dma_map_cleanup() can be called on it afterwards.
+ *
+ * Returns 0 on success (including an empty payload, in which case nothing
+ * is mapped), -ENOMEM on DMA mapping failure (partial mappings are cleaned
+ * up internally).
+ */
+int tso_dma_map_init(struct tso_dma_map *map, struct device *dev,
+		     const struct sk_buff *skb, unsigned int hdr_len)
+{
+	unsigned int linear_len = skb_headlen(skb) - hdr_len;
+	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	size_t total_len = skb->len - hdr_len;
+	size_t offset = 0;
+	phys_addr_t phys;
+	int i;
+
+	/* Initialize before any early return: the cleanup and query helpers
+	 * read these fields and must never see an uninitialized map.
+	 */
+	map->dev = dev;
+	map->skb = skb;
+	map->hdr_len = hdr_len;
+	map->frag_idx = -1;
+	map->offset = 0;
+	map->iova_offset = 0;
+	map->total_len = total_len;
+	map->linear_len = 0;
+	map->nr_frags = 0;
+	memset(&map->iova_state, 0, sizeof(map->iova_state));
+
+	/* No payload: nothing to map */
+	if (!total_len)
+		return 0;
+
+	/* Physical address of the first payload byte */
+	if (linear_len)
+		phys = virt_to_phys(skb->data + hdr_len);
+	else
+		phys = skb_frag_phys(&skb_shinfo(skb)->frags[0]);
+
+	if (tso_dma_iova_try(dev, map, phys, linear_len, total_len, &offset)) {
+		/* IOVA path failed, map state was reset. Fallback to
+		 * per-region dma_map_phys()
+		 */
+		if (linear_len) {
+			map->linear_dma = dma_map_phys(dev, phys, linear_len,
+						       DMA_TO_DEVICE, 0);
+			/* Nothing is mapped yet, so there is nothing to undo */
+			if (dma_mapping_error(dev, map->linear_dma))
+				return -ENOMEM;
+			map->linear_len = linear_len;
+		}
+
+		for (i = 0; i < nr_frags; i++) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			unsigned int frag_len = skb_frag_size(frag);
+
+			map->frags[i].len = frag_len;
+			map->frags[i].dma = dma_map_phys(dev, skb_frag_phys(frag),
+							 frag_len, DMA_TO_DEVICE, 0);
+			if (dma_mapping_error(dev, map->frags[i].dma)) {
+				/* nr_frags still excludes frag i, so only the
+				 * regions mapped so far are unmapped
+				 */
+				tso_dma_map_cleanup(map);
+				return -ENOMEM;
+			}
+			map->nr_frags = i + 1;
+		}
+	}
+
+	/* No linear payload: start iterating at the first frag */
+	if (linear_len == 0 && nr_frags > 0)
+		map->frag_idx = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL(tso_dma_map_init);
+
+/**
+ * tso_dma_map_cleanup - unmap all DMA regions in a tso_dma_map
+ * @map: the map to clean up
+ *
+ * Works for both mapping flavours: an IOVA mapping is torn down with a
+ * single dma_iova_destroy() call, a fallback mapping is unmapped one
+ * region at a time. In both cases the recorded region lengths are
+ * cleared afterwards.
+ */
+void tso_dma_map_cleanup(struct tso_dma_map *map)
+{
+	struct device *dev = map->dev;
+	int idx;
+
+	if (!dma_use_iova(&map->iova_state)) {
+		/* Fallback path: linear region first, then each frag */
+		if (map->linear_len)
+			dma_unmap_phys(dev, map->linear_dma, map->linear_len,
+				       DMA_TO_DEVICE, 0);
+
+		for (idx = 0; idx < map->nr_frags; idx++)
+			dma_unmap_phys(dev, map->frags[idx].dma,
+				       map->frags[idx].len, DMA_TO_DEVICE, 0);
+	} else {
+		/* IOVA path: one range covers the whole payload */
+		dma_iova_destroy(dev, &map->iova_state, map->total_len,
+				 DMA_TO_DEVICE, 0);
+		memset(&map->iova_state, 0, sizeof(map->iova_state));
+	}
+
+	map->linear_len = 0;
+	map->nr_frags = 0;
+}
+EXPORT_SYMBOL(tso_dma_map_cleanup);
+
+/**
+ * tso_dma_map_count - count descriptors for a payload range
+ * @map: the payload map
+ * @len: number of payload bytes in this segment
+ *
+ * Counts how many contiguous DMA region chunks the next @len bytes
+ * will span, without advancing the iterator. On the IOVA path this
+ * is always 1 (contiguous). On the fallback path, uses region sizes
+ * from the current position.
+ *
+ * The walk stops at the last mapped region, so a @len larger than the
+ * remaining payload never reads past the regions recorded in @map.
+ *
+ * Returns the number of descriptors needed for @len bytes of payload.
+ */
+unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len)
+{
+	unsigned int offset = map->offset;
+	int idx = map->frag_idx;
+	unsigned int count = 0;
+
+	if (!len)
+		return 0;
+
+	if (dma_use_iova(&map->iova_state))
+		return 1;
+
+	/* idx == -1 is the linear region; frags are 0..nr_frags-1. The cast
+	 * keeps the comparison signed so that -1 passes the bound check.
+	 */
+	while (len > 0 && idx < (int)map->nr_frags) {
+		unsigned int region_len, chunk;
+
+		if (idx == -1)
+			region_len = map->linear_len;
+		else
+			region_len = map->frags[idx].len;
+
+		chunk = min(len, region_len - offset);
+		len -= chunk;
+		count++;
+		/* only the first region is entered part-way through */
+		offset = 0;
+		idx++;
+	}
+
+	return count;
+}
+EXPORT_SYMBOL(tso_dma_map_count);
+
+/**
+ * tso_dma_map_next - yield the next DMA address range
+ * @map: the payload map
+ * @addr: output DMA address
+ * @chunk_len: output chunk length
+ * @mapping_len: full DMA mapping length when this chunk starts a new
+ *               mapping region, or 0 when continuing a previous one.
+ *               On the IOVA path this is always 0 (driver must not
+ *               do per-region unmaps; use tso_dma_map_cleanup instead).
+ * @seg_remaining: bytes left in current segment
+ *
+ * Yields the next (dma_addr, chunk_len) pair and advances the iterator.
+ * On the IOVA path, the entire payload is contiguous so each segment
+ * is always a single chunk.
+ *
+ * Returns true if a chunk was yielded. Returns false, leaving the outputs
+ * untouched, when @seg_remaining is 0 or when the fallback iterator has
+ * already moved past the last mapped region.
+ */
+bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr,
+		      unsigned int *chunk_len, unsigned int *mapping_len,
+		      unsigned int seg_remaining)
+{
+	unsigned int region_len, chunk;
+	dma_addr_t region_dma;
+	int idx = map->frag_idx;
+
+	if (!seg_remaining)
+		return false;
+
+	/* IOVA path: contiguous DMA range, no region boundaries */
+	if (dma_use_iova(&map->iova_state)) {
+		*addr = map->iova_state.addr + map->iova_offset;
+		*chunk_len = seg_remaining;
+		*mapping_len = 0;
+		map->iova_offset += seg_remaining;
+		return true;
+	}
+
+	/* Fallback path: per-region iteration. idx == -1 selects the linear
+	 * region, 0..nr_frags-1 select a frag; anything else means the
+	 * iterator is exhausted and there is no region left to read.
+	 */
+	if (idx == -1) {
+		region_dma = map->linear_dma;
+		region_len = map->linear_len;
+	} else if (idx < (int)map->nr_frags) {
+		region_dma = map->frags[idx].dma;
+		region_len = map->frags[idx].len;
+	} else {
+		return false;
+	}
+
+	chunk = min(seg_remaining, region_len - map->offset);
+	*addr = region_dma + map->offset;
+	/* Report the mapping length only on the first chunk of a region */
+	*mapping_len = (map->offset == 0) ? region_len : 0;
+	*chunk_len = chunk;
+
+	map->offset += chunk;
+
+	/* Region fully consumed: move on to the next one */
+	if (map->offset >= region_len) {
+		map->frag_idx++;
+		map->offset = 0;
+	}
+
+	return true;
+}
+EXPORT_SYMBOL(tso_dma_map_next);
-- 
2.52.0
Re: [net-next v3 02/12] net: tso: Add tso_dma_map helpers
Posted by Leon Romanovsky 2 weeks, 4 days ago
On Wed, Mar 18, 2026 at 12:13:07PM -0700, Joe Damato wrote:
> Adds skb_frag_phys() to skbuff.h, returning the physical address
> of a paged fragment's data, which is used by the tso_dma_map helpers
> introduced in this commit described below:
> 
> tso_dma_map_init(): DMA-maps the linear payload region and all frags
> upfront. Prefers the DMA IOVA API for a single contiguous mapping with
> one IOTLB sync; falls back to per-region dma_map_phys() otherwise.
> Returns 0 on success, cleans up partial mappings on failure.
> 
> tso_dma_map_cleanup(): Handles both IOVA and fallback teardown paths.
> 
> tso_dma_map_count(): counts how many descriptors the next N bytes of
> payload will need. Returns 1 if IOVA is used since the mapping is
> contiguous.
> 
> tso_dma_map_next(): yields the next (dma_addr, chunk_len) pair.
> On the IOVA path, each segment is a single contiguous chunk. On the
> fallback path, indicates when a chunk starts a new DMA mapping so the
> driver can set dma_unmap_len on that descriptor for completion-time
> unmapping.
> 
> Suggested-by: Jakub Kicinski <kuba@kernel.org>
> Signed-off-by: Joe Damato <joe@dama.to>
> ---
>  v3:
>    - Added skb_frag_phys helper include/linux/skbuff.h.
>    - Added tso_dma_map_use_iova() inline helper in tso.h.
>    - Updated the helpers to use the DMA IOVA API and falls back to per-region
>      mapping instead.
> 
>  include/linux/skbuff.h |  11 ++
>  include/net/tso.h      |  21 ++++
>  net/core/tso.c         | 274 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 306 insertions(+)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 9cc98f850f1d..d8630eb366c5 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -3758,6 +3758,17 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
>  	return ptr + skb_frag_off(frag);
>  }
>  
> +/**
> + * skb_frag_phys - gets the physical address of the data in a paged fragment
> + * @frag: the paged fragment buffer
> + *
> + * Returns: the physical address of the data within @frag.
> + */
> +static inline phys_addr_t skb_frag_phys(const skb_frag_t *frag)
> +{
> +	return page_to_phys(skb_frag_page(frag)) + skb_frag_off(frag);
> +}

I skimmed through the patch and it looks generally correct to me. The one
thing that disappointed me is this function. It's unfortunate that you
have to implement it this way and cannot rely on phys_addr_t directly.

Thanks
Re: [net-next v3 02/12] net: tso: Add tso_dma_map helpers
Posted by Joe Damato 2 weeks, 3 days ago
On Thu, Mar 19, 2026 at 09:39:59AM +0200, Leon Romanovsky wrote:
> On Wed, Mar 18, 2026 at 12:13:07PM -0700, Joe Damato wrote:
> > Adds skb_frag_phys() to skbuff.h, returning the physical address
> > of a paged fragment's data, which is used by the tso_dma_map helpers
> > introduced in this commit described below:
> > 
> > tso_dma_map_init(): DMA-maps the linear payload region and all frags
> > upfront. Prefers the DMA IOVA API for a single contiguous mapping with
> > one IOTLB sync; falls back to per-region dma_map_phys() otherwise.
> > Returns 0 on success, cleans up partial mappings on failure.
> > 
> > tso_dma_map_cleanup(): Handles both IOVA and fallback teardown paths.
> > 
> > tso_dma_map_count(): counts how many descriptors the next N bytes of
> > payload will need. Returns 1 if IOVA is used since the mapping is
> > contiguous.
> > 
> > tso_dma_map_next(): yields the next (dma_addr, chunk_len) pair.
> > On the IOVA path, each segment is a single contiguous chunk. On the
> > fallback path, indicates when a chunk starts a new DMA mapping so the
> > driver can set dma_unmap_len on that descriptor for completion-time
> > unmapping.
> > 
> > Suggested-by: Jakub Kicinski <kuba@kernel.org>
> > Signed-off-by: Joe Damato <joe@dama.to>
> > ---
> >  v3:
> >    - Added skb_frag_phys helper include/linux/skbuff.h.
> >    - Added tso_dma_map_use_iova() inline helper in tso.h.
> >    - Updated the helpers to use the DMA IOVA API and falls back to per-region
> >      mapping instead.
> > 
> >  include/linux/skbuff.h |  11 ++
> >  include/net/tso.h      |  21 ++++
> >  net/core/tso.c         | 274 +++++++++++++++++++++++++++++++++++++++++
> >  3 files changed, 306 insertions(+)
> > 
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 9cc98f850f1d..d8630eb366c5 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -3758,6 +3758,17 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
> >  	return ptr + skb_frag_off(frag);
> >  }
> >  
> > +/**
> > + * skb_frag_phys - gets the physical address of the data in a paged fragment
> > + * @frag: the paged fragment buffer
> > + *
> > + * Returns: the physical address of the data within @frag.
> > + */
> > +static inline phys_addr_t skb_frag_phys(const skb_frag_t *frag)
> > +{
> > +	return page_to_phys(skb_frag_page(frag)) + skb_frag_off(frag);
> > +}
> 
> I skimmed through the patch and it looks generally correct to me. The one
> thing that disappointed me is this function. It's unfortunate that you
> have to implement it this way and cannot rely on phys_addr_t directly.

Thanks for taking a look.

Would you prefer if I modified this and created a netmem_to_phys() helper
(page-only for now, but extensible later?) and use that instead?

Or is this patch acceptable as-is for this series with the understanding that
it only handles page-backed frags?

Just want to make sure I'm following what you mean.

Thanks.
Re: [net-next v3 02/12] net: tso: Add tso_dma_map helpers
Posted by Leon Romanovsky 2 weeks, 3 days ago
On Thu, Mar 19, 2026 at 10:09:06AM -0700, Joe Damato wrote:
> On Thu, Mar 19, 2026 at 09:39:59AM +0200, Leon Romanovsky wrote:
> > On Wed, Mar 18, 2026 at 12:13:07PM -0700, Joe Damato wrote:
> > > Adds skb_frag_phys() to skbuff.h, returning the physical address
> > > of a paged fragment's data, which is used by the tso_dma_map helpers
> > > introduced in this commit described below:
> > > 
> > > tso_dma_map_init(): DMA-maps the linear payload region and all frags
> > > upfront. Prefers the DMA IOVA API for a single contiguous mapping with
> > > one IOTLB sync; falls back to per-region dma_map_phys() otherwise.
> > > Returns 0 on success, cleans up partial mappings on failure.
> > > 
> > > tso_dma_map_cleanup(): Handles both IOVA and fallback teardown paths.
> > > 
> > > tso_dma_map_count(): counts how many descriptors the next N bytes of
> > > payload will need. Returns 1 if IOVA is used since the mapping is
> > > contiguous.
> > > 
> > > tso_dma_map_next(): yields the next (dma_addr, chunk_len) pair.
> > > On the IOVA path, each segment is a single contiguous chunk. On the
> > > fallback path, indicates when a chunk starts a new DMA mapping so the
> > > driver can set dma_unmap_len on that descriptor for completion-time
> > > unmapping.
> > > 
> > > Suggested-by: Jakub Kicinski <kuba@kernel.org>
> > > Signed-off-by: Joe Damato <joe@dama.to>
> > > ---
> > >  v3:
> > >    - Added skb_frag_phys helper include/linux/skbuff.h.
> > >    - Added tso_dma_map_use_iova() inline helper in tso.h.
> > >    - Updated the helpers to use the DMA IOVA API and falls back to per-region
> > >      mapping instead.
> > > 
> > >  include/linux/skbuff.h |  11 ++
> > >  include/net/tso.h      |  21 ++++
> > >  net/core/tso.c         | 274 +++++++++++++++++++++++++++++++++++++++++
> > >  3 files changed, 306 insertions(+)
> > > 
> > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > > index 9cc98f850f1d..d8630eb366c5 100644
> > > --- a/include/linux/skbuff.h
> > > +++ b/include/linux/skbuff.h
> > > @@ -3758,6 +3758,17 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
> > >  	return ptr + skb_frag_off(frag);
> > >  }
> > >  
> > > +/**
> > > + * skb_frag_phys - gets the physical address of the data in a paged fragment
> > > + * @frag: the paged fragment buffer
> > > + *
> > > + * Returns: the physical address of the data within @frag.
> > > + */
> > > +static inline phys_addr_t skb_frag_phys(const skb_frag_t *frag)
> > > +{
> > > +	return page_to_phys(skb_frag_page(frag)) + skb_frag_off(frag);
> > > +}
> > 
> > I skimmed through the patch and it looks generally correct to me. The one
> > thing that disappointed me is this function. It's unfortunate that you
> > have to implement it this way and cannot rely on phys_addr_t directly.
> 
> Thanks for taking a look.
> 
> Would you prefer if I modified this and created a netmem_to_phys() helper
> (page-only for now, but extensible later?) and use that instead?
> 
> Or is this patch acceptable as-is for this series with the understanding that
> it only handles page-backed frags?
> 
> Just want to make sure I'm following what you mean.

It is acceptable as is. I'm just expressing my view about performing so
many translations.

Thanks

> 
> Thanks.
>