[net-next v6 09/12] net: bnxt: Add SW GSO completion and teardown support

Joe Damato posted 12 patches 6 days, 18 hours ago
There is a newer version of this series
[net-next v6 09/12] net: bnxt: Add SW GSO completion and teardown support
Posted by Joe Damato 6 days, 18 hours ago
Update __bnxt_tx_int and bnxt_free_one_tx_ring_skbs to handle SW GSO
segments:

- MID segments: adjust tx_pkts/tx_bytes accounting and skip skb free
  (the skb is shared across all segments and freed only once)

- LAST segments: if the DMA IOVA path was used, use dma_iova_destroy to
  tear down the contiguous mapping. On the fallback path, payload DMA
  unmapping is handled by the existing per-BD dma_unmap_len walk.

Both MID and LAST completions advance tx_inline_cons to release the
segment's inline header slot back to the ring.

is_sw_gso is initialized to zero, so the new code paths are not exercised
yet; they become live once a later patch in the series sets is_sw_gso on
the transmit side.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Joe Damato <joe@dama.to>
---
 v5:
   - Added Pavan's Reviewed-by. No functional changes.

 v3:
   - completion paths updated to use DMA IOVA APIs to tear down mappings.

 rfcv2:
   - Update the shared header buffer consumer on TX completion.

 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 82 +++++++++++++++++--
 .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 19 ++++-
 2 files changed, 91 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 9e3a74ce5b75..946608db92a0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -74,6 +74,8 @@
 #include "bnxt_debugfs.h"
 #include "bnxt_coredump.h"
 #include "bnxt_hwmon.h"
+#include "bnxt_gso.h"
+#include <net/tso.h>
 
 #define BNXT_TX_TIMEOUT		(5 * HZ)
 #define BNXT_DEF_MSG_ENABLE	(NETIF_MSG_DRV | NETIF_MSG_HW | \
@@ -817,12 +819,13 @@ static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
 	bool rc = false;
 
 	while (RING_TX(bp, cons) != hw_cons) {
-		struct bnxt_sw_tx_bd *tx_buf;
+		struct bnxt_sw_tx_bd *tx_buf, *head_buf;
 		struct sk_buff *skb;
 		bool is_ts_pkt;
 		int j, last;
 
 		tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)];
+		head_buf = tx_buf;
 		skb = tx_buf->skb;
 
 		if (unlikely(!skb)) {
@@ -869,6 +872,23 @@ static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
 							    DMA_TO_DEVICE, 0);
 			}
 		}
+
+		if (unlikely(head_buf->is_sw_gso)) {
+			txr->tx_inline_cons++;
+			if (head_buf->is_sw_gso == BNXT_SW_GSO_LAST) {
+				if (dma_use_iova(&head_buf->iova_state))
+					dma_iova_destroy(&pdev->dev,
+							 &head_buf->iova_state,
+							 head_buf->iova_total_len,
+							 DMA_TO_DEVICE, 0);
+			} else {
+				tx_pkts--;
+				tx_bytes -= skb->len;
+				skb = NULL;
+			}
+			head_buf->is_sw_gso = 0;
+		}
+
 		if (unlikely(is_ts_pkt)) {
 			if (BNXT_CHIP_P5(bp)) {
 				/* PTP worker takes ownership of the skb */
@@ -3420,6 +3440,7 @@ static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp,
 
 	for (i = 0; i < max_idx;) {
 		struct bnxt_sw_tx_bd *tx_buf = &txr->tx_buf_ring[i];
+		struct bnxt_sw_tx_bd *head_buf = tx_buf;
 		struct sk_buff *skb;
 		int j, last;
 
@@ -3472,7 +3493,20 @@ static void bnxt_free_one_tx_ring_skbs(struct bnxt *bp,
 							    DMA_TO_DEVICE, 0);
 			}
 		}
-		dev_kfree_skb(skb);
+		if (head_buf->is_sw_gso) {
+			txr->tx_inline_cons++;
+			if (head_buf->is_sw_gso == BNXT_SW_GSO_LAST) {
+				if (dma_use_iova(&head_buf->iova_state))
+					dma_iova_destroy(&pdev->dev,
+							 &head_buf->iova_state,
+							 head_buf->iova_total_len,
+							 DMA_TO_DEVICE, 0);
+			} else {
+				skb = NULL;
+			}
+		}
+		if (skb)
+			dev_kfree_skb(skb);
 	}
 	netdev_tx_reset_queue(netdev_get_tx_queue(bp->dev, idx));
 }
@@ -3998,9 +4032,9 @@ static void bnxt_free_tx_inline_buf(struct bnxt_tx_ring_info *txr,
 	txr->tx_inline_size = 0;
 }
 
-static int __maybe_unused bnxt_alloc_tx_inline_buf(struct bnxt_tx_ring_info *txr,
-						   struct pci_dev *pdev,
-						   unsigned int size)
+static int bnxt_alloc_tx_inline_buf(struct bnxt_tx_ring_info *txr,
+				    struct pci_dev *pdev,
+				    unsigned int size)
 {
 	txr->tx_inline_buf = kmalloc(size, GFP_KERNEL);
 	if (!txr->tx_inline_buf)
@@ -4103,6 +4137,14 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp)
 				sizeof(struct tx_push_bd);
 			txr->data_mapping = cpu_to_le64(mapping);
 		}
+		if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
+		    (bp->dev->features & NETIF_F_GSO_UDP_L4)) {
+			rc = bnxt_alloc_tx_inline_buf(txr, pdev,
+						      BNXT_SW_USO_MAX_SEGS *
+						      TSO_HEADER_SIZE);
+			if (rc)
+				return rc;
+		}
 		qidx = bp->tc_to_qidx[j];
 		ring->queue_id = bp->q_info[qidx].queue_id;
 		spin_lock_init(&txr->xdp_tx_lock);
@@ -4645,6 +4687,10 @@ static int bnxt_init_tx_rings(struct bnxt *bp)
 
 	bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
 				   BNXT_MIN_TX_DESC_CNT);
+	if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
+	    (bp->dev->features & NETIF_F_GSO_UDP_L4))
+		bp->tx_wake_thresh = max_t(int, bp->tx_wake_thresh,
+					   BNXT_SW_USO_MAX_DESCS);
 
 	for (i = 0; i < bp->tx_nr_rings; i++) {
 		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
@@ -13832,6 +13878,11 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
 	if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp, false))
 		features &= ~NETIF_F_NTUPLE;
 
+	if ((features & NETIF_F_GSO_UDP_L4) &&
+	    !(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
+	    bp->tx_ring_size < 2 * BNXT_SW_USO_MAX_DESCS)
+		features &= ~NETIF_F_GSO_UDP_L4;
+
 	if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog)
 		features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
 
@@ -13877,6 +13928,15 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
 	int rc = 0;
 	bool re_init = false;
 
+	if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) {
+		if (features & NETIF_F_GSO_UDP_L4)
+			bp->tx_wake_thresh = max_t(int, bp->tx_wake_thresh,
+						   BNXT_SW_USO_MAX_DESCS);
+		else
+			bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
+						   BNXT_MIN_TX_DESC_CNT);
+	}
+
 	flags &= ~BNXT_FLAG_ALL_CONFIG_FEATS;
 	if (features & NETIF_F_GRO_HW)
 		flags |= BNXT_FLAG_GRO;
@@ -16880,8 +16940,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			   NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM |
 			   NETIF_F_GSO_PARTIAL | NETIF_F_RXHASH |
 			   NETIF_F_RXCSUM | NETIF_F_GRO;
-	if (bp->flags & BNXT_FLAG_UDP_GSO_CAP)
-		dev->hw_features |= NETIF_F_GSO_UDP_L4;
+	dev->hw_features |= NETIF_F_GSO_UDP_L4;
 
 	if (BNXT_SUPPORTS_TPA(bp))
 		dev->hw_features |= NETIF_F_LRO;
@@ -16914,8 +16973,15 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->priv_flags |= IFF_UNICAST_FLT;
 
 	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
-	if (bp->tso_max_segs)
+	if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) {
+		u16 max_segs = BNXT_SW_USO_MAX_SEGS;
+
+		if (bp->tso_max_segs)
+			max_segs = min_t(u16, max_segs, bp->tso_max_segs);
+		netif_set_tso_max_segs(dev, max_segs);
+	} else if (bp->tso_max_segs) {
 		netif_set_tso_max_segs(dev, bp->tso_max_segs);
+	}
 
 	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
 			    NETDEV_XDP_ACT_RX_SG;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index b87ac2bb43dd..a3d17f436b22 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -33,6 +33,7 @@
 #include "bnxt_xdp.h"
 #include "bnxt_ptp.h"
 #include "bnxt_ethtool.h"
+#include "bnxt_gso.h"
 #include "bnxt_nvm_defs.h"	/* NVRAM content constant and structure defs */
 #include "bnxt_fw_hdr.h"	/* Firmware hdr constant and structure defs */
 #include "bnxt_coredump.h"
@@ -852,12 +853,18 @@ static int bnxt_set_ringparam(struct net_device *dev,
 	u8 tcp_data_split = kernel_ering->tcp_data_split;
 	struct bnxt *bp = netdev_priv(dev);
 	u8 hds_config_mod;
+	int rc;
 
 	if ((ering->rx_pending > BNXT_MAX_RX_DESC_CNT) ||
 	    (ering->tx_pending > BNXT_MAX_TX_DESC_CNT) ||
 	    (ering->tx_pending < BNXT_MIN_TX_DESC_CNT))
 		return -EINVAL;
 
+	if ((dev->features & NETIF_F_GSO_UDP_L4) &&
+	    !(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
+	    ering->tx_pending < 2 * BNXT_SW_USO_MAX_DESCS)
+		return -EINVAL;
+
 	hds_config_mod = tcp_data_split != dev->cfg->hds_config;
 	if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && hds_config_mod)
 		return -EINVAL;
@@ -882,9 +889,17 @@ static int bnxt_set_ringparam(struct net_device *dev,
 	bp->tx_ring_size = ering->tx_pending;
 	bnxt_set_ring_params(bp);
 
-	if (netif_running(dev))
-		return bnxt_open_nic(bp, false, false);
+	if (netif_running(dev)) {
+		rc = bnxt_open_nic(bp, false, false);
+		if (rc)
+			return rc;
+	}
 
+	/* ring size changes may affect features (SW USO requires a minimum
+	 * ring size), so recalculate features to ensure the correct features
+	 * are blocked/available.
+	 */
+	netdev_update_features(dev);
 	return 0;
 }
 
-- 
2.52.0
Re: [net-next v6 09/12] net: bnxt: Add SW GSO completion and teardown support
Posted by Jakub Kicinski 2 days, 18 hours ago
On Thu, 26 Mar 2026 16:52:28 -0700 Joe Damato wrote:
> @@ -4645,6 +4687,10 @@ static int bnxt_init_tx_rings(struct bnxt *bp)
>  
>  	bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
>  				   BNXT_MIN_TX_DESC_CNT);
> +	if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
> +	    (bp->dev->features & NETIF_F_GSO_UDP_L4))
> +		bp->tx_wake_thresh = max_t(int, bp->tx_wake_thresh,
> +					   BNXT_SW_USO_MAX_DESCS);
>  
>  	for (i = 0; i < bp->tx_nr_rings; i++) {
>  		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
> @@ -13832,6 +13878,11 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
>  	if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp, false))
>  		features &= ~NETIF_F_NTUPLE;
>  
> +	if ((features & NETIF_F_GSO_UDP_L4) &&
> +	    !(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
> +	    bp->tx_ring_size < 2 * BNXT_SW_USO_MAX_DESCS)
> +		features &= ~NETIF_F_GSO_UDP_L4;
> +
>  	if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog)
>  		features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
>  
> @@ -13877,6 +13928,15 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
>  	int rc = 0;
>  	bool re_init = false;
>  
> +	if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) {
> +		if (features & NETIF_F_GSO_UDP_L4)
> +			bp->tx_wake_thresh = max_t(int, bp->tx_wake_thresh,
> +						   BNXT_SW_USO_MAX_DESCS);
> +		else
> +			bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
> +						   BNXT_MIN_TX_DESC_CNT);

Adding extra handling for min ring size all over the place looks a bit
messy. Can you factor something out of this logic?
Re: [net-next v6 09/12] net: bnxt: Add SW GSO completion and teardown support
Posted by Joe Damato 1 day, 21 hours ago
On Mon, Mar 30, 2026 at 04:57:48PM -0700, Jakub Kicinski wrote:
> On Thu, 26 Mar 2026 16:52:28 -0700 Joe Damato wrote:
> > @@ -4645,6 +4687,10 @@ static int bnxt_init_tx_rings(struct bnxt *bp)
> >  
> >  	bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
> >  				   BNXT_MIN_TX_DESC_CNT);
> > +	if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
> > +	    (bp->dev->features & NETIF_F_GSO_UDP_L4))
> > +		bp->tx_wake_thresh = max_t(int, bp->tx_wake_thresh,
> > +					   BNXT_SW_USO_MAX_DESCS);
> >  
> >  	for (i = 0; i < bp->tx_nr_rings; i++) {
> >  		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
> > @@ -13832,6 +13878,11 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
> >  	if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp, false))
> >  		features &= ~NETIF_F_NTUPLE;
> >  
> > +	if ((features & NETIF_F_GSO_UDP_L4) &&
> > +	    !(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
> > +	    bp->tx_ring_size < 2 * BNXT_SW_USO_MAX_DESCS)
> > +		features &= ~NETIF_F_GSO_UDP_L4;
> > +
> >  	if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog)
> >  		features &= ~(NETIF_F_LRO | NETIF_F_GRO_HW);
> >  
> > @@ -13877,6 +13928,15 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features)
> >  	int rc = 0;
> >  	bool re_init = false;
> >  
> > +	if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) {
> > +		if (features & NETIF_F_GSO_UDP_L4)
> > +			bp->tx_wake_thresh = max_t(int, bp->tx_wake_thresh,
> > +						   BNXT_SW_USO_MAX_DESCS);
> > +		else
> > +			bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
> > +						   BNXT_MIN_TX_DESC_CNT);
> 
> Adding extra handling for min ring size all over the place looks a bit
> messy. Can you factor something out of this logic?

Could add something like:

  static int bnxt_min_tx_desc_cnt(struct bnxt *bp)
  {
      if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
          (bp->dev->features & NETIF_F_GSO_UDP_L4))
          return BNXT_SW_USO_MAX_DESCS;
      return BNXT_MIN_TX_DESC_CNT;
  }

and then when setting the tx_wake_thresh, it becomes:

  bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
                             bnxt_min_tx_desc_cnt(bp));

and fix_features can use the same helper.

Question then is just do we still want to bump BNXT_MIN_TX_DESC_CNT (as per
your previous comment)?
Re: [net-next v6 09/12] net: bnxt: Add SW GSO completion and teardown support
Posted by Jakub Kicinski 1 day, 17 hours ago
On Tue, 31 Mar 2026 14:14:09 -0700 Joe Damato wrote:
> > > +	if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP)) {
> > > +		if (features & NETIF_F_GSO_UDP_L4)
> > > +			bp->tx_wake_thresh = max_t(int, bp->tx_wake_thresh,
> > > +						   BNXT_SW_USO_MAX_DESCS);
> > > +		else
> > > +			bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
> > > +						   BNXT_MIN_TX_DESC_CNT);  
> > 
> > Adding extra handling for min ring size all over the place looks a bit
> > messy. Can you factor something out of this logic?  
> 
> Could add something like:
> 
>   static int bnxt_min_tx_desc_cnt(struct bnxt *bp)
>   {
>       if (!(bp->flags & BNXT_FLAG_UDP_GSO_CAP) &&
>           (bp->dev->features & NETIF_F_GSO_UDP_L4))
>           return BNXT_SW_USO_MAX_DESCS;
>       return BNXT_MIN_TX_DESC_CNT;
>   }
> 
> and then when setting the tx_wake_thresh, it becomes:
> 
>   bp->tx_wake_thresh = max_t(int, bp->tx_ring_size / 2,
>                              bnxt_min_tx_desc_cnt(bp));
> 
> and fix_features can use the same helper.

Something along these lines looks reasonable.

> Question then is just do we still want to bump BNXT_MIN_TX_DESC_CNT (as per
> your previous comment)?

No, sorry, I sent that one before looking at the relevant part of
the diffs
Re: [net-next v6 09/12] net: bnxt: Add SW GSO completion and teardown support
Posted by Jakub Kicinski 3 days, 20 hours ago
On Thu, 26 Mar 2026 16:52:28 -0700 Joe Damato wrote:
> +			if (head_buf->is_sw_gso == BNXT_SW_GSO_LAST) {
> +				if (dma_use_iova(&head_buf->iova_state))
> +					dma_iova_destroy(&pdev->dev,
> +							 &head_buf->iova_state,
> +							 head_buf->iova_total_len,
> +							 DMA_TO_DEVICE, 0);

Do we have to expose the dma_use_iova() stuff to the driver at all?
Could we have a function the driver is supposed to call to clean up,
always, and what the function does is up to the TSO lib?
Re: [net-next v6 09/12] net: bnxt: Add SW GSO completion and teardown support
Posted by Joe Damato 3 days, 1 hour ago
On Sun, Mar 29, 2026 at 03:22:36PM -0700, Jakub Kicinski wrote:
> On Thu, 26 Mar 2026 16:52:28 -0700 Joe Damato wrote:
> > +			if (head_buf->is_sw_gso == BNXT_SW_GSO_LAST) {
> > +				if (dma_use_iova(&head_buf->iova_state))
> > +					dma_iova_destroy(&pdev->dev,
> > +							 &head_buf->iova_state,
> > +							 head_buf->iova_total_len,
> > +							 DMA_TO_DEVICE, 0);
> 
> Do we have to expose the dma_use_iova() stuff to the driver at all?
> Could we have a function the driver is supposed to call to clean up,
> always, and what the function does is up to the TSO lib?

I could add a tso_dma_map_destroy(dev, iova_state, len) that the driver calls,
but the driver would still need to stash iova_state and total_len on the ring.

That would be easiest, but I'm not sure if you were thinking that the IOVA
stuff should be as opaque as possible?

Because if you do want it to be as opaque as possible, maybe:

/* Add a struct to tso.h to track completion state */
struct tso_dma_map_completion_state {
  struct dma_iova_state iova_state;
  size_t total_len;
};

Add a save function: tso_dma_map_completion_save(map, completion_state);

And then: 
 - the bnxt sw bd stores a struct tso_dma_map_completion_state.
 - xmit calls tso_dma_map_completion_save to store the iova_state
 - completion calls tso_dma_complete(dev, &head_buf->completion_state)

LMK if you meant the easier way or the more opaque way?
Re: [net-next v6 09/12] net: bnxt: Add SW GSO completion and teardown support
Posted by Jakub Kicinski 2 days, 18 hours ago
On Mon, 30 Mar 2026 10:07:17 -0700 Joe Damato wrote:
> On Sun, Mar 29, 2026 at 03:22:36PM -0700, Jakub Kicinski wrote:
> > On Thu, 26 Mar 2026 16:52:28 -0700 Joe Damato wrote:  
> > > +			if (head_buf->is_sw_gso == BNXT_SW_GSO_LAST) {
> > > +				if (dma_use_iova(&head_buf->iova_state))
> > > +					dma_iova_destroy(&pdev->dev,
> > > +							 &head_buf->iova_state,
> > > +							 head_buf->iova_total_len,
> > > +							 DMA_TO_DEVICE, 0);  
> > 
> > Do we have to expose the dma_use_iova() stuff to the driver at all?
> > Could we have a function the driver is supposed to call to clean up,
> > always, and what the function does is up to the TSO lib?  
> 
> I could add a tso_dma_map_destroy(dev, iova_state, len) that the driver calls,
> but the driver would still need to stash iova_state and total_len on the ring.
> 
> That would be easiest, but I'm not sure if you were thinking that the IOVA
> stuff should be as opague as possible?
> 
> Because if you do want it to be as opague as possible, maybe:
> 
> /* Add a struct to tso.h to track completion state */
> struct tso_dma_map_completion_state {
>   struct dma_iova_state iova_state;
>   size_t total_len;
> }
> 
> Add a save function: tso_dma_map_completion_save(map, completion_state);
> 
> And then: 
>  - the bnxt sw bd stores a struct tso_dma_map_completion_state.
>  - xmit calls tso_dma_map_completion_save to store the iova_state
>  - completion calls tso_dma_complete(dev, &head_buf->completion_state)
> 
> LMK if you meant the easier way or the more opague way?

I'm not gonna lie, only noticed you keep the tso struct on the stack
now. But the proposal above looks pretty clean to me.