[PATCH] usb: gadget: tegra-xudc: drain EP pipeline before dma_unmap

Vishal Kumar posted 1 patch 2 days, 2 hours ago
drivers/usb/gadget/udc/tegra-xudc.c | 47 ++++++++++++++++++++++------
1 file changed, 38 insertions(+), 9 deletions(-)
[PATCH] usb: gadget: tegra-xudc: drain EP pipeline before dma_unmap
Posted by Vishal Kumar 2 days, 2 hours ago
On Tegra186/194/234 the XUDC posts a transfer-completion event when the
DMA write is dispatched to the AXI interconnect, before the store is
committed to memory.  Under SMMU strict mode dma_unmap() synchronously
invalidates the IOVA TLB entry.  An in-flight AXI write to the
just-unmapped IOVA triggers a translation fault (fsr=0x402) that
permanently wedges the bulk-OUT endpoint.

Observed on Tegra234 (Jetson Orin Nano) at ~170 MB/s USB-NCM transfers:

  arm-smmu 8000000.iommu: Unhandled context fault: fsr=0x402,
    iova=0xfffb5000, cbfrsynra=0x100f, cb=3
  tegra-mc 2c00000.memory-controller: EMEM address decode error

cbfrsynra=0x100f identifies XUDC (StreamID 0x0f per DT), cb=3 is iommu
group 4 (3550000.usb).  fsr=0x402 is a translation fault on a DMA write.

Fix: poll EP_THREAD_ACTIVE before calling usb_gadget_unmap_request() for
non-control endpoints.  EP_THREAD_ACTIVE clearing is the hardware's
guarantee that the endpoint sequencer is idle and all AXI transactions
have completed, so the subsequent TLB invalidation cannot race an
in-flight write.

Also change ep_wait_for_inactive() to return the xudc_readl_poll()
status so callers can detect a timeout.  On timeout in the completion
path, skip dma_unmap() to avoid the translation fault and force
req->usb_req.status = -EIO so the gadget driver does not treat the
transfer as successful or requeue the still-mapped buffer.  On timeout
in the dequeue path, emit a warning.

Fixes: 49d6f3dd4abe ("usb: gadget: add tegra xusb device mode driver")
Cc: <stable@vger.kernel.org>
Signed-off-by: Vishal Kumar <vishalmimani008@gmail.com>
---
 drivers/usb/gadget/udc/tegra-xudc.c | 47 ++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c
index 0b63b8c0a..3f18beddf 100644
--- a/drivers/usb/gadget/udc/tegra-xudc.c
+++ b/drivers/usb/gadget/udc/tegra-xudc.c
@@ -1023,9 +1023,9 @@ static void ep_wait_for_stopped(struct tegra_xudc *xudc, unsigned int ep)
 	xudc_writel(xudc, BIT(ep), EP_STOPPED);
 }
 
-static void ep_wait_for_inactive(struct tegra_xudc *xudc, unsigned int ep)
+static int ep_wait_for_inactive(struct tegra_xudc *xudc, unsigned int ep)
 {
-	xudc_readl_poll(xudc, EP_THREAD_ACTIVE, BIT(ep), 0);
+	return xudc_readl_poll(xudc, EP_THREAD_ACTIVE, BIT(ep), 0);
 }
 
 static void tegra_xudc_req_done(struct tegra_xudc_ep *ep,
@@ -1046,8 +1046,39 @@ static void tegra_xudc_req_done(struct tegra_xudc_ep *ep,
 					 (xudc->setup_state ==
 					  DATA_STAGE_XFER));
 	} else {
-		usb_gadget_unmap_request(&xudc->gadget, &req->usb_req,
-					 usb_endpoint_dir_in(ep->desc));
+		/*
+		 * Drain the endpoint DMA pipeline before unmapping.
+		 *
+		 * Under SMMU strict mode dma_unmap() synchronously
+		 * invalidates the IOVA TLB entry.  On Tegra186/194/234 the
+		 * XUDC appears to post the completion event when the DMA
+		 * write is dispatched to the AXI interconnect, before the
+		 * store is committed to memory.  A subsequent dma_unmap()
+		 * can remove the IOVA translation while the write is still
+		 * in-flight, triggering a translation fault (fsr=0x402) that
+		 * permanently wedges the bulk endpoint.
+		 *
+		 * Wait for EP_THREAD_ACTIVE to clear (endpoint sequencer
+		 * idle).  On timeout skip the unmap to avoid the SMMU fault;
+		 * the DMA mapping leaks but the hardware is already in an
+		 * unrecoverable state.
+		 */
+		if (!WARN_ONCE(ep_wait_for_inactive(xudc, ep->index),
+			       "ep%u: DMA drain timed out; skipping dma_unmap\n",
+			       ep->index)) {
+			/* Read-back completes the poll barrier; EP_THREAD_ACTIVE=0 guarantees DMA is idle. */
+			xudc_readl(xudc, EP_THREAD_ACTIVE);
+			usb_gadget_unmap_request(&xudc->gadget, &req->usb_req,
+						 usb_endpoint_dir_in(ep->desc));
+		} else {
+			/*
+			 * Timeout: mapping is intentionally leaked to avoid the
+			 * SMMU fault.  Force -EIO so the gadget driver does not
+			 * treat this as a successful transfer and reuse the
+			 * still-mapped buffer.
+			 */
+			req->usb_req.status = -EIO;
+		}
 	}
 
 	spin_unlock(&xudc->lock);
@@ -1443,10 +1474,12 @@ __tegra_xudc_ep_dequeue(struct tegra_xudc_ep *ep,
 		return 0;
 	}
 
-	/* Halt DMA for this endpiont. */
+	/* Halt DMA for this endpoint. */
 	if (ep_ctx_read_state(ep->context) == EP_STATE_RUNNING) {
 		ep_pause(xudc, ep->index);
-		ep_wait_for_inactive(xudc, ep->index);
+		if (ep_wait_for_inactive(xudc, ep->index))
+			dev_warn(xudc->dev, "ep%u: DMA drain timed out during dequeue\n",
+				 ep->index);
 	}
 
 	deq_trb = trb_phys_to_virt(ep, ep_ctx_read_deq_ptr(ep->context));

2.39.0