[RFC PATCH v2 19/27] PCI: dwc: ep: Cache MSI outbound iATU mapping

Koichiro Den posted 27 patches 2 days, 6 hours ago
[RFC PATCH v2 19/27] PCI: dwc: ep: Cache MSI outbound iATU mapping
Posted by Koichiro Den 2 days, 6 hours ago
dw_pcie_ep_raise_msi_irq() currently programs an outbound iATU window
for the MSI target address on every interrupt and tears it down again
via dw_pcie_ep_unmap_addr().

On systems that heavily use the AXI bridge interface (for example when
the integrated eDMA engine is active), this means the outbound iATU
registers are updated while traffic is in flight. The DesignWare
endpoint spec warns that updating iATU registers in this situation is
not supported, and the behavior is undefined.

Under high MSI and eDMA load this pattern results in occasional bogus
outbound transactions and IOMMU faults such as:

  ipmmu-vmsa eed40000.iommu: Unhandled fault: status 0x00001502 iova 0xfe000000

followed by the system becoming unresponsive. This is the actual output
observed on Renesas R-Car S4, with its ipmmu_hc used with PCIe ch0.

There is no need to reprogram the iATU region used for MSI on every
interrupt. The host-provided MSI address is stable while MSI is enabled,
and the endpoint driver already dedicates a scratch buffer for MSI
generation.

Cache the aligned MSI address and map size, program the outbound iATU
once, and keep the window enabled. Subsequent interrupts only perform a
write to the MSI scratch buffer, avoiding dynamic iATU reprogramming in
the hot path and fixing the lockups seen under load.

Signed-off-by: Koichiro Den <den@valinux.co.jp>
---
 .../pci/controller/dwc/pcie-designware-ep.c   | 48 ++++++++++++++++---
 drivers/pci/controller/dwc/pcie-designware.h  |  5 ++
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 3780a9bd6f79..ef8ded34d9ab 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -778,6 +778,16 @@ static void dw_pcie_ep_stop(struct pci_epc *epc)
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 
+	/*
+	 * Tear down the dedicated outbound window used for MSI
+	 * generation. This avoids leaking an iATU window across
+	 * endpoint stop/start cycles.
+	 */
+	if (ep->msi_iatu_mapped) {
+		dw_pcie_ep_unmap_addr(epc, 0, 0, ep->msi_mem_phys);
+		ep->msi_iatu_mapped = false;
+	}
+
 	dw_pcie_stop_link(pci);
 }
 
@@ -881,14 +891,37 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
 	msg_addr = ((u64)msg_addr_upper) << 32 | msg_addr_lower;
 
 	msg_addr = dw_pcie_ep_align_addr(epc, msg_addr, &map_size, &offset);
-	ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
-				  map_size);
-	if (ret)
-		return ret;
 
-	writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset);
+	/*
+	 * Program the outbound iATU once and keep it enabled.
+	 *
+	 * The spec warns that updating iATU registers while there are
+	 * operations in flight on the AXI bridge interface is not
+	 * supported, so we avoid reprogramming the region on every MSI,
+	 * specifically unmapping immediately after writel().
+	 */
+	if (!ep->msi_iatu_mapped) {
+		ret = dw_pcie_ep_map_addr(epc, func_no, 0,
+					  ep->msi_mem_phys, msg_addr,
+					  map_size);
+		if (ret)
+			return ret;
 
-	dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);
+		ep->msi_iatu_mapped = true;
+		ep->msi_msg_addr = msg_addr;
+		ep->msi_map_size = map_size;
+	} else if (WARN_ON_ONCE(ep->msi_msg_addr != msg_addr ||
+				ep->msi_map_size != map_size)) {
+		/*
+		 * The host changed the MSI target address or the required
+		 * mapping size. Reprogramming the iATU at runtime is unsafe
+		 * on this controller, so bail out instead of trying to update
+		 * the existing region.
+		 */
+		return -EINVAL;
+	}
+
+	writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset);
 
 	return 0;
 }
@@ -1268,6 +1301,9 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
 	INIT_LIST_HEAD(&ep->func_list);
 	INIT_LIST_HEAD(&ep->ib_map_list);
 	spin_lock_init(&ep->ib_map_lock);
+	ep->msi_iatu_mapped = false;
+	ep->msi_msg_addr = 0;
+	ep->msi_map_size = 0;
 
 	epc = devm_pci_epc_create(dev, &epc_ops);
 	if (IS_ERR(epc)) {
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 269a9fe0501f..1770a2318557 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -481,6 +481,11 @@ struct dw_pcie_ep {
 	void __iomem		*msi_mem;
 	phys_addr_t		msi_mem_phys;
 	struct pci_epf_bar	*epf_bar[PCI_STD_NUM_BARS];
+
+	/* MSI outbound iATU state */
+	bool			msi_iatu_mapped;
+	u64			msi_msg_addr;
+	size_t			msi_map_size;
 };
 
 struct dw_pcie_ops {
-- 
2.48.1
Re: [RFC PATCH v2 19/27] PCI: dwc: ep: Cache MSI outbound iATU mapping
Posted by Frank Li an hour ago
On Sun, Nov 30, 2025 at 01:03:57AM +0900, Koichiro Den wrote:
> dw_pcie_ep_raise_msi_irq() currently programs an outbound iATU window
> for the MSI target address on every interrupt and tears it down again
> via dw_pcie_ep_unmap_addr().
>
> On systems that heavily use the AXI bridge interface (for example when
> the integrated eDMA engine is active), this means the outbound iATU
> registers are updated while traffic is in flight. The DesignWare
> endpoint spec warns that updating iATU registers in this situation is
> not supported, and the behavior is undefined.
>
> Under high MSI and eDMA load this pattern results in occasional bogus
> outbound transactions and IOMMU faults such as:
>
>   ipmmu-vmsa eed40000.iommu: Unhandled fault: status 0x00001502 iova 0xfe000000
>

I agree there is no need to map/unmap the MSI window every time. But I think
there is a logic problem behind this. The IOMMU reporting an error means the
page table has already been removed, but something still tries to access it
after that. You'd better find where the MSI memory is accessed after
dw_pcie_ep_unmap_addr().

dw_pcie_ep_unmap_addr() uses writel(), which issues dma_dmb() before changing
the register, so the previous write should be completed before the ATU
register is written.

Frank

> followed by the system becoming unresponsive. This is the actual output
> observed on Renesas R-Car S4, with its ipmmu_hc used with PCIe ch0.
>
> There is no need to reprogram the iATU region used for MSI on every
> interrupt. The host-provided MSI address is stable while MSI is enabled,
> and the endpoint driver already dedicates a scratch buffer for MSI
> generation.
>
> Cache the aligned MSI address and map size, program the outbound iATU
> once, and keep the window enabled. Subsequent interrupts only perform a
> write to the MSI scratch buffer, avoiding dynamic iATU reprogramming in
> the hot path and fixing the lockups seen under load.
>
> Signed-off-by: Koichiro Den <den@valinux.co.jp>
> ---
>  .../pci/controller/dwc/pcie-designware-ep.c   | 48 ++++++++++++++++---
>  drivers/pci/controller/dwc/pcie-designware.h  |  5 ++
>  2 files changed, 47 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
> index 3780a9bd6f79..ef8ded34d9ab 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> @@ -778,6 +778,16 @@ static void dw_pcie_ep_stop(struct pci_epc *epc)
>  	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
>  	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
>
> +	/*
> +	 * Tear down the dedicated outbound window used for MSI
> +	 * generation. This avoids leaking an iATU window across
> +	 * endpoint stop/start cycles.
> +	 */
> +	if (ep->msi_iatu_mapped) {
> +		dw_pcie_ep_unmap_addr(epc, 0, 0, ep->msi_mem_phys);
> +		ep->msi_iatu_mapped = false;
> +	}
> +
>  	dw_pcie_stop_link(pci);
>  }
>
> @@ -881,14 +891,37 @@ int dw_pcie_ep_raise_msi_irq(struct dw_pcie_ep *ep, u8 func_no,
>  	msg_addr = ((u64)msg_addr_upper) << 32 | msg_addr_lower;
>
>  	msg_addr = dw_pcie_ep_align_addr(epc, msg_addr, &map_size, &offset);
> -	ret = dw_pcie_ep_map_addr(epc, func_no, 0, ep->msi_mem_phys, msg_addr,
> -				  map_size);
> -	if (ret)
> -		return ret;
>
> -	writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset);
> +	/*
> +	 * Program the outbound iATU once and keep it enabled.
> +	 *
> +	 * The spec warns that updating iATU registers while there are
> +	 * operations in flight on the AXI bridge interface is not
> +	 * supported, so we avoid reprogramming the region on every MSI,
> +	 * specifically unmapping immediately after writel().
> +	 */
> +	if (!ep->msi_iatu_mapped) {
> +		ret = dw_pcie_ep_map_addr(epc, func_no, 0,
> +					  ep->msi_mem_phys, msg_addr,
> +					  map_size);
> +		if (ret)
> +			return ret;
>
> -	dw_pcie_ep_unmap_addr(epc, func_no, 0, ep->msi_mem_phys);
> +		ep->msi_iatu_mapped = true;
> +		ep->msi_msg_addr = msg_addr;
> +		ep->msi_map_size = map_size;
> +	} else if (WARN_ON_ONCE(ep->msi_msg_addr != msg_addr ||
> +				ep->msi_map_size != map_size)) {
> +		/*
> +		 * The host changed the MSI target address or the required
> +		 * mapping size. Reprogramming the iATU at runtime is unsafe
> +		 * on this controller, so bail out instead of trying to update
> +		 * the existing region.
> +		 */
> +		return -EINVAL;
> +	}
> +
> +	writel(msg_data | (interrupt_num - 1), ep->msi_mem + offset);
>
>  	return 0;
>  }
> @@ -1268,6 +1301,9 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
>  	INIT_LIST_HEAD(&ep->func_list);
>  	INIT_LIST_HEAD(&ep->ib_map_list);
>  	spin_lock_init(&ep->ib_map_lock);
> +	ep->msi_iatu_mapped = false;
> +	ep->msi_msg_addr = 0;
> +	ep->msi_map_size = 0;
>
>  	epc = devm_pci_epc_create(dev, &epc_ops);
>  	if (IS_ERR(epc)) {
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> index 269a9fe0501f..1770a2318557 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -481,6 +481,11 @@ struct dw_pcie_ep {
>  	void __iomem		*msi_mem;
>  	phys_addr_t		msi_mem_phys;
>  	struct pci_epf_bar	*epf_bar[PCI_STD_NUM_BARS];
> +
> +	/* MSI outbound iATU state */
> +	bool			msi_iatu_mapped;
> +	u64			msi_msg_addr;
> +	size_t			msi_map_size;
>  };
>
>  struct dw_pcie_ops {
> --
> 2.48.1
>