[PATCH v7 5/6] PCI: dwc: ep: Support BAR subrange inbound mapping via Address Match Mode iATU

Koichiro Den posted 6 patches 3 weeks, 6 days ago
There is a newer version of this series
[PATCH v7 5/6] PCI: dwc: ep: Support BAR subrange inbound mapping via Address Match Mode iATU
Posted by Koichiro Den 3 weeks, 6 days ago
Extend dw_pcie_ep_set_bar() to support inbound mappings for BAR
subranges using Address Match Mode IB iATU.

Rename the existing BAR-match helper to dw_pcie_ep_ib_atu_bar() and
introduce dw_pcie_ep_ib_atu_addr() for Address Match Mode. When
use_submap is set, read the assigned BAR base address and program one
inbound iATU window per subrange. Validate the submap array before
programming:
- each subrange is aligned to pci->region_align
- subranges cover the whole BAR (no gaps and no overlaps)
- subranges are sorted in ascending order by offset

Track Address Match Mode mappings and tear them down on clear_bar() and
on set_bar() error paths to avoid leaving half-programmed state or
untranslated BAR holes.

Advertise this capability by setting subrange_mapping in the EPC
features returned from dw_pcie_ep_get_features().

Reviewed-by: Niklas Cassel <cassel@kernel.org>
Signed-off-by: Koichiro Den <den@valinux.co.jp>
---
 .../pci/controller/dwc/pcie-designware-ep.c   | 230 +++++++++++++++++-
 drivers/pci/controller/dwc/pcie-designware.h  |   2 +
 2 files changed, 222 insertions(+), 10 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 0e5a8d200b00..b2ea2c2c986f 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -139,9 +139,10 @@ static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 	return 0;
 }
 
-static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
-				  dma_addr_t parent_bus_addr, enum pci_barno bar,
-				  size_t size)
+/* BAR Match Mode inbound iATU mapping */
+static int dw_pcie_ep_ib_atu_bar(struct dw_pcie_ep *ep, u8 func_no, int type,
+				 dma_addr_t parent_bus_addr, enum pci_barno bar,
+				 size_t size)
 {
 	int ret;
 	u32 free_win;
@@ -174,6 +175,208 @@ static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
 	return 0;
 }
 
+/* Inbound mapping bookkeeping for Address Match Mode */
+struct dw_pcie_ib_map {
+	struct list_head	list;
+	enum pci_barno		bar;
+	u64			pci_addr;
+	u64			parent_bus_addr;
+	u64			size;
+	u32			index;
+};
+
+static void dw_pcie_ep_clear_ib_maps(struct dw_pcie_ep *ep, enum pci_barno bar)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct dw_pcie_ib_map *m, *tmp;
+	struct device *dev = pci->dev;
+	u32 atu_index;
+
+	/* Tear down the BAR Match Mode mapping, if any. */
+	if (ep->bar_to_atu[bar]) {
+		atu_index = ep->bar_to_atu[bar] - 1;
+		dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, atu_index);
+		clear_bit(atu_index, ep->ib_window_map);
+		ep->bar_to_atu[bar] = 0;
+	}
+
+	/* Tear down all Address Match Mode mappings, if any. */
+	guard(spinlock_irqsave)(&ep->ib_map_lock);
+	list_for_each_entry_safe(m, tmp, &ep->ib_map_list, list) {
+		if (m->bar != bar)
+			continue;
+		dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, m->index);
+		clear_bit(m->index, ep->ib_window_map);
+		list_del(&m->list);
+		devm_kfree(dev, m);
+	}
+}
+
+static u64 dw_pcie_ep_read_bar_assigned(struct dw_pcie_ep *ep, u8 func_no,
+					enum pci_barno bar, int flags)
+{
+	u32 reg = PCI_BASE_ADDRESS_0 + (4 * bar);
+	u32 lo, hi;
+	u64 addr;
+
+	lo = dw_pcie_ep_readl_dbi(ep, func_no, reg);
+
+	if (flags & PCI_BASE_ADDRESS_SPACE)
+		return lo & PCI_BASE_ADDRESS_IO_MASK;
+
+	addr = lo & PCI_BASE_ADDRESS_MEM_MASK;
+	if (!(flags & PCI_BASE_ADDRESS_MEM_TYPE_64))
+		return addr;
+
+	hi = dw_pcie_ep_readl_dbi(ep, func_no, reg + 4);
+	return addr | ((u64)hi << 32);
+}
+
+static int dw_pcie_ep_validate_submap(struct dw_pcie_ep *ep,
+				      const struct pci_epf_bar_submap *submap,
+				      unsigned int num_submap, size_t bar_size)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	u32 align = pci->region_align;
+	size_t expected = 0;
+	size_t size, off;
+	unsigned int i;
+
+	if (!align || !IS_ALIGNED(bar_size, align))
+		return -EINVAL;
+
+	/*
+	 * The array is expected to be sorted by offset before calling this
+	 * helper. With sorted entries, we can enforce a strict, gapless
+	 * decomposition of the BAR:
+	 *  - each entry has a non-zero size
+	 *  - offset/size/phys_addr are aligned to pci->region_align
+	 *  - each entry lies within the BAR range
+	 *  - entries are contiguous (no overlaps, no holes)
+	 *  - the entries exactly cover the whole BAR
+	 *
+	 * Note: dw_pcie_prog_inbound_atu() also checks alignment for
+	 * offset/phys_addr, but validating up-front avoids partially
+	 * programming iATU windows in vain.
+	 */
+	for (i = 0; i < num_submap; i++) {
+		off = submap[i].offset;
+		size = submap[i].size;
+
+		if (!size)
+			return -EINVAL;
+
+		if (!IS_ALIGNED(size, align) || !IS_ALIGNED(off, align))
+			return -EINVAL;
+
+		if (!IS_ALIGNED(submap[i].phys_addr, align))
+			return -EINVAL;
+
+		if (off > bar_size || size > bar_size - off)
+			return -EINVAL;
+
+		/* Enforce contiguity (no overlaps, no holes). */
+		if (off != expected)
+			return -EINVAL;
+
+		expected += size;
+	}
+	if (expected != bar_size)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Address Match Mode inbound iATU mapping */
+static int dw_pcie_ep_ib_atu_addr(struct dw_pcie_ep *ep, u8 func_no, int type,
+				  const struct pci_epf_bar *epf_bar)
+{
+	const struct pci_epf_bar_submap *submap = epf_bar->submap;
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	enum pci_barno bar = epf_bar->barno;
+	struct device *dev = pci->dev;
+	u64 pci_addr, parent_bus_addr;
+	struct dw_pcie_ib_map *new;
+	u64 size, off, base;
+	unsigned long flags;
+	int free_win, ret;
+	unsigned int i;
+
+	if (!epf_bar->num_submap || !submap || !epf_bar->size)
+		return -EINVAL;
+
+	ret = dw_pcie_ep_validate_submap(ep, submap, epf_bar->num_submap,
+					 epf_bar->size);
+	if (ret)
+		return ret;
+
+	base = dw_pcie_ep_read_bar_assigned(ep, func_no, bar, epf_bar->flags);
+	if (!base) {
+		dev_err(dev,
+			"BAR%u not assigned, cannot set up sub-range mappings\n",
+			bar);
+		return -EINVAL;
+	}
+
+	/* Tear down any existing mappings before (re)programming. */
+	dw_pcie_ep_clear_ib_maps(ep, bar);
+
+	for (i = 0; i < epf_bar->num_submap; i++) {
+		off = submap[i].offset;
+		size = submap[i].size;
+		parent_bus_addr = submap[i].phys_addr;
+
+		if (off > (~0ULL) - base) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		pci_addr = base + off;
+
+		new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL);
+		if (!new) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		spin_lock_irqsave(&ep->ib_map_lock, flags);
+
+		free_win = find_first_zero_bit(ep->ib_window_map,
+					       pci->num_ib_windows);
+		if (free_win >= pci->num_ib_windows) {
+			spin_unlock_irqrestore(&ep->ib_map_lock, flags);
+			devm_kfree(dev, new);
+			ret = -ENOSPC;
+			goto err;
+		}
+		set_bit(free_win, ep->ib_window_map);
+
+		new->bar = bar;
+		new->index = free_win;
+		new->pci_addr = pci_addr;
+		new->parent_bus_addr = parent_bus_addr;
+		new->size = size;
+		list_add_tail(&new->list, &ep->ib_map_list);
+
+		spin_unlock_irqrestore(&ep->ib_map_lock, flags);
+
+		ret = dw_pcie_prog_inbound_atu(pci, free_win, type,
+					       parent_bus_addr, pci_addr, size);
+		if (ret) {
+			spin_lock_irqsave(&ep->ib_map_lock, flags);
+			list_del(&new->list);
+			clear_bit(free_win, ep->ib_window_map);
+			spin_unlock_irqrestore(&ep->ib_map_lock, flags);
+			devm_kfree(dev, new);
+			goto err;
+		}
+	}
+	return 0;
+err:
+	dw_pcie_ep_clear_ib_maps(ep, bar);
+	return ret;
+}
+
 static int dw_pcie_ep_outbound_atu(struct dw_pcie_ep *ep,
 				   struct dw_pcie_ob_atu_cfg *atu)
 {
@@ -204,17 +407,15 @@ static void dw_pcie_ep_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 	enum pci_barno bar = epf_bar->barno;
-	u32 atu_index = ep->bar_to_atu[bar] - 1;
 
-	if (!ep->bar_to_atu[bar])
+	if (!ep->epf_bar[bar])
 		return;
 
 	__dw_pcie_ep_reset_bar(pci, func_no, bar, epf_bar->flags);
 
-	dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, atu_index);
-	clear_bit(atu_index, ep->ib_window_map);
+	dw_pcie_ep_clear_ib_maps(ep, bar);
+
 	ep->epf_bar[bar] = NULL;
-	ep->bar_to_atu[bar] = 0;
 }
 
 static unsigned int dw_pcie_ep_get_rebar_offset(struct dw_pcie *pci,
@@ -408,8 +609,12 @@ static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
 	else
 		type = PCIE_ATU_TYPE_IO;
 
-	ret = dw_pcie_ep_inbound_atu(ep, func_no, type, epf_bar->phys_addr, bar,
-				     size);
+	if (epf_bar->use_submap)
+		ret = dw_pcie_ep_ib_atu_addr(ep, func_no, type, epf_bar);
+	else
+		ret = dw_pcie_ep_ib_atu_bar(ep, func_no, type,
+					    epf_bar->phys_addr, bar, size);
+
 	if (ret)
 		return ret;
 
@@ -638,6 +843,9 @@ dw_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
 	/* All DWC-based glue drivers support dynamic inbound mapping */
 	features->dynamic_inbound_mapping = true;
 
+	/* All DWC-based glue drivers support inbound subrange mapping */
+	features->subrange_mapping = true;
+
 	return features;
 }
 
@@ -1128,6 +1336,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
 	struct device *dev = pci->dev;
 
 	INIT_LIST_HEAD(&ep->func_list);
+	INIT_LIST_HEAD(&ep->ib_map_list);
+	spin_lock_init(&ep->ib_map_lock);
 	ep->msi_iatu_mapped = false;
 	ep->msi_msg_addr = 0;
 	ep->msi_map_size = 0;
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 4dda9a38d46b..969b1f32dddf 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -479,6 +479,8 @@ struct dw_pcie_ep {
 	phys_addr_t		*outbound_addr;
 	unsigned long		*ib_window_map;
 	unsigned long		*ob_window_map;
+	struct list_head	ib_map_list;
+	spinlock_t		ib_map_lock;
 	void __iomem		*msi_mem;
 	phys_addr_t		msi_mem_phys;
 	struct pci_epf_bar	*epf_bar[PCI_STD_NUM_BARS];
-- 
2.51.0
Re: [PATCH v7 5/6] PCI: dwc: ep: Support BAR subrange inbound mapping via Address Match Mode iATU
Posted by Koichiro Den 3 weeks, 5 days ago
On Wed, Jan 14, 2026 at 01:27:18AM +0900, Koichiro Den wrote:
> Extend dw_pcie_ep_set_bar() to support inbound mappings for BAR
> subranges using Address Match Mode IB iATU.
> 
> Rename the existing BAR-match helper into dw_pcie_ep_ib_atu_bar() and
> introduce dw_pcie_ep_ib_atu_addr() for Address Match Mode. When
> use_submap is set, read the assigned BAR base address and program one
> inbound iATU window per subrange. Validate the submap array before
> programming:
> - each subrange is aligned to pci->region_align
> - subranges cover the whole BAR (no gaps and no overlaps)
> - subranges are sorted in ascending order by offset
> 
> Track Address Match Mode mappings and tear them down on clear_bar() and
> on set_bar() error paths to avoid leaving half-programmed state or
> untranslated BAR holes.
> 
> Advertise this capability by setting subrange_mapping in the EPC
> features returned from dw_pcie_ep_get_features().
> 
> Reviewed-by: Niklas Cassel <cassel@kernel.org>
> Signed-off-by: Koichiro Den <den@valinux.co.jp>
> ---
>  .../pci/controller/dwc/pcie-designware-ep.c   | 230 +++++++++++++++++-
>  drivers/pci/controller/dwc/pcie-designware.h  |   2 +
>  2 files changed, 222 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
> index 0e5a8d200b00..b2ea2c2c986f 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> @@ -139,9 +139,10 @@ static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
>  	return 0;
>  }
>  
> -static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
> -				  dma_addr_t parent_bus_addr, enum pci_barno bar,
> -				  size_t size)
> +/* BAR Match Mode inbound iATU mapping */
> +static int dw_pcie_ep_ib_atu_bar(struct dw_pcie_ep *ep, u8 func_no, int type,
> +				 dma_addr_t parent_bus_addr, enum pci_barno bar,
> +				 size_t size)
>  {
>  	int ret;
>  	u32 free_win;
> @@ -174,6 +175,208 @@ static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
>  	return 0;
>  }
>  
> +/* Inbound mapping bookkeeping for Address Match Mode */
> +struct dw_pcie_ib_map {
> +	struct list_head	list;
> +	enum pci_barno		bar;
> +	u64			pci_addr;
> +	u64			parent_bus_addr;
> +	u64			size;
> +	u32			index;
> +};
> +
> +static void dw_pcie_ep_clear_ib_maps(struct dw_pcie_ep *ep, enum pci_barno bar)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> +	struct dw_pcie_ib_map *m, *tmp;
> +	struct device *dev = pci->dev;
> +	u32 atu_index;
> +
> +	/* Tear down the BAR Match Mode mapping, if any. */
> +	if (ep->bar_to_atu[bar]) {
> +		atu_index = ep->bar_to_atu[bar] - 1;
> +		dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, atu_index);
> +		clear_bit(atu_index, ep->ib_window_map);
> +		ep->bar_to_atu[bar] = 0;
> +	}
> +
> +	/* Tear down all Address Match Mode mappings, if any. */
> +	guard(spinlock_irqsave)(&ep->ib_map_lock);
> +	list_for_each_entry_safe(m, tmp, &ep->ib_map_list, list) {
> +		if (m->bar != bar)
> +			continue;
> +		dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, m->index);
> +		clear_bit(m->index, ep->ib_window_map);
> +		list_del(&m->list);
> +		devm_kfree(dev, m);
> +	}
> +}

I realized that I missed one case in v7.

I think dw_pcie_ep_clear_ib_maps() should also be called from
dw_pcie_ep_ib_atu_bar() to tear down any existing inbound mappings for the
same BAR before re-programming it in BAR Match Mode.

This matters when updating inbound mappings for a BAR without resetting the
BAR in between. There are four possible transition patterns, and pattern #4
below was overlooked:

  1. BAR Match Mode -> BAR Match Mode
     The current implementation already handles this: the mapping is
     simply updated in place, reusing the same iATU index.

  2. BAR Match Mode -> Address Match Mode
     This patch series already ensures the old BAR Match mapping is
     torn down before reprogramming.

  3. Address Match Mode -> Address Match Mode
     Likewise, existing Address Match mappings are cleared first.

  4. Address Match Mode -> BAR Match Mode
     This case was not handled. The change below adds the missing
     teardown so that stale Address Match mappings do not remain active.

     --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
     +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
     @@ -148,9 +148,12 @@ static int dw_pcie_ep_ib_atu_bar(struct dw_pcie_ep *ep, u8 func_no, int type,
             u32 free_win;
             struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
     
     -       if (!ep->bar_to_atu[bar])
     +       if (!ep->bar_to_atu[bar]) {
     +               /* Tear down existing mappings before (re)programming. */
     +               dw_pcie_ep_clear_ib_maps(ep, bar);
     +
                     free_win = find_first_zero_bit(ep->ib_window_map,
                                                   pci->num_ib_windows);
     -       else
     +       } else
                     free_win = ep->bar_to_atu[bar] - 1;

Unless there are objections, I'll include this fix in v8.

Thanks,
Koichiro

> +
> +static u64 dw_pcie_ep_read_bar_assigned(struct dw_pcie_ep *ep, u8 func_no,
> +					enum pci_barno bar, int flags)
> +{
> +	u32 reg = PCI_BASE_ADDRESS_0 + (4 * bar);
> +	u32 lo, hi;
> +	u64 addr;
> +
> +	lo = dw_pcie_ep_readl_dbi(ep, func_no, reg);
> +
> +	if (flags & PCI_BASE_ADDRESS_SPACE)
> +		return lo & PCI_BASE_ADDRESS_IO_MASK;
> +
> +	addr = lo & PCI_BASE_ADDRESS_MEM_MASK;
> +	if (!(flags & PCI_BASE_ADDRESS_MEM_TYPE_64))
> +		return addr;
> +
> +	hi = dw_pcie_ep_readl_dbi(ep, func_no, reg + 4);
> +	return addr | ((u64)hi << 32);
> +}
> +
> +static int dw_pcie_ep_validate_submap(struct dw_pcie_ep *ep,
> +				      const struct pci_epf_bar_submap *submap,
> +				      unsigned int num_submap, size_t bar_size)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> +	u32 align = pci->region_align;
> +	size_t expected = 0;
> +	size_t size, off;
> +	unsigned int i;
> +
> +	if (!align || !IS_ALIGNED(bar_size, align))
> +		return -EINVAL;
> +
> +	/*
> +	 * The array is expected to be sorted by offset before calling this
> +	 * helper. With sorted entries, we can enforce a strict, gapless
> +	 * decomposition of the BAR:
> +	 *  - each entry has a non-zero size
> +	 *  - offset/size/phys_addr are aligned to pci->region_align
> +	 *  - each entry lies within the BAR range
> +	 *  - entries are contiguous (no overlaps, no holes)
> +	 *  - the entries exactly cover the whole BAR
> +	 *
> +	 * Note: dw_pcie_prog_inbound_atu() also checks alignment for
> +	 * offset/phys_addr, but validating up-front avoids partially
> +	 * programming iATU windows in vain.
> +	 */
> +	for (i = 0; i < num_submap; i++) {
> +		off = submap[i].offset;
> +		size = submap[i].size;
> +
> +		if (!size)
> +			return -EINVAL;
> +
> +		if (!IS_ALIGNED(size, align) || !IS_ALIGNED(off, align))
> +			return -EINVAL;
> +
> +		if (!IS_ALIGNED(submap[i].phys_addr, align))
> +			return -EINVAL;
> +
> +		if (off > bar_size || size > bar_size - off)
> +			return -EINVAL;
> +
> +		/* Enforce contiguity (no overlaps, no holes). */
> +		if (off != expected)
> +			return -EINVAL;
> +
> +		expected += size;
> +	}
> +	if (expected != bar_size)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +/* Address Match Mode inbound iATU mapping */
> +static int dw_pcie_ep_ib_atu_addr(struct dw_pcie_ep *ep, u8 func_no, int type,
> +				  const struct pci_epf_bar *epf_bar)
> +{
> +	const struct pci_epf_bar_submap *submap = epf_bar->submap;
> +	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> +	enum pci_barno bar = epf_bar->barno;
> +	struct device *dev = pci->dev;
> +	u64 pci_addr, parent_bus_addr;
> +	struct dw_pcie_ib_map *new;
> +	u64 size, off, base;
> +	unsigned long flags;
> +	int free_win, ret;
> +	unsigned int i;
> +
> +	if (!epf_bar->num_submap || !submap || !epf_bar->size)
> +		return -EINVAL;
> +
> +	ret = dw_pcie_ep_validate_submap(ep, submap, epf_bar->num_submap,
> +					 epf_bar->size);
> +	if (ret)
> +		return ret;
> +
> +	base = dw_pcie_ep_read_bar_assigned(ep, func_no, bar, epf_bar->flags);
> +	if (!base) {
> +		dev_err(dev,
> +			"BAR%u not assigned, cannot set up sub-range mappings\n",
> +			bar);
> +		return -EINVAL;
> +	}
> +
> +	/* Tear down any existing mappings before (re)programming. */
> +	dw_pcie_ep_clear_ib_maps(ep, bar);
> +
> +	for (i = 0; i < epf_bar->num_submap; i++) {
> +		off = submap[i].offset;
> +		size = submap[i].size;
> +		parent_bus_addr = submap[i].phys_addr;
> +
> +		if (off > (~0ULL) - base) {
> +			ret = -EINVAL;
> +			goto err;
> +		}
> +
> +		pci_addr = base + off;
> +
> +		new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL);
> +		if (!new) {
> +			ret = -ENOMEM;
> +			goto err;
> +		}
> +
> +		spin_lock_irqsave(&ep->ib_map_lock, flags);
> +
> +		free_win = find_first_zero_bit(ep->ib_window_map,
> +					       pci->num_ib_windows);
> +		if (free_win >= pci->num_ib_windows) {
> +			spin_unlock_irqrestore(&ep->ib_map_lock, flags);
> +			devm_kfree(dev, new);
> +			ret = -ENOSPC;
> +			goto err;
> +		}
> +		set_bit(free_win, ep->ib_window_map);
> +
> +		new->bar = bar;
> +		new->index = free_win;
> +		new->pci_addr = pci_addr;
> +		new->parent_bus_addr = parent_bus_addr;
> +		new->size = size;
> +		list_add_tail(&new->list, &ep->ib_map_list);
> +
> +		spin_unlock_irqrestore(&ep->ib_map_lock, flags);
> +
> +		ret = dw_pcie_prog_inbound_atu(pci, free_win, type,
> +					       parent_bus_addr, pci_addr, size);
> +		if (ret) {
> +			spin_lock_irqsave(&ep->ib_map_lock, flags);
> +			list_del(&new->list);
> +			clear_bit(free_win, ep->ib_window_map);
> +			spin_unlock_irqrestore(&ep->ib_map_lock, flags);
> +			devm_kfree(dev, new);
> +			goto err;
> +		}
> +	}
> +	return 0;
> +err:
> +	dw_pcie_ep_clear_ib_maps(ep, bar);
> +	return ret;
> +}
> +
>  static int dw_pcie_ep_outbound_atu(struct dw_pcie_ep *ep,
>  				   struct dw_pcie_ob_atu_cfg *atu)
>  {
> @@ -204,17 +407,15 @@ static void dw_pcie_ep_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
>  	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
>  	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
>  	enum pci_barno bar = epf_bar->barno;
> -	u32 atu_index = ep->bar_to_atu[bar] - 1;
>  
> -	if (!ep->bar_to_atu[bar])
> +	if (!ep->epf_bar[bar])
>  		return;
>  
>  	__dw_pcie_ep_reset_bar(pci, func_no, bar, epf_bar->flags);
>  
> -	dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, atu_index);
> -	clear_bit(atu_index, ep->ib_window_map);
> +	dw_pcie_ep_clear_ib_maps(ep, bar);
> +
>  	ep->epf_bar[bar] = NULL;
> -	ep->bar_to_atu[bar] = 0;
>  }
>  
>  static unsigned int dw_pcie_ep_get_rebar_offset(struct dw_pcie *pci,
> @@ -408,8 +609,12 @@ static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
>  	else
>  		type = PCIE_ATU_TYPE_IO;
>  
> -	ret = dw_pcie_ep_inbound_atu(ep, func_no, type, epf_bar->phys_addr, bar,
> -				     size);
> +	if (epf_bar->use_submap)
> +		ret = dw_pcie_ep_ib_atu_addr(ep, func_no, type, epf_bar);
> +	else
> +		ret = dw_pcie_ep_ib_atu_bar(ep, func_no, type,
> +					    epf_bar->phys_addr, bar, size);
> +
>  	if (ret)
>  		return ret;
>  
> @@ -638,6 +843,9 @@ dw_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
>  	/* All DWC-based glue drivers support dynamic inbound mapping */
>  	features->dynamic_inbound_mapping = true;
>  
> +	/* All DWC-based glue drivers support inbound subrange mapping */
> +	features->subrange_mapping = true;
> +
>  	return features;
>  }
>  
> @@ -1128,6 +1336,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
>  	struct device *dev = pci->dev;
>  
>  	INIT_LIST_HEAD(&ep->func_list);
> +	INIT_LIST_HEAD(&ep->ib_map_list);
> +	spin_lock_init(&ep->ib_map_lock);
>  	ep->msi_iatu_mapped = false;
>  	ep->msi_msg_addr = 0;
>  	ep->msi_map_size = 0;
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> index 4dda9a38d46b..969b1f32dddf 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -479,6 +479,8 @@ struct dw_pcie_ep {
>  	phys_addr_t		*outbound_addr;
>  	unsigned long		*ib_window_map;
>  	unsigned long		*ob_window_map;
> +	struct list_head	ib_map_list;
> +	spinlock_t		ib_map_lock;
>  	void __iomem		*msi_mem;
>  	phys_addr_t		msi_mem_phys;
>  	struct pci_epf_bar	*epf_bar[PCI_STD_NUM_BARS];
> -- 
> 2.51.0
>
Re: [PATCH v7 5/6] PCI: dwc: ep: Support BAR subrange inbound mapping via Address Match Mode iATU
Posted by Niklas Cassel 3 weeks, 5 days ago
On Wed, Jan 14, 2026 at 12:54:37PM +0900, Koichiro Den wrote:
> I realized that I missed one case in v7.
> 
> I think dw_pcie_ep_clear_ib_maps() should also be called from
> dw_pcie_ep_ib_atu_bar() to tear down any existing inbound mappings for the
> same BAR before re-programming it in BAR Match Mode.
> 
> This matters when updating inbound mappings for a BAR without resetting the
> BAR in between. There are four possible transition patterns, and pattern #4
> below was overlooked:
> 
>   1. BAR Match Mode -> BAR Match Mode
>      As the current implementation does, the mapping is simply updated
>      (with the same atu index)
> 
>   2. BAR Match Mode -> Address Match Mode
>      This patch series already ensures the old BAR Match mapping is
>      torn down before reprogramming.
> 
>   3. Address Match Mode -> Address Match Mode
>      Likewise, existing Address Match mappings are cleared first.
> 
>   4. Address Match Mode  -> BAR Match Mode
>      This case was not handled. The change below adds the missing
>      teardown so that stale Address Match mappings do not remain active.
> 
>      --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
>      +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
>      @@ -148,9 +148,12 @@ static int dw_pcie_ep_ib_atu_bar(struct dw_pcie_ep *ep, u8 func_no, int type,
>              u32 free_win;
>              struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
>      
>      -       if (!ep->bar_to_atu[bar])
>      +       if (!ep->bar_to_atu[bar]) {
>      +               /* Tear down existing mappings before (re)programming. */
>      +               dw_pcie_ep_clear_ib_maps(ep, bar);
>      +
>                      free_win = find_first_zero_bit(ep->ib_window_map,
>                                                    pci->num_ib_windows);
>      -       else
>      +       } else
>                      free_win = ep->bar_to_atu[bar] - 1;

If one of the branches has braces, both branches should have braces:
https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces


> 
> Unless there are objections, I'll include this fix in v8.

Isn't it easier/cleaner if we call dw_pcie_ep_clear_ib_maps() in
dw_pcie_ep_set_bar(), rather than calling it in both dw_pcie_ep_ib_atu_addr()
and dw_pcie_ep_ib_atu_bar()?

dw_pcie_ep_set_bar() already knows whether we are dynamically reprogramming
a BAR, and all four cases only arise when dynamically reprogramming a BAR.

I.e. instead of adding additional code to dw_pcie_ep_ib_atu_bar(), we do
something like:

diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index b2ea2c2c986f..63ae5471fe13 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -318,9 +318,6 @@ static int dw_pcie_ep_ib_atu_addr(struct dw_pcie_ep *ep, u8 func_no, int type,
                return -EINVAL;
        }
 
-       /* Tear down any existing mappings before (re)programming. */
-       dw_pcie_ep_clear_ib_maps(ep, bar);
-
        for (i = 0; i < epf_bar->num_submap; i++) {
                off = submap[i].offset;
                size = submap[i].size;
@@ -571,6 +568,9 @@ static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
                    ep->epf_bar[bar]->flags != flags)
                        return -EINVAL;
 
+               if (ep->epf_bar[bar]->num_submap || epf_bar->num_submap)
+                       dw_pcie_ep_clear_ib_maps(ep, bar);
+
                /*
                 * When dynamically changing a BAR, skip writing the BAR reg, as
                 * that would clear the BAR's PCI address assigned by the host.
Re: [PATCH v7 5/6] PCI: dwc: ep: Support BAR subrange inbound mapping via Address Match Mode iATU
Posted by Koichiro Den 3 weeks, 5 days ago
On Wed, Jan 14, 2026 at 11:39:03AM +0100, Niklas Cassel wrote:
> On Wed, Jan 14, 2026 at 12:54:37PM +0900, Koichiro Den wrote:
> > I realized that I missed one case in v7.
> > 
> > I think dw_pcie_ep_clear_ib_maps() should also be called from
> > dw_pcie_ep_ib_atu_bar() to tear down any existing inbound mappings for the
> > same BAR before re-programming it in BAR Match Mode.
> > 
> > This matters when updating inbound mappings for a BAR without resetting the
> > BAR in between. There are four possible transition patterns, and pattern #4
> > below was overlooked:
> > 
> >   1. BAR Match Mode -> BAR Match Mode
> >      As the current implementation does, the mapping is simply updated
> >      (with the same atu index)
> > 
> >   2. BAR Match Mode -> Address Match Mode
> >      This patch series already ensures the old BAR Match mapping is
> >      torn down before reprogramming.
> > 
> >   3. Address Match Mode -> Address Match Mode
> >      Likewise, existing Address Match mappings are cleared first.
> > 
> >   4. Address Match Mode  -> BAR Match Mode
> >      This case was not handled. The change below adds the missing
> >      teardown so that stale Address Match mappings do not remain active.
> > 
> >      --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> >      +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> >      @@ -148,9 +148,12 @@ static int dw_pcie_ep_ib_atu_bar(struct dw_pcie_ep *ep, u8 func_no, int type,
> >              u32 free_win;
> >              struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> >      
> >      -       if (!ep->bar_to_atu[bar])
> >      +       if (!ep->bar_to_atu[bar]) {
> >      +               /* Tear down existing mappings before (re)programming. */
> >      +               dw_pcie_ep_clear_ib_maps(ep, bar);
> >      +
> >                      free_win = find_first_zero_bit(ep->ib_window_map,
> >                                                    pci->num_ib_windows);
> >      -       else
> >      +       } else
> >                      free_win = ep->bar_to_atu[bar] - 1;
> 
> If one of the branches has braces, both branches should have braces:
> https://www.kernel.org/doc/html/latest/process/coding-style.html#placing-braces-and-spaces
> 
> 
> > 
> > Unless there are objections, I'll include this fix in v8.
> 
> Isn't it easier/cleaner if we call dw_pcie_ep_clear_ib_maps() in
> dw_pcie_ep_set_bar(), rather than calling it in both dw_pcie_ep_ib_atu_addr()
> and dw_pcie_ep_ib_atu_bar() ?
> 
> dw_pcie_ep_set_bar() knows the condition if we are dynamically reprogramming
> a BAR or not, and all the four cases are when dynamically reprogramming a BAR.
> 
> I.e. instead of adding additional code to dw_pcie_ep_ib_atu_bar(), we do
> something like:
> 
> diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
> index b2ea2c2c986f..63ae5471fe13 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> @@ -318,9 +318,6 @@ static int dw_pcie_ep_ib_atu_addr(struct dw_pcie_ep *ep, u8 func_no, int type,
>                 return -EINVAL;
>         }
>  
> -       /* Tear down any existing mappings before (re)programming. */
> -       dw_pcie_ep_clear_ib_maps(ep, bar);
> -
>         for (i = 0; i < epf_bar->num_submap; i++) {
>                 off = submap[i].offset;
>                 size = submap[i].size;
> @@ -571,6 +568,9 @@ static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
>                     ep->epf_bar[bar]->flags != flags)
>                         return -EINVAL;
>  
> +               if (ep->epf_bar[bar]->num_submap || epf_bar->num_submap)
> +                       dw_pcie_ep_clear_ib_maps(ep, bar);
> +
>                 /*
>                  * When dynamically changing a BAR, skip writing the BAR reg, as
>                  * that would clear the BAR's PCI address assigned by the host.
> 

For patterns #2 and #3 (i.e. either mode -> Address Match Mode), the v7 code
does not call dw_pcie_ep_clear_ib_maps() until the submap validation
passes. The above patch differs slightly in that sense, but I agree it
looks much simpler. I don't think the difference matters much, since
pci_epc_set_bar() with an invalid submap should already indicate that
something has gone wrong (most likely a bug in the API call site). So I
think I'll go with your suggestion.

Thanks!
Koichiro

> 
>
Re: [PATCH v7 5/6] PCI: dwc: ep: Support BAR subrange inbound mapping via Address Match Mode iATU
Posted by Frank Li 3 weeks, 6 days ago
On Wed, Jan 14, 2026 at 01:27:18AM +0900, Koichiro Den wrote:
> Extend dw_pcie_ep_set_bar() to support inbound mappings for BAR
> subranges using Address Match Mode IB iATU.
>
> Rename the existing BAR-match helper into dw_pcie_ep_ib_atu_bar() and
> introduce dw_pcie_ep_ib_atu_addr() for Address Match Mode. When
> use_submap is set, read the assigned BAR base address and program one
> inbound iATU window per subrange. Validate the submap array before
> programming:
> - each subrange is aligned to pci->region_align
> - subranges cover the whole BAR (no gaps and no overlaps)
> - subranges are sorted in ascending order by offset
>
> Track Address Match Mode mappings and tear them down on clear_bar() and
> on set_bar() error paths to avoid leaving half-programmed state or
> untranslated BAR holes.
>
> Advertise this capability by setting subrange_mapping in the EPC
> features returned from dw_pcie_ep_get_features().
>
> Reviewed-by: Niklas Cassel <cassel@kernel.org>
> Signed-off-by: Koichiro Den <den@valinux.co.jp>
> ---
>  .../pci/controller/dwc/pcie-designware-ep.c   | 230 +++++++++++++++++-
>  drivers/pci/controller/dwc/pcie-designware.h  |   2 +
>  2 files changed, 222 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
> index 0e5a8d200b00..b2ea2c2c986f 100644
> --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> @@ -139,9 +139,10 @@ static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
>  	return 0;
>  }
>
> -static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
> -				  dma_addr_t parent_bus_addr, enum pci_barno bar,
> -				  size_t size)
> +/* BAR Match Mode inbound iATU mapping */
> +static int dw_pcie_ep_ib_atu_bar(struct dw_pcie_ep *ep, u8 func_no, int type,
> +				 dma_addr_t parent_bus_addr, enum pci_barno bar,
> +				 size_t size)
>  {
>  	int ret;
>  	u32 free_win;
> @@ -174,6 +175,208 @@ static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
>  	return 0;
>  }
>
...
> +static int dw_pcie_ep_validate_submap(struct dw_pcie_ep *ep,
> +				      const struct pci_epf_bar_submap *submap,
> +				      unsigned int num_submap, size_t bar_size)
> +{
> +	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> +	u32 align = pci->region_align;
> +	size_t expected = 0;
> +	size_t size, off;
> +	unsigned int i;
> +
> +	if (!align || !IS_ALIGNED(bar_size, align))
> +		return -EINVAL;
> +
> +	/*
> +	 * The array is expected to be sorted by offset before calling this
> +	 * helper. With sorted entries, we can enforce a strict, gapless
> +	 * decomposition of the BAR:
> +	 *  - each entry has a non-zero size
> +	 *  - offset/size/phys_addr are aligned to pci->region_align
> +	 *  - each entry lies within the BAR range
> +	 *  - entries are contiguous (no overlaps, no holes)
> +	 *  - the entries exactly cover the whole BAR
> +	 *
> +	 * Note: dw_pcie_prog_inbound_atu() also checks alignment for
> +	 * offset/phys_addr, but validating up-front avoids partially
> +	 * programming iATU windows in vain.
> +	 */
> +	for (i = 0; i < num_submap; i++) {
> +		off = submap[i].offset;
> +		size = submap[i].size;
> +
> +		if (!size)
> +			return -EINVAL;
> +
> +		if (!IS_ALIGNED(size, align) || !IS_ALIGNED(off, align))
> +			return -EINVAL;
> +
> +		if (!IS_ALIGNED(submap[i].phys_addr, align))
> +			return -EINVAL;
> +
> +		if (off > bar_size || size > bar_size - off)
> +			return -EINVAL;
> +
> +		/* Enforce contiguity (no overlaps, no holes). */
> +		if (off != expected)
> +			return -EINVAL;

submap[i].offset is unnecessary; you can use the running total
(expected += size) as off. The code logic will be simpler.

Frank
> +
> +		expected += size;
> +	}
> +	if (expected != bar_size)
> +		return -EINVAL;
> +
> +	return 0;
> +}
> +
> +/* Address Match Mode inbound iATU mapping */
> +static int dw_pcie_ep_ib_atu_addr(struct dw_pcie_ep *ep, u8 func_no, int type,
> +				  const struct pci_epf_bar *epf_bar)
> +{
> +	const struct pci_epf_bar_submap *submap = epf_bar->submap;
> +	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> +	enum pci_barno bar = epf_bar->barno;
> +	struct device *dev = pci->dev;
> +	u64 pci_addr, parent_bus_addr;
> +	struct dw_pcie_ib_map *new;
> +	u64 size, off, base;
> +	unsigned long flags;
> +	int free_win, ret;
> +	unsigned int i;
> +
> +	if (!epf_bar->num_submap || !submap || !epf_bar->size)
> +		return -EINVAL;
> +
> +	ret = dw_pcie_ep_validate_submap(ep, submap, epf_bar->num_submap,
> +					 epf_bar->size);
> +	if (ret)
> +		return ret;
> +
> +	base = dw_pcie_ep_read_bar_assigned(ep, func_no, bar, epf_bar->flags);
> +	if (!base) {
> +		dev_err(dev,
> +			"BAR%u not assigned, cannot set up sub-range mappings\n",
> +			bar);
> +		return -EINVAL;
> +	}
> +
> +	/* Tear down any existing mappings before (re)programming. */
> +	dw_pcie_ep_clear_ib_maps(ep, bar);
> +
> +	for (i = 0; i < epf_bar->num_submap; i++) {
> +		off = submap[i].offset;
> +		size = submap[i].size;
> +		parent_bus_addr = submap[i].phys_addr;
> +
> +		if (off > (~0ULL) - base) {
> +			ret = -EINVAL;
> +			goto err;
> +		}
> +
> +		pci_addr = base + off;
> +
> +		new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL);
> +		if (!new) {
> +			ret = -ENOMEM;
> +			goto err;
> +		}

Simply allocating an array, struct dw_pcie_ib_map[num_submap], should be
simpler than using a linked list and allocating many small 'new' entries.

Frank
> +
> +		spin_lock_irqsave(&ep->ib_map_lock, flags);
> +
> +		free_win = find_first_zero_bit(ep->ib_window_map,
> +					       pci->num_ib_windows);
> +		if (free_win >= pci->num_ib_windows) {
> +			spin_unlock_irqrestore(&ep->ib_map_lock, flags);
> +			devm_kfree(dev, new);
> +			ret = -ENOSPC;
> +			goto err;
> +		}
> +		set_bit(free_win, ep->ib_window_map);
> +
> +		new->bar = bar;
> +		new->index = free_win;
> +		new->pci_addr = pci_addr;
> +		new->parent_bus_addr = parent_bus_addr;
> +		new->size = size;
> +		list_add_tail(&new->list, &ep->ib_map_list);
> +
> +		spin_unlock_irqrestore(&ep->ib_map_lock, flags);
> +
> +		ret = dw_pcie_prog_inbound_atu(pci, free_win, type,
> +					       parent_bus_addr, pci_addr, size);
> +		if (ret) {
> +			spin_lock_irqsave(&ep->ib_map_lock, flags);
> +			list_del(&new->list);
> +			clear_bit(free_win, ep->ib_window_map);
> +			spin_unlock_irqrestore(&ep->ib_map_lock, flags);
> +			devm_kfree(dev, new);
> +			goto err;
> +		}
> +	}
> +	return 0;
> +err:
> +	dw_pcie_ep_clear_ib_maps(ep, bar);
> +	return ret;
> +}
> +
>  static int dw_pcie_ep_outbound_atu(struct dw_pcie_ep *ep,
>  				   struct dw_pcie_ob_atu_cfg *atu)
>  {
> @@ -204,17 +407,15 @@ static void dw_pcie_ep_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
>  	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
>  	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
>  	enum pci_barno bar = epf_bar->barno;
> -	u32 atu_index = ep->bar_to_atu[bar] - 1;
>
> -	if (!ep->bar_to_atu[bar])
> +	if (!ep->epf_bar[bar])
>  		return;
>
>  	__dw_pcie_ep_reset_bar(pci, func_no, bar, epf_bar->flags);
>
> -	dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, atu_index);
> -	clear_bit(atu_index, ep->ib_window_map);
> +	dw_pcie_ep_clear_ib_maps(ep, bar);
> +
>  	ep->epf_bar[bar] = NULL;
> -	ep->bar_to_atu[bar] = 0;
>  }
>
>  static unsigned int dw_pcie_ep_get_rebar_offset(struct dw_pcie *pci,
> @@ -408,8 +609,12 @@ static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
>  	else
>  		type = PCIE_ATU_TYPE_IO;
>
> -	ret = dw_pcie_ep_inbound_atu(ep, func_no, type, epf_bar->phys_addr, bar,
> -				     size);
> +	if (epf_bar->use_submap)
> +		ret = dw_pcie_ep_ib_atu_addr(ep, func_no, type, epf_bar);
> +	else
> +		ret = dw_pcie_ep_ib_atu_bar(ep, func_no, type,
> +					    epf_bar->phys_addr, bar, size);
> +
>  	if (ret)
>  		return ret;
>
> @@ -638,6 +843,9 @@ dw_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
>  	/* All DWC-based glue drivers support dynamic inbound mapping */
>  	features->dynamic_inbound_mapping = true;
>
> +	/* All DWC-based glue drivers support inbound subrange mapping */
> +	features->subrange_mapping = true;
> +
>  	return features;
>  }
>
> @@ -1128,6 +1336,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
>  	struct device *dev = pci->dev;
>
>  	INIT_LIST_HEAD(&ep->func_list);
> +	INIT_LIST_HEAD(&ep->ib_map_list);
> +	spin_lock_init(&ep->ib_map_lock);
>  	ep->msi_iatu_mapped = false;
>  	ep->msi_msg_addr = 0;
>  	ep->msi_map_size = 0;
> diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> index 4dda9a38d46b..969b1f32dddf 100644
> --- a/drivers/pci/controller/dwc/pcie-designware.h
> +++ b/drivers/pci/controller/dwc/pcie-designware.h
> @@ -479,6 +479,8 @@ struct dw_pcie_ep {
>  	phys_addr_t		*outbound_addr;
>  	unsigned long		*ib_window_map;
>  	unsigned long		*ob_window_map;
> +	struct list_head	ib_map_list;
> +	spinlock_t		ib_map_lock;
>  	void __iomem		*msi_mem;
>  	phys_addr_t		msi_mem_phys;
>  	struct pci_epf_bar	*epf_bar[PCI_STD_NUM_BARS];
> --
> 2.51.0
>
Re: [PATCH v7 5/6] PCI: dwc: ep: Support BAR subrange inbound mapping via Address Match Mode iATU
Posted by Koichiro Den 3 weeks, 4 days ago
On Tue, Jan 13, 2026 at 03:53:52PM -0500, Frank Li wrote:
> On Wed, Jan 14, 2026 at 01:27:18AM +0900, Koichiro Den wrote:
> > Extend dw_pcie_ep_set_bar() to support inbound mappings for BAR
> > subranges using Address Match Mode IB iATU.
> >
> > Rename the existing BAR-match helper into dw_pcie_ep_ib_atu_bar() and
> > introduce dw_pcie_ep_ib_atu_addr() for Address Match Mode. When
> > use_submap is set, read the assigned BAR base address and program one
> > inbound iATU window per subrange. Validate the submap array before
> > programming:
> > - each subrange is aligned to pci->region_align
> > - subranges cover the whole BAR (no gaps and no overlaps)
> > - subranges are sorted in ascending order by offset
> >
> > Track Address Match Mode mappings and tear them down on clear_bar() and
> > on set_bar() error paths to avoid leaving half-programmed state or
> > untranslated BAR holes.
> >
> > Advertise this capability by setting subrange_mapping in the EPC
> > features returned from dw_pcie_ep_get_features().
> >
> > Reviewed-by: Niklas Cassel <cassel@kernel.org>
> > Signed-off-by: Koichiro Den <den@valinux.co.jp>
> > ---
> >  .../pci/controller/dwc/pcie-designware-ep.c   | 230 +++++++++++++++++-
> >  drivers/pci/controller/dwc/pcie-designware.h  |   2 +
> >  2 files changed, 222 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > index 0e5a8d200b00..b2ea2c2c986f 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware-ep.c
> > +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
> > @@ -139,9 +139,10 @@ static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
> >  	return 0;
> >  }
> >
> > -static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
> > -				  dma_addr_t parent_bus_addr, enum pci_barno bar,
> > -				  size_t size)
> > +/* BAR Match Mode inbound iATU mapping */
> > +static int dw_pcie_ep_ib_atu_bar(struct dw_pcie_ep *ep, u8 func_no, int type,
> > +				 dma_addr_t parent_bus_addr, enum pci_barno bar,
> > +				 size_t size)
> >  {
> >  	int ret;
> >  	u32 free_win;
> > @@ -174,6 +175,208 @@ static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
> >  	return 0;
> >  }
> >
> ...
> > +static int dw_pcie_ep_validate_submap(struct dw_pcie_ep *ep,
> > +				      const struct pci_epf_bar_submap *submap,
> > +				      unsigned int num_submap, size_t bar_size)
> > +{
> > +	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > +	u32 align = pci->region_align;
> > +	size_t expected = 0;
> > +	size_t size, off;
> > +	unsigned int i;
> > +
> > +	if (!align || !IS_ALIGNED(bar_size, align))
> > +		return -EINVAL;
> > +
> > +	/*
> > +	 * The array is expected to be sorted by offset before calling this
> > +	 * helper. With sorted entries, we can enforce a strict, gapless
> > +	 * decomposition of the BAR:
> > +	 *  - each entry has a non-zero size
> > +	 *  - offset/size/phys_addr are aligned to pci->region_align
> > +	 *  - each entry lies within the BAR range
> > +	 *  - entries are contiguous (no overlaps, no holes)
> > +	 *  - the entries exactly cover the whole BAR
> > +	 *
> > +	 * Note: dw_pcie_prog_inbound_atu() also checks alignment for
> > +	 * offset/phys_addr, but validating up-front avoids partially
> > +	 * programming iATU windows in vain.
> > +	 */
> > +	for (i = 0; i < num_submap; i++) {
> > +		off = submap[i].offset;
> > +		size = submap[i].size;
> > +
> > +		if (!size)
> > +			return -EINVAL;
> > +
> > +		if (!IS_ALIGNED(size, align) || !IS_ALIGNED(off, align))
> > +			return -EINVAL;
> > +
> > +		if (!IS_ALIGNED(submap[i].phys_addr, align))
> > +			return -EINVAL;
> > +
> > +		if (off > bar_size || size > bar_size - off)
> > +			return -EINVAL;
> > +
> > +		/* Enforce contiguity (no overlaps, no holes). */
> > +		if (off != expected)
> > +			return -EINVAL;
> 
> submap[i].offset is unnecessary, you can use expected += size as off.
> code logic will be simple.

Will fix this, as per my earlier response:
https://lore.kernel.org/all/ngvqrju3bi6sugynhksxsci6rmgqevzpoijjflp2373c6uxlum@vyepxqghbzvn/

> 
> Frank
> > +
> > +		expected += size;
> > +	}
> > +	if (expected != bar_size)
> > +		return -EINVAL;
> > +
> > +	return 0;
> > +}
> > +
> > +/* Address Match Mode inbound iATU mapping */
> > +static int dw_pcie_ep_ib_atu_addr(struct dw_pcie_ep *ep, u8 func_no, int type,
> > +				  const struct pci_epf_bar *epf_bar)
> > +{
> > +	const struct pci_epf_bar_submap *submap = epf_bar->submap;
> > +	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > +	enum pci_barno bar = epf_bar->barno;
> > +	struct device *dev = pci->dev;
> > +	u64 pci_addr, parent_bus_addr;
> > +	struct dw_pcie_ib_map *new;
> > +	u64 size, off, base;
> > +	unsigned long flags;
> > +	int free_win, ret;
> > +	unsigned int i;
> > +
> > +	if (!epf_bar->num_submap || !submap || !epf_bar->size)
> > +		return -EINVAL;
> > +
> > +	ret = dw_pcie_ep_validate_submap(ep, submap, epf_bar->num_submap,
> > +					 epf_bar->size);
> > +	if (ret)
> > +		return ret;
> > +
> > +	base = dw_pcie_ep_read_bar_assigned(ep, func_no, bar, epf_bar->flags);
> > +	if (!base) {
> > +		dev_err(dev,
> > +			"BAR%u not assigned, cannot set up sub-range mappings\n",
> > +			bar);
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* Tear down any existing mappings before (re)programming. */
> > +	dw_pcie_ep_clear_ib_maps(ep, bar);
> > +
> > +	for (i = 0; i < epf_bar->num_submap; i++) {
> > +		off = submap[i].offset;
> > +		size = submap[i].size;
> > +		parent_bus_addr = submap[i].phys_addr;
> > +
> > +		if (off > (~0ULL) - base) {
> > +			ret = -EINVAL;
> > +			goto err;
> > +		}
> > +
> > +		pci_addr = base + off;
> > +
> > +		new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL);
> > +		if (!new) {
> > +			ret = -ENOMEM;
> > +			goto err;
> > +		}
> 
> Simple alloc an array struct dw_pcie_ib_map[num_submap] should be simpler
> than link list and alloc some small news.

I'll do so in v8. Thank you for the review!

Koichiro

> 
> Frank
> > +
> > +		spin_lock_irqsave(&ep->ib_map_lock, flags);
> > +
> > +		free_win = find_first_zero_bit(ep->ib_window_map,
> > +					       pci->num_ib_windows);
> > +		if (free_win >= pci->num_ib_windows) {
> > +			spin_unlock_irqrestore(&ep->ib_map_lock, flags);
> > +			devm_kfree(dev, new);
> > +			ret = -ENOSPC;
> > +			goto err;
> > +		}
> > +		set_bit(free_win, ep->ib_window_map);
> > +
> > +		new->bar = bar;
> > +		new->index = free_win;
> > +		new->pci_addr = pci_addr;
> > +		new->parent_bus_addr = parent_bus_addr;
> > +		new->size = size;
> > +		list_add_tail(&new->list, &ep->ib_map_list);
> > +
> > +		spin_unlock_irqrestore(&ep->ib_map_lock, flags);
> > +
> > +		ret = dw_pcie_prog_inbound_atu(pci, free_win, type,
> > +					       parent_bus_addr, pci_addr, size);
> > +		if (ret) {
> > +			spin_lock_irqsave(&ep->ib_map_lock, flags);
> > +			list_del(&new->list);
> > +			clear_bit(free_win, ep->ib_window_map);
> > +			spin_unlock_irqrestore(&ep->ib_map_lock, flags);
> > +			devm_kfree(dev, new);
> > +			goto err;
> > +		}
> > +	}
> > +	return 0;
> > +err:
> > +	dw_pcie_ep_clear_ib_maps(ep, bar);
> > +	return ret;
> > +}
> > +
> >  static int dw_pcie_ep_outbound_atu(struct dw_pcie_ep *ep,
> >  				   struct dw_pcie_ob_atu_cfg *atu)
> >  {
> > @@ -204,17 +407,15 @@ static void dw_pcie_ep_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
> >  	struct dw_pcie_ep *ep = epc_get_drvdata(epc);
> >  	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> >  	enum pci_barno bar = epf_bar->barno;
> > -	u32 atu_index = ep->bar_to_atu[bar] - 1;
> >
> > -	if (!ep->bar_to_atu[bar])
> > +	if (!ep->epf_bar[bar])
> >  		return;
> >
> >  	__dw_pcie_ep_reset_bar(pci, func_no, bar, epf_bar->flags);
> >
> > -	dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, atu_index);
> > -	clear_bit(atu_index, ep->ib_window_map);
> > +	dw_pcie_ep_clear_ib_maps(ep, bar);
> > +
> >  	ep->epf_bar[bar] = NULL;
> > -	ep->bar_to_atu[bar] = 0;
> >  }
> >
> >  static unsigned int dw_pcie_ep_get_rebar_offset(struct dw_pcie *pci,
> > @@ -408,8 +609,12 @@ static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
> >  	else
> >  		type = PCIE_ATU_TYPE_IO;
> >
> > -	ret = dw_pcie_ep_inbound_atu(ep, func_no, type, epf_bar->phys_addr, bar,
> > -				     size);
> > +	if (epf_bar->use_submap)
> > +		ret = dw_pcie_ep_ib_atu_addr(ep, func_no, type, epf_bar);
> > +	else
> > +		ret = dw_pcie_ep_ib_atu_bar(ep, func_no, type,
> > +					    epf_bar->phys_addr, bar, size);
> > +
> >  	if (ret)
> >  		return ret;
> >
> > @@ -638,6 +843,9 @@ dw_pcie_ep_get_features(struct pci_epc *epc, u8 func_no, u8 vfunc_no)
> >  	/* All DWC-based glue drivers support dynamic inbound mapping */
> >  	features->dynamic_inbound_mapping = true;
> >
> > +	/* All DWC-based glue drivers support inbound subrange mapping */
> > +	features->subrange_mapping = true;
> > +
> >  	return features;
> >  }
> >
> > @@ -1128,6 +1336,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
> >  	struct device *dev = pci->dev;
> >
> >  	INIT_LIST_HEAD(&ep->func_list);
> > +	INIT_LIST_HEAD(&ep->ib_map_list);
> > +	spin_lock_init(&ep->ib_map_lock);
> >  	ep->msi_iatu_mapped = false;
> >  	ep->msi_msg_addr = 0;
> >  	ep->msi_map_size = 0;
> > diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
> > index 4dda9a38d46b..969b1f32dddf 100644
> > --- a/drivers/pci/controller/dwc/pcie-designware.h
> > +++ b/drivers/pci/controller/dwc/pcie-designware.h
> > @@ -479,6 +479,8 @@ struct dw_pcie_ep {
> >  	phys_addr_t		*outbound_addr;
> >  	unsigned long		*ib_window_map;
> >  	unsigned long		*ob_window_map;
> > +	struct list_head	ib_map_list;
> > +	spinlock_t		ib_map_lock;
> >  	void __iomem		*msi_mem;
> >  	phys_addr_t		msi_mem_phys;
> >  	struct pci_epf_bar	*epf_bar[PCI_STD_NUM_BARS];
> > --
> > 2.51.0
> >