Extend dw_pcie_ep_set_bar() to support inbound mappings for BAR
subranges using Address Match Mode IB iATU.
Rename the existing BAR-match helper to dw_pcie_ep_ib_atu_bar() and
introduce dw_pcie_ep_ib_atu_addr() for Address Match Mode. When
use_submap is set, read the assigned BAR base address and program one
inbound iATU window per subrange. Validate the submap array before
programming:
- each subrange is aligned to pci->region_align
- subranges cover the whole BAR (no gaps and no overlaps)
Track address-match mappings and tear them down on clear_bar() and on
set_bar() error paths to avoid leaving half-programmed state or untranslated
BAR holes.
Signed-off-by: Koichiro Den <den@valinux.co.jp>
---
.../pci/controller/dwc/pcie-designware-ep.c | 255 +++++++++++++++++-
drivers/pci/controller/dwc/pcie-designware.h | 2 +
2 files changed, 246 insertions(+), 11 deletions(-)
diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 1195d401df19..466f416694dd 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -8,8 +8,10 @@
#include <linux/align.h>
#include <linux/bitfield.h>
+#include <linux/cleanup.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/sort.h>
#include "pcie-designware.h"
#include <linux/pci-epc.h>
@@ -139,9 +141,10 @@ static int dw_pcie_ep_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
return 0;
}
-static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
- dma_addr_t parent_bus_addr, enum pci_barno bar,
- size_t size)
+/* BAR match mode inbound iATU mapping */
+static int dw_pcie_ep_ib_atu_bar(struct dw_pcie_ep *ep, u8 func_no, int type,
+ dma_addr_t parent_bus_addr, enum pci_barno bar,
+ size_t size)
{
int ret;
u32 free_win;
@@ -174,6 +177,229 @@ static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
return 0;
}
+/*
+ * Inbound mapping bookkeeping for address-match mode.
+ *
+ * One instance is allocated per programmed inbound iATU window and linked
+ * into ep->ib_map_list:
+ *   list            - node on ep->ib_map_list
+ *   bar             - BAR the window belongs to (used to find the entries to
+ *                     tear down for a given BAR)
+ *   pci_addr        - PCI bus address matched by the window
+ *                     (assigned BAR base + submap offset)
+ *   parent_bus_addr - address the window translates to (submap phys_addr)
+ *   size            - size of the window in bytes
+ *   index           - inbound iATU window index, i.e. the bit reserved in
+ *                     ep->ib_window_map
+ */
+struct dw_pcie_ib_map {
+ struct list_head list;
+ enum pci_barno bar;
+ u64 pci_addr;
+ u64 parent_bus_addr;
+ u64 size;
+ u32 index;
+};
+
+/*
+ * Release every inbound iATU window currently associated with @bar: the
+ * BAR match-mode window recorded in ep->bar_to_atu[] (if any) and all
+ * address match-mode windows tracked on ep->ib_map_list.
+ */
+static void dw_pcie_ep_clear_ib_maps(struct dw_pcie_ep *ep, enum pci_barno bar)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	struct dw_pcie_ib_map *entry, *next;
+	unsigned long irqflags;
+
+	/* bar_to_atu[] holds "window index + 1"; zero means nothing to undo. */
+	if (ep->bar_to_atu[bar]) {
+		u32 win = ep->bar_to_atu[bar] - 1;
+
+		dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, win);
+		clear_bit(win, ep->ib_window_map);
+		ep->bar_to_atu[bar] = 0;
+	}
+
+	/* Drop each tracked address-match window that belongs to this BAR. */
+	spin_lock_irqsave(&ep->ib_map_lock, irqflags);
+	list_for_each_entry_safe(entry, next, &ep->ib_map_list, list) {
+		if (entry->bar == bar) {
+			dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB,
+					    entry->index);
+			clear_bit(entry->index, ep->ib_window_map);
+			list_del(&entry->list);
+			devm_kfree(pci->dev, entry);
+		}
+	}
+	spin_unlock_irqrestore(&ep->ib_map_lock, irqflags);
+}
+
+/*
+ * Read back the base address currently held in the BAR register(s) of
+ * @bar through DBI. @flags carries the PCI_BASE_ADDRESS_* bits describing
+ * the BAR: I/O BARs are masked with PCI_BASE_ADDRESS_IO_MASK, memory BARs
+ * with PCI_BASE_ADDRESS_MEM_MASK, and for 64-bit memory BARs the following
+ * register supplies the upper 32 bits.
+ */
+static u64 dw_pcie_ep_read_bar_assigned(struct dw_pcie_ep *ep, u8 func_no,
+					enum pci_barno bar, int flags)
+{
+	const u32 bar_reg = PCI_BASE_ADDRESS_0 + (4 * bar);
+	u32 low = dw_pcie_ep_readl_dbi(ep, func_no, bar_reg);
+	u64 base;
+
+	if (flags & PCI_BASE_ADDRESS_SPACE)
+		return low & PCI_BASE_ADDRESS_IO_MASK;
+
+	base = low & PCI_BASE_ADDRESS_MEM_MASK;
+	if (flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
+		base |= (u64)dw_pcie_ep_readl_dbi(ep, func_no, bar_reg + 4) << 32;
+
+	return base;
+}
+
+/* sort() comparator: orders struct pci_epf_bar_submap by ascending offset. */
+static int dw_pcie_ep_submap_offset_cmp(const void *a, const void *b)
+{
+	const struct pci_epf_bar_submap *lhs = a, *rhs = b;
+
+	if (lhs->offset == rhs->offset)
+		return 0;
+
+	return lhs->offset < rhs->offset ? -1 : 1;
+}
+
+/*
+ * Check that @smap (already sorted by offset) splits a BAR of @bar_size
+ * bytes into back-to-back pieces.
+ *
+ * Every entry must have a non-zero size, have offset, size and phys_addr
+ * aligned to pci->region_align, fit inside the BAR and start exactly where
+ * the previous entry ended; together the entries must add up to @bar_size.
+ *
+ * dw_pcie_prog_inbound_atu() also checks alignment for offset/phys_addr,
+ * but rejecting a bad layout here avoids programming some iATU windows
+ * only to undo them again.
+ *
+ * Returns 0 when the layout is valid, -EINVAL otherwise.
+ */
+static int dw_pcie_ep_validate_submap(struct dw_pcie_ep *ep,
+				      struct pci_epf_bar_submap *smap,
+				      unsigned int num_submap, size_t bar_size)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+	const struct pci_epf_bar_submap *end = smap + num_submap;
+	const struct pci_epf_bar_submap *cur;
+	const u32 align = pci->region_align;
+	size_t covered = 0;
+
+	if (!align || !IS_ALIGNED(bar_size, align))
+		return -EINVAL;
+
+	for (cur = smap; cur < end; cur++) {
+		size_t start = cur->offset;
+		size_t len = cur->size;
+
+		/* Empty or misaligned entries are never acceptable. */
+		if (!len || !IS_ALIGNED(len, align) ||
+		    !IS_ALIGNED(start, align) ||
+		    !IS_ALIGNED(cur->phys_addr, align))
+			return -EINVAL;
+
+		/* The entry has to lie entirely inside the BAR. */
+		if (start > bar_size || len > bar_size - start)
+			return -EINVAL;
+
+		/* No overlap with, and no hole after, the previous entry. */
+		if (start != covered)
+			return -EINVAL;
+
+		covered += len;
+	}
+
+	/* The entries must cover the BAR up to its very end. */
+	return covered == bar_size ? 0 : -EINVAL;
+}
+
+/*
+ * Address Match Mode IB iATU mapping.
+ *
+ * Programs one inbound iATU window per entry of epf_bar->submap. The
+ * entries are copied, sorted by offset and validated against epf_bar->size
+ * before anything is programmed; each window then maps
+ * (assigned BAR base + entry offset) to the entry's phys_addr.
+ *
+ * Mappings already set up for the BAR are torn down before programming,
+ * and a failure while programming tears down whatever was set up so far.
+ *
+ * Returns 0 on success, -EINVAL for a missing or invalid submap, an
+ * unassigned BAR or an address overflow, -ENOMEM on allocation failure,
+ * -ENOSPC when no inbound window is free, or the error returned by
+ * dw_pcie_prog_inbound_atu().
+ */
+static int dw_pcie_ep_ib_atu_addr(struct dw_pcie_ep *ep, u8 func_no, int type,
+ const struct pci_epf_bar *epf_bar)
+{
+ struct pci_epf_bar_submap *submap = epf_bar->submap;
+ struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
+ enum pci_barno bar = epf_bar->barno;
+ struct device *dev = pci->dev;
+ u64 pci_addr, parent_bus_addr;
+ struct dw_pcie_ib_map *new;
+ u64 size, off, base;
+ unsigned long flags;
+ int free_win, ret;
+ unsigned int i;
+
+ if (!epf_bar->num_submap || !submap || !epf_bar->size)
+ return -EINVAL;
+
+ /*
+ * Work on a sorted copy so the caller's array is left untouched. The
+ * copy is released automatically on every return path (__free(kfree)).
+ */
+ struct pci_epf_bar_submap *smap __free(kfree) = kcalloc(
+ epf_bar->num_submap, sizeof(*smap), GFP_KERNEL);
+ if (!smap)
+ return -ENOMEM;
+
+ memcpy(smap, submap, epf_bar->num_submap * sizeof(*smap));
+ sort(smap, epf_bar->num_submap, sizeof(*smap),
+ dw_pcie_ep_submap_offset_cmp, NULL);
+
+ ret = dw_pcie_ep_validate_submap(ep, smap, epf_bar->num_submap, epf_bar->size);
+ if (ret)
+ return ret;
+
+ base = dw_pcie_ep_read_bar_assigned(ep, func_no, bar, epf_bar->flags);
+ if (!base) { /* a zero base is treated as "BAR not assigned" */
+ dev_err(dev,
+ "BAR%u not assigned, cannot set up sub-range mappings\n",
+ bar);
+ return -EINVAL;
+ }
+
+ /* Tear down any existing mappings before (re)programming. */
+ dw_pcie_ep_clear_ib_maps(ep, bar);
+
+ for (i = 0; i < epf_bar->num_submap; i++) {
+ off = smap[i].offset;
+ size = smap[i].size;
+ parent_bus_addr = smap[i].phys_addr;
+
+ if (off > (~0ULL) - base) { /* base + off would wrap around u64 */
+ ret = -EINVAL;
+ goto err;
+ }
+
+ pci_addr = base + off;
+
+ new = devm_kzalloc(dev, sizeof(*new), GFP_KERNEL);
+ if (!new) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ spin_lock_irqsave(&ep->ib_map_lock, flags);
+
+ free_win = find_first_zero_bit(ep->ib_window_map,
+ pci->num_ib_windows);
+ if (free_win >= pci->num_ib_windows) { /* no free inbound window */
+ spin_unlock_irqrestore(&ep->ib_map_lock, flags);
+ devm_kfree(dev, new);
+ ret = -ENOSPC;
+ goto err;
+ }
+ set_bit(free_win, ep->ib_window_map);
+
+ new->bar = bar;
+ new->index = free_win;
+ new->pci_addr = pci_addr;
+ new->parent_bus_addr = parent_bus_addr;
+ new->size = size;
+ list_add_tail(&new->list, &ep->ib_map_list);
+
+ spin_unlock_irqrestore(&ep->ib_map_lock, flags);
+
+ ret = dw_pcie_prog_inbound_atu(pci, free_win, type,
+ parent_bus_addr, pci_addr, size);
+ if (ret) {
+ /* Undo the bookkeeping for this window; err handles the rest. */
+ spin_lock_irqsave(&ep->ib_map_lock, flags);
+ list_del(&new->list);
+ clear_bit(free_win, ep->ib_window_map);
+ spin_unlock_irqrestore(&ep->ib_map_lock, flags);
+ devm_kfree(dev, new);
+ goto err;
+ }
+ }
+ return 0;
+err: /* release every window set up for this BAR so far */
+ dw_pcie_ep_clear_ib_maps(ep, bar);
+ return ret;
+}
+
static int dw_pcie_ep_outbound_atu(struct dw_pcie_ep *ep,
struct dw_pcie_ob_atu_cfg *atu)
{
@@ -204,17 +430,15 @@ static void dw_pcie_ep_clear_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
struct dw_pcie_ep *ep = epc_get_drvdata(epc);
struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
enum pci_barno bar = epf_bar->barno;
- u32 atu_index = ep->bar_to_atu[bar] - 1;
- if (!ep->bar_to_atu[bar])
+ if (!ep->epf_bar[bar])
return;
__dw_pcie_ep_reset_bar(pci, func_no, bar, epf_bar->flags);
- dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, atu_index);
- clear_bit(atu_index, ep->ib_window_map);
+ dw_pcie_ep_clear_ib_maps(ep, bar);
+
ep->epf_bar[bar] = NULL;
- ep->bar_to_atu[bar] = 0;
}
static unsigned int dw_pcie_ep_get_rebar_offset(struct dw_pcie *pci,
@@ -408,10 +632,17 @@ static int dw_pcie_ep_set_bar(struct pci_epc *epc, u8 func_no, u8 vfunc_no,
else
type = PCIE_ATU_TYPE_IO;
- ret = dw_pcie_ep_inbound_atu(ep, func_no, type, epf_bar->phys_addr, bar,
- size);
- if (ret)
+ if (epf_bar->use_submap)
+ ret = dw_pcie_ep_ib_atu_addr(ep, func_no, type, epf_bar);
+ else
+ ret = dw_pcie_ep_ib_atu_bar(ep, func_no, type,
+ epf_bar->phys_addr, bar, size);
+
+ if (ret) {
+ if (epf_bar->use_submap)
+ dw_pcie_ep_clear_bar(epc, func_no, vfunc_no, epf_bar);
return ret;
+ }
ep->epf_bar[bar] = epf_bar;
@@ -1120,6 +1351,8 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
struct device *dev = pci->dev;
INIT_LIST_HEAD(&ep->func_list);
+ INIT_LIST_HEAD(&ep->ib_map_list);
+ spin_lock_init(&ep->ib_map_lock);
ep->msi_iatu_mapped = false;
ep->msi_msg_addr = 0;
ep->msi_map_size = 0;
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index f87c67a7a482..1ebe8a9ee139 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -479,6 +479,8 @@ struct dw_pcie_ep {
phys_addr_t *outbound_addr;
unsigned long *ib_window_map;
unsigned long *ob_window_map;
+ struct list_head ib_map_list;
+ spinlock_t ib_map_lock;
void __iomem *msi_mem;
phys_addr_t msi_mem_phys;
struct pci_epf_bar *epf_bar[PCI_STD_NUM_BARS];
--
2.51.0
Hello Koichiro,
I like this design way more, where you have a one-shot (all-or-nothing)
submap programming to avoid leaving half-programmed BAR state.
On Wed, Jan 07, 2026 at 01:13:58PM +0900, Koichiro Den wrote:
> +/* Address Match Mode IB iATU mapping */
> +static int dw_pcie_ep_ib_atu_addr(struct dw_pcie_ep *ep, u8 func_no, int type,
> + const struct pci_epf_bar *epf_bar)
> +{
> + struct pci_epf_bar_submap *submap = epf_bar->submap;
> + struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> + enum pci_barno bar = epf_bar->barno;
> + struct device *dev = pci->dev;
> + u64 pci_addr, parent_bus_addr;
> + struct dw_pcie_ib_map *new;
> + u64 size, off, base;
> + unsigned long flags;
> + int free_win, ret;
> + unsigned int i;
> +
> + if (!epf_bar->num_submap || !submap || !epf_bar->size)
> + return -EINVAL;
> +
> + /* Work on a sorted copy */
> + struct pci_epf_bar_submap *smap __free(kfree) = kcalloc(
> + epf_bar->num_submap, sizeof(*smap), GFP_KERNEL);
> + if (!smap)
> + return -ENOMEM;
> +
> + memcpy(smap, submap, epf_bar->num_submap * sizeof(*smap));
> + sort(smap, epf_bar->num_submap, sizeof(*smap),
> + dw_pcie_ep_submap_offset_cmp, NULL);
My only comment is that:
Why not simply let dw_pcie_ep_validate_submap() return an error if the
caller of dw_pcie_ep_set_bar() did not provide a submap with offsets in
ascending order (i.e. sorted).
Performing an unconditional sort of the submap here looks a bit out of
place, IMO.
> +
> + ret = dw_pcie_ep_validate_submap(ep, smap, epf_bar->num_submap, epf_bar->size);
> + if (ret)
> + return ret;
Kind regards,
Niklas
On Wed, Jan 07, 2026 at 03:27:28PM +0100, Niklas Cassel wrote:
> Hello Koichiro,
>
>
> I like this design way more, where you have a one-shot (all-or-nothing)
> submap programming to avoid leaving half-programmed BAR state.
>
>
> On Wed, Jan 07, 2026 at 01:13:58PM +0900, Koichiro Den wrote:
> > +/* Address Match Mode IB iATU mapping */
> > +static int dw_pcie_ep_ib_atu_addr(struct dw_pcie_ep *ep, u8 func_no, int type,
> > + const struct pci_epf_bar *epf_bar)
> > +{
> > + struct pci_epf_bar_submap *submap = epf_bar->submap;
> > + struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
> > + enum pci_barno bar = epf_bar->barno;
> > + struct device *dev = pci->dev;
> > + u64 pci_addr, parent_bus_addr;
> > + struct dw_pcie_ib_map *new;
> > + u64 size, off, base;
> > + unsigned long flags;
> > + int free_win, ret;
> > + unsigned int i;
> > +
> > + if (!epf_bar->num_submap || !submap || !epf_bar->size)
> > + return -EINVAL;
> > +
> > + /* Work on a sorted copy */
> > + struct pci_epf_bar_submap *smap __free(kfree) = kcalloc(
> > + epf_bar->num_submap, sizeof(*smap), GFP_KERNEL);
> > + if (!smap)
> > + return -ENOMEM;
> > +
> > + memcpy(smap, submap, epf_bar->num_submap * sizeof(*smap));
> > + sort(smap, epf_bar->num_submap, sizeof(*smap),
> > + dw_pcie_ep_submap_offset_cmp, NULL);
>
> My only comment is that:
>
> Why not simply let dw_pcie_ep_validate_submap() return an error if the
> caller of dw_pcie_ep_set_bar() did not provide a submap with offsets in
> ascending order (i.e. sorted).
>
> Performing an unconditional sort of the submap here looks a bit out of
> place, IMO.
There wasn't a strong reason to sort the submap here, it was just to make
things easier for callers. That said, given the one-shot (all-or-nothing)
design, the caller is expected to know the complete layout at the time of
invocation, so requiring the submap to be sorted is not a strong constraint.
I'll respin this accordingly and send a v3. Thank you for the feedback.
Koichiro
>
>
> > +
> > + ret = dw_pcie_ep_validate_submap(ep, smap, epf_bar->num_submap, epf_bar->size);
> > + if (ret)
> > + return ret;
>
>
> Kind regards,
> Niklas
© 2016 - 2026 Red Hat, Inc.