From: Stewart Hildebrand <stewart.hildebrand@amd.com>
A PCI device must have valid BARs in order to assign it to a domain. On
ARM, firmware is unlikely to have initialized the BARs, so we must do
this in Xen. During setup_hwdom_pci_devices(), check if each BAR is
valid. If the BAR happens to already be valid, remove the BAR range from
a rangeset of valid PCI ranges so as to avoid overlap when reserving a
new BAR. If not valid, reserve a new BAR address from the rangeset and
write it to the device.
Avaliable ranges are read from DT during init and stored in distinct
rangesets.
Signed-off-by: Stewart Hildebrand <stewart.hildebrand@amd.com>
Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
---
v1->v2:
* move hwdom_uses_vpci to this patch
* fixup error reporting
---
xen/arch/arm/include/asm/pci.h | 7 +++
xen/arch/arm/pci/pci-host-common.c | 76 ++++++++++++++++++++++++++++
xen/arch/x86/include/asm/pci.h | 20 ++++++++
xen/common/rangeset.c | 35 +++++++++++++
xen/drivers/passthrough/pci.c | 79 ++++++++++++++++++++++++++++++
xen/include/xen/rangeset.h | 6 ++-
6 files changed, 222 insertions(+), 1 deletion(-)
diff --git a/xen/arch/arm/include/asm/pci.h b/xen/arch/arm/include/asm/pci.h
index 7db36e7bc3..46bdc7713b 100644
--- a/xen/arch/arm/include/asm/pci.h
+++ b/xen/arch/arm/include/asm/pci.h
@@ -74,6 +74,8 @@ struct pci_host_bridge {
struct pci_config_window *child_cfg;
const struct pci_ops *child_ops;
void *priv; /* Private data of the bridge. */
+ struct rangeset *bar_ranges;
+ struct rangeset *bar_ranges_prefetch;
};
struct pci_ops {
@@ -154,6 +156,11 @@ void pci_generic_init_bus_range_child(struct dt_device_node *dev,
bool arch_pci_device_physdevop(void);
+uint64_t pci_get_new_bar_addr(const struct pci_dev *pdev, uint64_t size,
+ bool is_64bit, bool prefetch);
+int pci_reserve_bar_range(const struct pci_dev *pdev, uint64_t addr,
+ uint64_t size, bool prefetch);
+
#else /*!CONFIG_HAS_PCI*/
struct pci_dev;
diff --git a/xen/arch/arm/pci/pci-host-common.c b/xen/arch/arm/pci/pci-host-common.c
index 67447d8696..86467e7cf6 100644
--- a/xen/arch/arm/pci/pci-host-common.c
+++ b/xen/arch/arm/pci/pci-host-common.c
@@ -21,6 +21,7 @@
#include <xen/rwlock.h>
#include <xen/sched.h>
#include <xen/vmap.h>
+#include <xen/resource.h>
#include <asm/setup.h>
@@ -232,6 +233,21 @@ static int pci_bus_find_domain_nr(struct dt_device_node *dev)
return domain;
}
+static int add_bar_range(const struct dt_device_node *dev, uint32_t flags,
+ uint64_t addr, uint64_t len, void *data)
+{
+ struct pci_host_bridge *bridge = data;
+
+ if ( !(flags & IORESOURCE_MEM) )
+ return 0;
+
+ if ( flags & IORESOURCE_PREFETCH )
+ return rangeset_add_range(bridge->bar_ranges_prefetch, addr,
+ addr + len - 1);
+ else
+ return rangeset_add_range(bridge->bar_ranges, addr, addr + len - 1);
+}
+
struct pci_host_bridge *
pci_host_common_probe(struct dt_device_node *dev,
const struct pci_ecam_ops *ops,
@@ -286,6 +302,14 @@ pci_host_common_probe(struct dt_device_node *dev,
pci_add_host_bridge(bridge);
pci_add_segment(bridge->segment);
+ bridge->bar_ranges = rangeset_new(NULL, "BAR ranges",
+ RANGESETF_prettyprint_hex);
+ bridge->bar_ranges_prefetch = rangeset_new(NULL,
+ "BAR ranges (prefetchable)",
+ RANGESETF_prettyprint_hex);
+ if ( bridge->bar_ranges && bridge->bar_ranges_prefetch )
+ dt_for_each_range(bridge->dt_node, add_bar_range, bridge);
+
return bridge;
err_child:
@@ -476,6 +500,58 @@ bool pci_check_bar(const struct pci_dev *pdev, mfn_t start, mfn_t end)
return bar_data.is_valid;
}
+
+uint64_t pci_get_new_bar_addr(const struct pci_dev *pdev, uint64_t size,
+ bool is_64bit, bool prefetch)
+{
+ struct pci_host_bridge *bridge;
+ struct rangeset *range;
+ uint64_t addr;
+
+ bridge = pci_find_host_bridge(pdev->seg, pdev->bus);
+ if ( !bridge )
+ return 0;
+
+ range = prefetch ? bridge->bar_ranges_prefetch : bridge->bar_ranges;
+
+ if ( size < PAGE_SIZE )
+ size = PAGE_SIZE;
+
+ if ( is_64bit && !rangeset_find_aligned_range(range, size, GB(4), &addr) )
+ {
+ if ( rangeset_remove_range(range, addr, addr + size - 1) )
+ printk(XENLOG_ERR "Failed to remove BAR range %lx-%lx from rangeset\n",
+ addr, addr + size - 1);
+ return addr;
+ }
+ if ( !rangeset_find_aligned_range(range, size, 0, &addr) )
+ {
+ if ( !is_64bit && addr >= GB(4) )
+ return 0;
+ if ( rangeset_remove_range(range, addr, addr + size - 1) )
+ printk(XENLOG_ERR "Failed to remove BAR range %lx-%lx from rangeset\n",
+ addr, addr + size - 1);
+ return addr;
+ }
+
+ return 0;
+}
+
+int pci_reserve_bar_range(const struct pci_dev *pdev, uint64_t addr,
+ uint64_t size, bool prefetch)
+{
+ struct pci_host_bridge *bridge;
+ struct rangeset *range;
+
+ bridge = pci_find_host_bridge(pdev->seg, pdev->bus);
+ if ( !bridge )
+ return 0;
+
+ range = prefetch ? bridge->bar_ranges_prefetch : bridge->bar_ranges;
+
+ return rangeset_remove_range(range, addr, addr + size - 1);
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/arch/x86/include/asm/pci.h b/xen/arch/x86/include/asm/pci.h
index 0b98081aea..4cc3f2e853 100644
--- a/xen/arch/x86/include/asm/pci.h
+++ b/xen/arch/x86/include/asm/pci.h
@@ -76,4 +76,24 @@ int pci_sanitize_bar_memory(struct rangeset *r);
void pci_setup(void);
+/* Unlike ARM, HW domain alyways uses vpci for x86 */
+static inline bool hwdom_uses_vpci(void)
+{
+ return true;
+}
+
+static inline uint64_t pci_get_new_bar_addr(const struct pci_dev *pdev,
+ uint64_t size, bool is_64bit,
+ bool prefetch)
+{
+ return 0;
+}
+
+static inline int pci_reserve_bar_range(const struct pci_dev *pdev,
+ uint64_t addr, uint64_t size,
+ bool prefetch)
+{
+ return 0;
+}
+
#endif /* __X86_PCI_H__ */
diff --git a/xen/common/rangeset.c b/xen/common/rangeset.c
index 0e3b9acd35..c1c6f8610c 100644
--- a/xen/common/rangeset.c
+++ b/xen/common/rangeset.c
@@ -357,6 +357,41 @@ int rangeset_claim_range(struct rangeset *r, unsigned long size,
return 0;
}
+int rangeset_find_aligned_range(struct rangeset *r, unsigned long size,
+ unsigned long min, unsigned long *s)
+{
+ struct range *x;
+
+ /* Power of 2 check */
+ if ( (size & (size - 1)) != 0 )
+ {
+ *s = 0;
+ return -EINVAL;
+ }
+
+ read_lock(&r->lock);
+
+ for ( x = first_range(r); x; x = next_range(r, x) )
+ {
+ /* Assumes size is a power of 2 */
+ unsigned long start_aligned = (x->s + size - 1) & ~(size - 1);
+
+ if ( x->e > start_aligned &&
+ (x->e - start_aligned) >= size &&
+ start_aligned >= min )
+ {
+ read_unlock(&r->lock);
+ *s = start_aligned;
+ return 0;
+ }
+ }
+
+ read_unlock(&r->lock);
+ *s = 0;
+
+ return -ENOSPC;
+}
+
int rangeset_consume_ranges(struct rangeset *r,
int (*cb)(unsigned long s, unsigned long e,
void *ctxt, unsigned long *c),
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 3edcfa8a04..4f5de9a542 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -1172,6 +1172,80 @@ int __init scan_pci_devices(void)
return ret;
}
+static void __init cf_check reserve_bar_range(struct pci_dev *pdev, uint8_t reg,
+ uint64_t addr, uint64_t size,
+ bool is_64bit, bool prefetch)
+{
+ if ( pci_check_bar(pdev, maddr_to_mfn(addr),
+ maddr_to_mfn(addr + size - 1)) )
+ pci_reserve_bar_range(pdev, addr, size, prefetch);
+}
+
+static void __init cf_check get_new_bar_addr(struct pci_dev *pdev, uint8_t reg,
+ uint64_t addr, uint64_t size,
+ bool is_64bit, bool prefetch)
+{
+ if ( !pci_check_bar(pdev, maddr_to_mfn(addr),
+ maddr_to_mfn(addr + size - 1)) )
+ {
+ uint16_t cmd = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
+
+ addr = pci_get_new_bar_addr(pdev, size, is_64bit, prefetch);
+
+ pci_conf_write16(pdev->sbdf, PCI_COMMAND,
+ cmd & ~(PCI_COMMAND_MEMORY | PCI_COMMAND_IO));
+
+ pci_conf_write32(pdev->sbdf, reg,
+ (addr & GENMASK(31, 0)) |
+ (is_64bit ? PCI_BASE_ADDRESS_MEM_TYPE_64 : 0));
+
+ if ( is_64bit )
+ pci_conf_write32(pdev->sbdf, reg + 4, addr >> 32);
+
+ pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd);
+ }
+}
+
+static int __init cf_check bars_iterate(struct pci_seg *pseg, void *arg)
+{
+ struct pci_dev *pdev;
+ unsigned int i, ret, num_bars = PCI_HEADER_NORMAL_NR_BARS;
+ uint64_t addr, size;
+ void (*cb)(struct pci_dev *, uint8_t, uint64_t, uint64_t, bool, bool) = arg;
+
+ list_for_each_entry ( pdev, &pseg->alldevs_list, alldevs_list )
+ {
+ if ( (pci_conf_read8(pdev->sbdf, PCI_HEADER_TYPE) & 0x7f) ==
+ PCI_HEADER_TYPE_NORMAL )
+ {
+ for ( i = 0; i < num_bars; i += ret )
+ {
+ uint8_t reg = PCI_BASE_ADDRESS_0 + i * 4;
+ bool prefetch;
+
+ if ( (pci_conf_read32(pdev->sbdf, reg) & PCI_BASE_ADDRESS_SPACE)
+ == PCI_BASE_ADDRESS_SPACE_IO )
+ {
+ ret = 1;
+ continue;
+ }
+
+ ret = pci_size_mem_bar(pdev->sbdf, reg, &addr, &size,
+ (i == num_bars - 1) ? PCI_BAR_LAST : 0);
+
+ if ( !size )
+ continue;
+ prefetch = !!(pci_conf_read32(pdev->sbdf, reg) &
+ PCI_BASE_ADDRESS_MEM_PREFETCH);
+
+ cb(pdev, reg, addr, size, ret == 2, prefetch);
+ }
+ }
+ }
+
+ return 0;
+}
+
struct setup_hwdom {
struct domain *d;
int (*handler)(uint8_t devfn, struct pci_dev *pdev);
@@ -1263,6 +1337,11 @@ void __hwdom_init setup_hwdom_pci_devices(
struct setup_hwdom ctxt = { .d = d, .handler = handler };
pcidevs_lock();
+ if ( hwdom_uses_vpci() )
+ {
+ pci_segments_iterate(bars_iterate, reserve_bar_range);
+ pci_segments_iterate(bars_iterate, get_new_bar_addr);
+ }
pci_segments_iterate(_setup_hwdom_pci_devices, &ctxt);
pcidevs_unlock();
}
diff --git a/xen/include/xen/rangeset.h b/xen/include/xen/rangeset.h
index 817505badf..e71e810f82 100644
--- a/xen/include/xen/rangeset.h
+++ b/xen/include/xen/rangeset.h
@@ -56,11 +56,15 @@ void rangeset_limit(
bool __must_check rangeset_is_empty(
const struct rangeset *r);
-/* Add/claim/remove/query/purge a numeric range. */
+/* Add/claim/find/remove/query/purge a numeric range. */
int __must_check rangeset_add_range(
struct rangeset *r, unsigned long s, unsigned long e);
int __must_check rangeset_claim_range(struct rangeset *r, unsigned long size,
unsigned long *s);
+int __must_check rangeset_find_aligned_range(struct rangeset *r,
+ unsigned long size,
+ unsigned long min,
+ unsigned long *s);
int __must_check rangeset_remove_range(
struct rangeset *r, unsigned long s, unsigned long e);
bool __must_check rangeset_contains_range(
--
2.34.1
On 22.10.2025 15:56, Mykyta Poturai wrote:
> @@ -232,6 +233,21 @@ static int pci_bus_find_domain_nr(struct dt_device_node *dev)
> return domain;
> }
>
> +static int add_bar_range(const struct dt_device_node *dev, uint32_t flags,
> + uint64_t addr, uint64_t len, void *data)
> +{
> + struct pci_host_bridge *bridge = data;
> +
> + if ( !(flags & IORESOURCE_MEM) )
> + return 0;
> +
> + if ( flags & IORESOURCE_PREFETCH )
> + return rangeset_add_range(bridge->bar_ranges_prefetch, addr,
> + addr + len - 1);
> + else
> + return rangeset_add_range(bridge->bar_ranges, addr, addr + len - 1);
Here and everywhere else you use rangesets: This loses significant bits on
Arm32. Iirc the plan was to select HAS_PCI only for Arm64, but that's entirely
invisible here. I think there want to be perhaps multiple BUILD_BUG_ON()s, at
the very least.
> --- a/xen/arch/x86/include/asm/pci.h
> +++ b/xen/arch/x86/include/asm/pci.h
> @@ -76,4 +76,24 @@ int pci_sanitize_bar_memory(struct rangeset *r);
>
> void pci_setup(void);
>
> +/* Unlike ARM, HW domain alyways uses vpci for x86 */
> +static inline bool hwdom_uses_vpci(void)
> +{
> + return true;
> +}
What the comment says is not true. It is true for PVH Dom0. The sole use
of the predicate therefore is questionable, too.
> +static inline uint64_t pci_get_new_bar_addr(const struct pci_dev *pdev,
> + uint64_t size, bool is_64bit,
> + bool prefetch)
> +{
> + return 0;
> +}
> +
> +static inline int pci_reserve_bar_range(const struct pci_dev *pdev,
> + uint64_t addr, uint64_t size,
> + bool prefetch)
> +{
> + return 0;
> +}
Neither here nor elsewhere is any word said on what these do, what a
"new BAR range" is, or what "reserving" would mean.
> --- a/xen/common/rangeset.c
> +++ b/xen/common/rangeset.c
> @@ -357,6 +357,41 @@ int rangeset_claim_range(struct rangeset *r, unsigned long size,
> return 0;
> }
>
> +int rangeset_find_aligned_range(struct rangeset *r, unsigned long size,
> + unsigned long min, unsigned long *s)
> +{
> + struct range *x;
> +
> + /* Power of 2 check */
> + if ( (size & (size - 1)) != 0 )
> + {
> + *s = 0;
> + return -EINVAL;
> + }
> +
> + read_lock(&r->lock);
> +
> + for ( x = first_range(r); x; x = next_range(r, x) )
> + {
> + /* Assumes size is a power of 2 */
> + unsigned long start_aligned = (x->s + size - 1) & ~(size - 1);
> +
> + if ( x->e > start_aligned &&
> + (x->e - start_aligned) >= size &&
> + start_aligned >= min )
> + {
> + read_unlock(&r->lock);
With this and ...
> + *s = start_aligned;
> + return 0;
> + }
> + }
> +
> + read_unlock(&r->lock);
... this, how can the caller be sure the result they receive is not stale by
the time they get to look at and use it?
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -1172,6 +1172,80 @@ int __init scan_pci_devices(void)
> return ret;
> }
>
> +static void __init cf_check reserve_bar_range(struct pci_dev *pdev, uint8_t reg,
> + uint64_t addr, uint64_t size,
> + bool is_64bit, bool prefetch)
> +{
> + if ( pci_check_bar(pdev, maddr_to_mfn(addr),
> + maddr_to_mfn(addr + size - 1)) )
> + pci_reserve_bar_range(pdev, addr, size, prefetch);
> +}
> +
> +static void __init cf_check get_new_bar_addr(struct pci_dev *pdev, uint8_t reg,
> + uint64_t addr, uint64_t size,
> + bool is_64bit, bool prefetch)
> +{
> + if ( !pci_check_bar(pdev, maddr_to_mfn(addr),
> + maddr_to_mfn(addr + size - 1)) )
> + {
> + uint16_t cmd = pci_conf_read16(pdev->sbdf, PCI_COMMAND);
> +
> + addr = pci_get_new_bar_addr(pdev, size, is_64bit, prefetch);
> +
> + pci_conf_write16(pdev->sbdf, PCI_COMMAND,
> + cmd & ~(PCI_COMMAND_MEMORY | PCI_COMMAND_IO));
> +
> + pci_conf_write32(pdev->sbdf, reg,
> + (addr & GENMASK(31, 0)) |
> + (is_64bit ? PCI_BASE_ADDRESS_MEM_TYPE_64 : 0));
> +
> + if ( is_64bit )
> + pci_conf_write32(pdev->sbdf, reg + 4, addr >> 32);
> +
> + pci_conf_write16(pdev->sbdf, PCI_COMMAND, cmd);
> + }
> +}
> +
> +static int __init cf_check bars_iterate(struct pci_seg *pseg, void *arg)
> +{
> + struct pci_dev *pdev;
> + unsigned int i, ret, num_bars = PCI_HEADER_NORMAL_NR_BARS;
> + uint64_t addr, size;
> + void (*cb)(struct pci_dev *, uint8_t, uint64_t, uint64_t, bool, bool) = arg;
There needs to be some build-time checking that this and the two callbacks
above actually match in type. Likely by way of using a function typedef.
> --- a/xen/include/xen/rangeset.h
> +++ b/xen/include/xen/rangeset.h
> @@ -56,11 +56,15 @@ void rangeset_limit(
> bool __must_check rangeset_is_empty(
> const struct rangeset *r);
>
> -/* Add/claim/remove/query/purge a numeric range. */
> +/* Add/claim/find/remove/query/purge a numeric range. */
> int __must_check rangeset_add_range(
> struct rangeset *r, unsigned long s, unsigned long e);
> int __must_check rangeset_claim_range(struct rangeset *r, unsigned long size,
> unsigned long *s);
> +int __must_check rangeset_find_aligned_range(struct rangeset *r,
> + unsigned long size,
> + unsigned long min,
> + unsigned long *s);
From these parameter names I'm unable to tell what the "aligned" part of
the name is referring to. IOW it may be necessary to have an accompanying
comment.
Jan
© 2016 - 2025 Red Hat, Inc.