xen/arch/arm/domain_build.c | 29 +++++++++++++++ xen/common/device_tree.c | 69 +++++++++++++++++++++++++++++++++++ xen/include/xen/device_tree.h | 10 +++++ 3 files changed, 108 insertions(+)
PCI I/O space are not mapped to dom0 when PCI passthrough is enabled,
also there is no vpci trap handler register for IO bar.
Remove PCI I/O ranges property value from dom0 device tree node so that
dom0 linux will not allocate I/O space for PCI devices if
pci-passthrough is enabled.
Signed-off-by: Rahul Singh <rahul.singh@arm.com>
---
xen/arch/arm/domain_build.c | 29 +++++++++++++++
xen/common/device_tree.c | 69 +++++++++++++++++++++++++++++++++++
xen/include/xen/device_tree.h | 10 +++++
3 files changed, 108 insertions(+)
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 6931c022a2..7cfe64fe97 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -648,6 +648,31 @@ static void __init allocate_static_memory(struct domain *d,
}
#endif
+/*
+ * PCI IO bar are not mapped to dom0 when PCI passthrough is enabled, also
+ * there is no trap handler registered for IO bar, therefore remove the IO
+ * range property from the device tree node for dom0.
+ */
+static int handle_linux_pci_io_ranges(struct kernel_info *kinfo,
+ const struct dt_device_node *node)
+{
+ if ( !is_pci_passthrough_enabled() )
+ return 0;
+
+ if ( !dt_device_type_is_equal(node, "pci") )
+ return 0;
+
+ /*
+ * The current heuristic assumes that a device is a host bridge
+ * if the type is "pci" and then parent type is not "pci".
+ */
+ if ( node->parent && dt_device_type_is_equal(node->parent, "pci") )
+ return 0;
+
+ return dt_pci_remove_io_ranges(kinfo->fdt, node);
+}
+
+
/*
* When PCI passthrough is available we want to keep the
* "linux,pci-domain" in sync for every host bridge.
@@ -723,6 +748,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo,
if ( iommu_node && device_get_class(iommu_node) != DEVICE_IOMMU )
iommu_node = NULL;
+ res = handle_linux_pci_io_ranges(kinfo, node);
+ if ( res )
+ return res;
+
dt_for_each_property_node (node, prop)
{
const void *prop_data = prop->value;
diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
index 4aae281e89..55a883e0f6 100644
--- a/xen/common/device_tree.c
+++ b/xen/common/device_tree.c
@@ -2195,6 +2195,75 @@ int dt_get_pci_domain_nr(struct dt_device_node *node)
return (u16)domain;
}
+int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *dev)
+{
+ const struct dt_device_node *parent = NULL;
+ const struct dt_bus *bus, *pbus;
+ unsigned int rlen;
+ int na, ns, pna, pns, rone;
+ const __be32 *ranges;
+ __be32 regs[((GUEST_ROOT_ADDRESS_CELLS * 2) + GUEST_ROOT_SIZE_CELLS + 1)
+ * 2];
+ __be32 *addr = ®s[0];
+
+ bus = dt_match_bus(dev);
+ if ( !bus )
+ return 0; /* device is not a bus */
+
+ parent = dt_get_parent(dev);
+ if ( !parent )
+ return -EINVAL;
+
+ ranges = dt_get_property(dev, "ranges", &rlen);
+ if ( !ranges )
+ {
+ printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n",
+ dev->full_name);
+ return -EINVAL;
+ }
+ if ( !rlen ) /* Nothing to do */
+ return 0;
+
+ bus->count_cells(dev, &na, &ns);
+ if ( !DT_CHECK_COUNTS(na, ns) )
+ {
+ printk(XENLOG_ERR "dt_parse: Bad cell count for device %s\n",
+ dev->full_name);
+ return -EINVAL;
+ }
+
+ pbus = dt_match_bus(parent);
+ if ( !pbus )
+ {
+ printk(XENLOG_ERR "DT: %s is not a valid bus\n", parent->full_name);
+ return -EINVAL;
+ }
+
+ pbus->count_cells(dev, &pna, &pns);
+ if ( !DT_CHECK_COUNTS(pna, pns) )
+ {
+ printk(XENLOG_ERR "dt_parse: Bad cell count for parent %s\n",
+ dev->full_name);
+ return -EINVAL;
+ }
+
+ /* Now walk through the ranges */
+ rlen /= 4;
+ rone = na + pna + ns;
+ for ( ; rlen >= rone; rlen -= rone, ranges += rone )
+ {
+ unsigned int flags = bus->get_flags(ranges);
+ if ( flags & IORESOURCE_IO )
+ continue;
+
+ memcpy(addr, ranges, 4 * rone);
+
+ addr += rone;
+ }
+
+ return fdt_property(fdt, "ranges", regs, sizeof(regs));
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
index fd6cd00b43..580231f872 100644
--- a/xen/include/xen/device_tree.h
+++ b/xen/include/xen/device_tree.h
@@ -849,6 +849,16 @@ int dt_count_phandle_with_args(const struct dt_device_node *np,
*/
int dt_get_pci_domain_nr(struct dt_device_node *node);
+/**
+ * dt_pci_remove_io_range - Remove the PCI I/O range property value.
+ * @fdt: Pointer to the file descriptor tree.
+ * @node: Device tree node.
+ *
+ * This function will remove the PCI IO range property from the PCI device tree
+ * node.
+ */
+int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *node);
+
struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
#ifdef CONFIG_DEVICE_TREE_DEBUG
--
2.25.1
Hi Rahul,
On 15/02/2022 15:36, Rahul Singh wrote:
> PCI I/O space are not mapped to dom0 when PCI passthrough is enabled,
> also there is no vpci trap handler register for IO bar.
>
> Remove PCI I/O ranges property value from dom0 device tree node so that
> dom0 linux will not allocate I/O space for PCI devices if
> pci-passthrough is enabled.
>
> Signed-off-by: Rahul Singh <rahul.singh@arm.com>
> ---
> xen/arch/arm/domain_build.c | 29 +++++++++++++++
> xen/common/device_tree.c | 69 +++++++++++++++++++++++++++++++++++
> xen/include/xen/device_tree.h | 10 +++++
> 3 files changed, 108 insertions(+)
For future version, please add a changelog. This helps to figure out
what changed more easily.
>
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index 6931c022a2..7cfe64fe97 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -648,6 +648,31 @@ static void __init allocate_static_memory(struct domain *d,
> }
> #endif
>
> +/*
> + * PCI IO bar are not mapped to dom0 when PCI passthrough is enabled, also
> + * there is no trap handler registered for IO bar, therefore remove the IO
> + * range property from the device tree node for dom0.
> + */
> +static int handle_linux_pci_io_ranges(struct kernel_info *kinfo,
> + const struct dt_device_node *node)
> +{
> + if ( !is_pci_passthrough_enabled() )
> + return 0;
> +
> + if ( !dt_device_type_is_equal(node, "pci") )
> + return 0;
> +
> + /*
> + * The current heuristic assumes that a device is a host bridge
> + * if the type is "pci" and then parent type is not "pci".
> + */
> + if ( node->parent && dt_device_type_is_equal(node->parent, "pci") )
> + return 0;
The logic above is exactly the same as in handle_linux_pci_domain(). Can
we create an helper that could be used by both functions? This would
help to keep the logic synchronized.
> +
> + return dt_pci_remove_io_ranges(kinfo->fdt, node);
> +}
> +
> +
> /*
> * When PCI passthrough is available we want to keep the
> * "linux,pci-domain" in sync for every host bridge.
> @@ -723,6 +748,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo,
> if ( iommu_node && device_get_class(iommu_node) != DEVICE_IOMMU )
> iommu_node = NULL;
>
> + res = handle_linux_pci_io_ranges(kinfo, node);
> + if ( res )
> + return res;
> +
> dt_for_each_property_node (node, prop)
> {
> const void *prop_data = prop->value;
> diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
> index 4aae281e89..55a883e0f6 100644
> --- a/xen/common/device_tree.c
> +++ b/xen/common/device_tree.c
If I am not mistaken, the file common/device_tree.c is so far only
containing code to parse the host device-tree. But now...
> @@ -2195,6 +2195,75 @@ int dt_get_pci_domain_nr(struct dt_device_node *node)
> return (u16)domain;
> }
>
> +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *dev)
you are introducing code to write the domain device-tree. I understand
this is because dt_match_bus() is internal. However, I would rather
prefer if we export dt_match_bus() & co and move this code to under
arch/arm/pci/. Maybe we should introduce a file domain_build.c.
Furthermore, the name of the function doesn't really match what the
function does. It will generate "ranges" for the hostbridge and remove
the I/O. We may want to perform other modifications on the range. So I
would name the function something like:
domain_build_generate_hostbridge_range()
> + const struct dt_device_node *parent = NULL;
> + const struct dt_bus *bus, *pbus;
> + unsigned int rlen;
> + int na, ns, pna, pns, rone;
> + const __be32 *ranges;
> + __be32 regs[((GUEST_ROOT_ADDRESS_CELLS * 2) + GUEST_ROOT_SIZE_CELLS + 1)
GUEST_ROOT_*_CELLS are only valid for domU. In theory, there are no
guarantee this will be bigger that what the host device-tree supports.
So you want to use DT_MAX_ADDR_CELLS here.
> + * 2];
Looking at the code below. I couldn't find any check guaranteing the
static array will be big enough to store the ranges provided by the host DT.
> + __be32 *addr = ®s[0];
> +
> + bus = dt_match_bus(dev);
> + if ( !bus )
> + return 0; /* device is not a bus */
> +
> + parent = dt_get_parent(dev);
> + if ( !parent )
> + return -EINVAL;
> +
> + ranges = dt_get_property(dev, "ranges", &rlen);
> + if ( !ranges )
> + {
> + printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n",
> + dev->full_name);
> + return -EINVAL;
> + }
> + if ( !rlen ) /* Nothing to do */
> + return 0;
> +
> + bus->count_cells(dev, &na, &ns);
> + if ( !DT_CHECK_COUNTS(na, ns) )
> + {
> + printk(XENLOG_ERR "dt_parse: Bad cell count for device %s\n",
> + dev->full_name);
> + return -EINVAL;
> + }
> +
> + pbus = dt_match_bus(parent);
> + if ( !pbus )
> + {
> + printk(XENLOG_ERR "DT: %s is not a valid bus\n", parent->full_name);
> + return -EINVAL;
> + }
> +
> + pbus->count_cells(dev, &pna, &pns);
> + if ( !DT_CHECK_COUNTS(pna, pns) )
> + {
> + printk(XENLOG_ERR "dt_parse: Bad cell count for parent %s\n",
> + dev->full_name);
> + return -EINVAL;
> + }
> +
> + /* Now walk through the ranges */
> + rlen /= 4;
> + rone = na + pna + ns;
> + for ( ; rlen >= rone; rlen -= rone, ranges += rone )
> + {
Most of the code in this function is the same as dt_for_each_range().
Can we refactor it to avoid code duplication?
> + unsigned int flags = bus->get_flags(ranges);
> + if ( flags & IORESOURCE_IO )
> + continue;
> +
> + memcpy(addr, ranges, 4 * rone);
> +
> + addr += rone;
> + }
> +
> + return fdt_property(fdt, "ranges", regs, sizeof(regs));
> +}
> +
> /*
> * Local variables:
> * mode: C
> diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
> index fd6cd00b43..580231f872 100644
> --- a/xen/include/xen/device_tree.h
> +++ b/xen/include/xen/device_tree.h
> @@ -849,6 +849,16 @@ int dt_count_phandle_with_args(const struct dt_device_node *np,
> */
> int dt_get_pci_domain_nr(struct dt_device_node *node);
>
> +/**
> + * dt_pci_remove_io_range - Remove the PCI I/O range property value.
> + * @fdt: Pointer to the file descriptor tree.
> + * @node: Device tree node.
> + *
> + * This function will remove the PCI IO range property from the PCI device tree
> + * node.
> + */
> +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *node);
> +
> struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
>
> #ifdef CONFIG_DEVICE_TREE_DEBUG
Cheers,
--
Julien Grall
Hi Julien,
Thanks for reviewing the code.
> On 25 Feb 2022, at 7:46 pm, Julien Grall <julien@xen.org> wrote:
>
> Hi Rahul,
>
> On 15/02/2022 15:36, Rahul Singh wrote:
>> PCI I/O space are not mapped to dom0 when PCI passthrough is enabled,
>> also there is no vpci trap handler register for IO bar.
>> Remove PCI I/O ranges property value from dom0 device tree node so that
>> dom0 linux will not allocate I/O space for PCI devices if
>> pci-passthrough is enabled.
>> Signed-off-by: Rahul Singh <rahul.singh@arm.com>
>> ---
>> xen/arch/arm/domain_build.c | 29 +++++++++++++++
>> xen/common/device_tree.c | 69 +++++++++++++++++++++++++++++++++++
>> xen/include/xen/device_tree.h | 10 +++++
>> 3 files changed, 108 insertions(+)
>
> For future version, please add a changelog. This helps to figure out what changed more easily.
Ok. I will add the changelog in next version.
>
>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>> index 6931c022a2..7cfe64fe97 100644
>> --- a/xen/arch/arm/domain_build.c
>> +++ b/xen/arch/arm/domain_build.c
>> @@ -648,6 +648,31 @@ static void __init allocate_static_memory(struct domain *d,
>> }
>> #endif
>> +/*
>> + * PCI IO bar are not mapped to dom0 when PCI passthrough is enabled, also
>> + * there is no trap handler registered for IO bar, therefore remove the IO
>> + * range property from the device tree node for dom0.
>> + */
>> +static int handle_linux_pci_io_ranges(struct kernel_info *kinfo,
>> + const struct dt_device_node *node)
>> +{
>> + if ( !is_pci_passthrough_enabled() )
>> + return 0;
>> +
>> + if ( !dt_device_type_is_equal(node, "pci") )
>> + return 0;
>> +
>> + /*
>> + * The current heuristic assumes that a device is a host bridge
>> + * if the type is "pci" and then parent type is not "pci".
>> + */
>> + if ( node->parent && dt_device_type_is_equal(node->parent, "pci") )
>> + return 0;
>
>
> The logic above is exactly the same as in handle_linux_pci_domain(). Can we create an helper that could be used by both functions? This would help to keep the logic synchronized.
Ok. I will create the helper name “dt_node_check_pci_hostbridge(..)”.
>
>> +
>> + return dt_pci_remove_io_ranges(kinfo->fdt, node);
>> +}
>> +
>> +
>> /*
>> * When PCI passthrough is available we want to keep the
>> * "linux,pci-domain" in sync for every host bridge.
>> @@ -723,6 +748,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo,
>> if ( iommu_node && device_get_class(iommu_node) != DEVICE_IOMMU )
>> iommu_node = NULL;
>> + res = handle_linux_pci_io_ranges(kinfo, node);
>> + if ( res )
>> + return res;
>> +
>> dt_for_each_property_node (node, prop)
>> {
>> const void *prop_data = prop->value;
>> diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
>> index 4aae281e89..55a883e0f6 100644
>> --- a/xen/common/device_tree.c
>> +++ b/xen/common/device_tree.c
>
> If I am not mistaken, the file common/device_tree.c is so far only containing code to parse the host device-tree. But now...
>
>> @@ -2195,6 +2195,75 @@ int dt_get_pci_domain_nr(struct dt_device_node *node)
>> return (u16)domain;
>> }
>> +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *dev)
>
> you are introducing code to write the domain device-tree. I understand this is because dt_match_bus() is internal. However, I would rather prefer if we export dt_match_bus() & co and move this code to under arch/arm/pci/. Maybe we should introduce a file domain_build.c.
>
> Furthermore, the name of the function doesn't really match what the function does. It will generate "ranges" for the hostbridge and remove the I/O. We may want to perform other modifications on the range. So I would name the function something like:
>
> domain_build_generate_hostbridge_range()
I will modify the code based on your comment in next version.
>
>> + const struct dt_device_node *parent = NULL;
>> + const struct dt_bus *bus, *pbus;
>> + unsigned int rlen;
>> + int na, ns, pna, pns, rone;
>> + const __be32 *ranges;
>> + __be32 regs[((GUEST_ROOT_ADDRESS_CELLS * 2) + GUEST_ROOT_SIZE_CELLS + 1)
>
> GUEST_ROOT_*_CELLS are only valid for domU. In theory, there are no guarantee this will be bigger that what the host device-tree supports.
>
> So you want to use DT_MAX_ADDR_CELLS here.
>
>> + * 2];
> Looking at the code below. I couldn't find any check guaranteing the static array will be big enough to store the ranges provided by the host DT.
Let me fix this in next version.
>
>> + __be32 *addr = ®s[0];
>> +
>> + bus = dt_match_bus(dev);
>> + if ( !bus )
>> + return 0; /* device is not a bus */
>> +
>> + parent = dt_get_parent(dev);
>> + if ( !parent )
>> + return -EINVAL;
>> +
>> + ranges = dt_get_property(dev, "ranges", &rlen);
>> + if ( !ranges )
>> + {
>> + printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n",
>> + dev->full_name);
>> + return -EINVAL;
>> + }
>> + if ( !rlen ) /* Nothing to do */
>> + return 0;
>> +
>> + bus->count_cells(dev, &na, &ns);
>> + if ( !DT_CHECK_COUNTS(na, ns) )
>> + {
>> + printk(XENLOG_ERR "dt_parse: Bad cell count for device %s\n",
>> + dev->full_name);
>> + return -EINVAL;
>> + }
>> +
>> + pbus = dt_match_bus(parent);
>> + if ( !pbus )
>> + {
>> + printk(XENLOG_ERR "DT: %s is not a valid bus\n", parent->full_name);
>> + return -EINVAL;
>> + }
>> +
>> + pbus->count_cells(dev, &pna, &pns);
>> + if ( !DT_CHECK_COUNTS(pna, pns) )
>> + {
>> + printk(XENLOG_ERR "dt_parse: Bad cell count for parent %s\n",
>> + dev->full_name);
>> + return -EINVAL;
>> + }
>> +
>> + /* Now walk through the ranges */
>> + rlen /= 4;
>> + rone = na + pna + ns;
>> + for ( ; rlen >= rone; rlen -= rone, ranges += rone )
>> + {
>
> Most of the code in this function is the same as dt_for_each_range(). Can we refactor it to avoid code duplication?
Ok Let me try to refactor the code.
Regards,
Rahul
>
>> + unsigned int flags = bus->get_flags(ranges);
>> + if ( flags & IORESOURCE_IO )
>> + continue;
>> +
>> + memcpy(addr, ranges, 4 * rone);
>> +
>> + addr += rone;
>> + }
>> +
>> + return fdt_property(fdt, "ranges", regs, sizeof(regs));
>> +}
>> +
>> /*
>> * Local variables:
>> * mode: C
>> diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
>> index fd6cd00b43..580231f872 100644
>> --- a/xen/include/xen/device_tree.h
>> +++ b/xen/include/xen/device_tree.h
>> @@ -849,6 +849,16 @@ int dt_count_phandle_with_args(const struct dt_device_node *np,
>> */
>> int dt_get_pci_domain_nr(struct dt_device_node *node);
>> +/**
>> + * dt_pci_remove_io_range - Remove the PCI I/O range property value.
>> + * @fdt: Pointer to the file descriptor tree.
>> + * @node: Device tree node.
>> + *
>> + * This function will remove the PCI IO range property from the PCI device tree
>> + * node.
>> + */
>> +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *node);
>> +
>> struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
>> #ifdef CONFIG_DEVICE_TREE_DEBUG
>
> Cheers,
>
> --
> Julien Grall
© 2016 - 2026 Red Hat, Inc.