xen/arch/arm/domain_build.c | 29 +++++++++++++++ xen/common/device_tree.c | 69 +++++++++++++++++++++++++++++++++++ xen/include/xen/device_tree.h | 10 +++++ 3 files changed, 108 insertions(+)
PCI I/O space are not mapped to dom0 when PCI passthrough is enabled,
also there is no vpci trap handler register for IO bar.
Remove PCI I/O ranges property value from dom0 device tree node so that
dom0 linux will not allocate I/O space for PCI devices if
pci-passthrough is enabled.
Signed-off-by: Rahul Singh <rahul.singh@arm.com>
---
xen/arch/arm/domain_build.c | 29 +++++++++++++++
xen/common/device_tree.c | 69 +++++++++++++++++++++++++++++++++++
xen/include/xen/device_tree.h | 10 +++++
3 files changed, 108 insertions(+)
diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 6931c022a2..7cfe64fe97 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -648,6 +648,31 @@ static void __init allocate_static_memory(struct domain *d,
}
#endif
+/*
+ * PCI IO bar are not mapped to dom0 when PCI passthrough is enabled, also
+ * there is no trap handler registered for IO bar, therefore remove the IO
+ * range property from the device tree node for dom0.
+ */
+static int handle_linux_pci_io_ranges(struct kernel_info *kinfo,
+ const struct dt_device_node *node)
+{
+ if ( !is_pci_passthrough_enabled() )
+ return 0;
+
+ if ( !dt_device_type_is_equal(node, "pci") )
+ return 0;
+
+ /*
+ * The current heuristic assumes that a device is a host bridge
+ * if the type is "pci" and then parent type is not "pci".
+ */
+ if ( node->parent && dt_device_type_is_equal(node->parent, "pci") )
+ return 0;
+
+ return dt_pci_remove_io_ranges(kinfo->fdt, node);
+}
+
+
/*
* When PCI passthrough is available we want to keep the
* "linux,pci-domain" in sync for every host bridge.
@@ -723,6 +748,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo,
if ( iommu_node && device_get_class(iommu_node) != DEVICE_IOMMU )
iommu_node = NULL;
+ res = handle_linux_pci_io_ranges(kinfo, node);
+ if ( res )
+ return res;
+
dt_for_each_property_node (node, prop)
{
const void *prop_data = prop->value;
diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
index 4aae281e89..55a883e0f6 100644
--- a/xen/common/device_tree.c
+++ b/xen/common/device_tree.c
@@ -2195,6 +2195,75 @@ int dt_get_pci_domain_nr(struct dt_device_node *node)
return (u16)domain;
}
+int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *dev)
+{
+ const struct dt_device_node *parent = NULL;
+ const struct dt_bus *bus, *pbus;
+ unsigned int rlen;
+ int na, ns, pna, pns, rone;
+ const __be32 *ranges;
+ __be32 regs[((GUEST_ROOT_ADDRESS_CELLS * 2) + GUEST_ROOT_SIZE_CELLS + 1)
+ * 2];
+ __be32 *addr = ®s[0];
+
+ bus = dt_match_bus(dev);
+ if ( !bus )
+ return 0; /* device is not a bus */
+
+ parent = dt_get_parent(dev);
+ if ( !parent )
+ return -EINVAL;
+
+ ranges = dt_get_property(dev, "ranges", &rlen);
+ if ( !ranges )
+ {
+ printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n",
+ dev->full_name);
+ return -EINVAL;
+ }
+ if ( !rlen ) /* Nothing to do */
+ return 0;
+
+ bus->count_cells(dev, &na, &ns);
+ if ( !DT_CHECK_COUNTS(na, ns) )
+ {
+ printk(XENLOG_ERR "dt_parse: Bad cell count for device %s\n",
+ dev->full_name);
+ return -EINVAL;
+ }
+
+ pbus = dt_match_bus(parent);
+ if ( !pbus )
+ {
+ printk(XENLOG_ERR "DT: %s is not a valid bus\n", parent->full_name);
+ return -EINVAL;
+ }
+
+ pbus->count_cells(dev, &pna, &pns);
+ if ( !DT_CHECK_COUNTS(pna, pns) )
+ {
+ printk(XENLOG_ERR "dt_parse: Bad cell count for parent %s\n",
+ dev->full_name);
+ return -EINVAL;
+ }
+
+ /* Now walk through the ranges */
+ rlen /= 4;
+ rone = na + pna + ns;
+ for ( ; rlen >= rone; rlen -= rone, ranges += rone )
+ {
+ unsigned int flags = bus->get_flags(ranges);
+ if ( flags & IORESOURCE_IO )
+ continue;
+
+ memcpy(addr, ranges, 4 * rone);
+
+ addr += rone;
+ }
+
+ return fdt_property(fdt, "ranges", regs, sizeof(regs));
+}
+
/*
* Local variables:
* mode: C
diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
index fd6cd00b43..580231f872 100644
--- a/xen/include/xen/device_tree.h
+++ b/xen/include/xen/device_tree.h
@@ -849,6 +849,16 @@ int dt_count_phandle_with_args(const struct dt_device_node *np,
*/
int dt_get_pci_domain_nr(struct dt_device_node *node);
+/**
+ * dt_pci_remove_io_range - Remove the PCI I/O range property value.
+ * @fdt: Pointer to the file descriptor tree.
+ * @node: Device tree node.
+ *
+ * This function will remove the PCI IO range property from the PCI device tree
+ * node.
+ */
+int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *node);
+
struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
#ifdef CONFIG_DEVICE_TREE_DEBUG
--
2.25.1
Hi Rahul, On 15/02/2022 15:36, Rahul Singh wrote: > PCI I/O space are not mapped to dom0 when PCI passthrough is enabled, > also there is no vpci trap handler register for IO bar. > > Remove PCI I/O ranges property value from dom0 device tree node so that > dom0 linux will not allocate I/O space for PCI devices if > pci-passthrough is enabled. > > Signed-off-by: Rahul Singh <rahul.singh@arm.com> > --- > xen/arch/arm/domain_build.c | 29 +++++++++++++++ > xen/common/device_tree.c | 69 +++++++++++++++++++++++++++++++++++ > xen/include/xen/device_tree.h | 10 +++++ > 3 files changed, 108 insertions(+) For future version, please add a changelog. This helps to figure out what changed more easily. > > diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c > index 6931c022a2..7cfe64fe97 100644 > --- a/xen/arch/arm/domain_build.c > +++ b/xen/arch/arm/domain_build.c > @@ -648,6 +648,31 @@ static void __init allocate_static_memory(struct domain *d, > } > #endif > > +/* > + * PCI IO bar are not mapped to dom0 when PCI passthrough is enabled, also > + * there is no trap handler registered for IO bar, therefore remove the IO > + * range property from the device tree node for dom0. > + */ > +static int handle_linux_pci_io_ranges(struct kernel_info *kinfo, > + const struct dt_device_node *node) > +{ > + if ( !is_pci_passthrough_enabled() ) > + return 0; > + > + if ( !dt_device_type_is_equal(node, "pci") ) > + return 0; > + > + /* > + * The current heuristic assumes that a device is a host bridge > + * if the type is "pci" and then parent type is not "pci". > + */ > + if ( node->parent && dt_device_type_is_equal(node->parent, "pci") ) > + return 0; The logic above is exactly the same as in handle_linux_pci_domain(). Can we create an helper that could be used by both functions? This would help to keep the logic synchronized. > + > + return dt_pci_remove_io_ranges(kinfo->fdt, node); > +} > + > + > /* > * When PCI passthrough is available we want to keep the > * "linux,pci-domain" in sync for every host bridge. > @@ -723,6 +748,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo, > if ( iommu_node && device_get_class(iommu_node) != DEVICE_IOMMU ) > iommu_node = NULL; > > + res = handle_linux_pci_io_ranges(kinfo, node); > + if ( res ) > + return res; > + > dt_for_each_property_node (node, prop) > { > const void *prop_data = prop->value; > diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c > index 4aae281e89..55a883e0f6 100644 > --- a/xen/common/device_tree.c > +++ b/xen/common/device_tree.c If I am not mistaken, the file common/device_tree.c is so far only containing code to parse the host device-tree. But now... > @@ -2195,6 +2195,75 @@ int dt_get_pci_domain_nr(struct dt_device_node *node) > return (u16)domain; > } > > +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *dev) you are introducing code to write the domain device-tree. I understand this is because dt_match_bus() is internal. However, I would rather prefer if we export dt_match_bus() & co and move this code to under arch/arm/pci/. Maybe we should introduce a file domain_build.c. Furthermore, the name of the function doesn't really match what the function does. It will generate "ranges" for the hostbridge and remove the I/O. We may want to perform other modifications on the range. So I would name the function something like: domain_build_generate_hostbridge_range() > + const struct dt_device_node *parent = NULL; > + const struct dt_bus *bus, *pbus; > + unsigned int rlen; > + int na, ns, pna, pns, rone; > + const __be32 *ranges; > + __be32 regs[((GUEST_ROOT_ADDRESS_CELLS * 2) + GUEST_ROOT_SIZE_CELLS + 1) GUEST_ROOT_*_CELLS are only valid for domU. In theory, there are no guarantee this will be bigger that what the host device-tree supports. So you want to use DT_MAX_ADDR_CELLS here. > + * 2]; Looking at the code below. I couldn't find any check guaranteing the static array will be big enough to store the ranges provided by the host DT. > + __be32 *addr = ®s[0]; > + > + bus = dt_match_bus(dev); > + if ( !bus ) > + return 0; /* device is not a bus */ > + > + parent = dt_get_parent(dev); > + if ( !parent ) > + return -EINVAL; > + > + ranges = dt_get_property(dev, "ranges", &rlen); > + if ( !ranges ) > + { > + printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n", > + dev->full_name); > + return -EINVAL; > + } > + if ( !rlen ) /* Nothing to do */ > + return 0; > + > + bus->count_cells(dev, &na, &ns); > + if ( !DT_CHECK_COUNTS(na, ns) ) > + { > + printk(XENLOG_ERR "dt_parse: Bad cell count for device %s\n", > + dev->full_name); > + return -EINVAL; > + } > + > + pbus = dt_match_bus(parent); > + if ( !pbus ) > + { > + printk(XENLOG_ERR "DT: %s is not a valid bus\n", parent->full_name); > + return -EINVAL; > + } > + > + pbus->count_cells(dev, &pna, &pns); > + if ( !DT_CHECK_COUNTS(pna, pns) ) > + { > + printk(XENLOG_ERR "dt_parse: Bad cell count for parent %s\n", > + dev->full_name); > + return -EINVAL; > + } > + > + /* Now walk through the ranges */ > + rlen /= 4; > + rone = na + pna + ns; > + for ( ; rlen >= rone; rlen -= rone, ranges += rone ) > + { Most of the code in this function is the same as dt_for_each_range(). Can we refactor it to avoid code duplication? > + unsigned int flags = bus->get_flags(ranges); > + if ( flags & IORESOURCE_IO ) > + continue; > + > + memcpy(addr, ranges, 4 * rone); > + > + addr += rone; > + } > + > + return fdt_property(fdt, "ranges", regs, sizeof(regs)); > +} > + > /* > * Local variables: > * mode: C > diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h > index fd6cd00b43..580231f872 100644 > --- a/xen/include/xen/device_tree.h > +++ b/xen/include/xen/device_tree.h > @@ -849,6 +849,16 @@ int dt_count_phandle_with_args(const struct dt_device_node *np, > */ > int dt_get_pci_domain_nr(struct dt_device_node *node); > > +/** > + * dt_pci_remove_io_range - Remove the PCI I/O range property value. > + * @fdt: Pointer to the file descriptor tree. > + * @node: Device tree node. > + * > + * This function will remove the PCI IO range property from the PCI device tree > + * node. > + */ > +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *node); > + > struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle); > > #ifdef CONFIG_DEVICE_TREE_DEBUG Cheers, -- Julien Grall
Hi Julien, Thanks for reviewing the code. > On 25 Feb 2022, at 7:46 pm, Julien Grall <julien@xen.org> wrote: > > Hi Rahul, > > On 15/02/2022 15:36, Rahul Singh wrote: >> PCI I/O space are not mapped to dom0 when PCI passthrough is enabled, >> also there is no vpci trap handler register for IO bar. >> Remove PCI I/O ranges property value from dom0 device tree node so that >> dom0 linux will not allocate I/O space for PCI devices if >> pci-passthrough is enabled. >> Signed-off-by: Rahul Singh <rahul.singh@arm.com> >> --- >> xen/arch/arm/domain_build.c | 29 +++++++++++++++ >> xen/common/device_tree.c | 69 +++++++++++++++++++++++++++++++++++ >> xen/include/xen/device_tree.h | 10 +++++ >> 3 files changed, 108 insertions(+) > > For future version, please add a changelog. This helps to figure out what changed more easily. Ok. I will add the changelog in next version. > >> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c >> index 6931c022a2..7cfe64fe97 100644 >> --- a/xen/arch/arm/domain_build.c >> +++ b/xen/arch/arm/domain_build.c >> @@ -648,6 +648,31 @@ static void __init allocate_static_memory(struct domain *d, >> } >> #endif >> +/* >> + * PCI IO bar are not mapped to dom0 when PCI passthrough is enabled, also >> + * there is no trap handler registered for IO bar, therefore remove the IO >> + * range property from the device tree node for dom0. >> + */ >> +static int handle_linux_pci_io_ranges(struct kernel_info *kinfo, >> + const struct dt_device_node *node) >> +{ >> + if ( !is_pci_passthrough_enabled() ) >> + return 0; >> + >> + if ( !dt_device_type_is_equal(node, "pci") ) >> + return 0; >> + >> + /* >> + * The current heuristic assumes that a device is a host bridge >> + * if the type is "pci" and then parent type is not "pci". >> + */ >> + if ( node->parent && dt_device_type_is_equal(node->parent, "pci") ) >> + return 0; > > > The logic above is exactly the same as in handle_linux_pci_domain(). Can we create an helper that could be used by both functions? This would help to keep the logic synchronized. Ok. I will create the helper name “dt_node_check_pci_hostbridge(..)”. > >> + >> + return dt_pci_remove_io_ranges(kinfo->fdt, node); >> +} >> + >> + >> /* >> * When PCI passthrough is available we want to keep the >> * "linux,pci-domain" in sync for every host bridge. >> @@ -723,6 +748,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo, >> if ( iommu_node && device_get_class(iommu_node) != DEVICE_IOMMU ) >> iommu_node = NULL; >> + res = handle_linux_pci_io_ranges(kinfo, node); >> + if ( res ) >> + return res; >> + >> dt_for_each_property_node (node, prop) >> { >> const void *prop_data = prop->value; >> diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c >> index 4aae281e89..55a883e0f6 100644 >> --- a/xen/common/device_tree.c >> +++ b/xen/common/device_tree.c > > If I am not mistaken, the file common/device_tree.c is so far only containing code to parse the host device-tree. But now... > >> @@ -2195,6 +2195,75 @@ int dt_get_pci_domain_nr(struct dt_device_node *node) >> return (u16)domain; >> } >> +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *dev) > > you are introducing code to write the domain device-tree. I understand this is because dt_match_bus() is internal. However, I would rather prefer if we export dt_match_bus() & co and move this code to under arch/arm/pci/. Maybe we should introduce a file domain_build.c. > > Furthermore, the name of the function doesn't really match what the function does. It will generate "ranges" for the hostbridge and remove the I/O. We may want to perform other modifications on the range. So I would name the function something like: > > domain_build_generate_hostbridge_range() I will modify the code based on your comment in next version. > >> + const struct dt_device_node *parent = NULL; >> + const struct dt_bus *bus, *pbus; >> + unsigned int rlen; >> + int na, ns, pna, pns, rone; >> + const __be32 *ranges; >> + __be32 regs[((GUEST_ROOT_ADDRESS_CELLS * 2) + GUEST_ROOT_SIZE_CELLS + 1) > > GUEST_ROOT_*_CELLS are only valid for domU. In theory, there are no guarantee this will be bigger that what the host device-tree supports. > > So you want to use DT_MAX_ADDR_CELLS here. > >> + * 2]; > Looking at the code below. I couldn't find any check guaranteing the static array will be big enough to store the ranges provided by the host DT. Let me fix this in next version. > >> + __be32 *addr = ®s[0]; >> + >> + bus = dt_match_bus(dev); >> + if ( !bus ) >> + return 0; /* device is not a bus */ >> + >> + parent = dt_get_parent(dev); >> + if ( !parent ) >> + return -EINVAL; >> + >> + ranges = dt_get_property(dev, "ranges", &rlen); >> + if ( !ranges ) >> + { >> + printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n", >> + dev->full_name); >> + return -EINVAL; >> + } >> + if ( !rlen ) /* Nothing to do */ >> + return 0; >> + >> + bus->count_cells(dev, &na, &ns); >> + if ( !DT_CHECK_COUNTS(na, ns) ) >> + { >> + printk(XENLOG_ERR "dt_parse: Bad cell count for device %s\n", >> + dev->full_name); >> + return -EINVAL; >> + } >> + >> + pbus = dt_match_bus(parent); >> + if ( !pbus ) >> + { >> + printk(XENLOG_ERR "DT: %s is not a valid bus\n", parent->full_name); >> + return -EINVAL; >> + } >> + >> + pbus->count_cells(dev, &pna, &pns); >> + if ( !DT_CHECK_COUNTS(pna, pns) ) >> + { >> + printk(XENLOG_ERR "dt_parse: Bad cell count for parent %s\n", >> + dev->full_name); >> + return -EINVAL; >> + } >> + >> + /* Now walk through the ranges */ >> + rlen /= 4; >> + rone = na + pna + ns; >> + for ( ; rlen >= rone; rlen -= rone, ranges += rone ) >> + { > > Most of the code in this function is the same as dt_for_each_range(). Can we refactor it to avoid code duplication? Ok Let me try to refactor the code. Regards, Rahul > >> + unsigned int flags = bus->get_flags(ranges); >> + if ( flags & IORESOURCE_IO ) >> + continue; >> + >> + memcpy(addr, ranges, 4 * rone); >> + >> + addr += rone; >> + } >> + >> + return fdt_property(fdt, "ranges", regs, sizeof(regs)); >> +} >> + >> /* >> * Local variables: >> * mode: C >> diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h >> index fd6cd00b43..580231f872 100644 >> --- a/xen/include/xen/device_tree.h >> +++ b/xen/include/xen/device_tree.h >> @@ -849,6 +849,16 @@ int dt_count_phandle_with_args(const struct dt_device_node *np, >> */ >> int dt_get_pci_domain_nr(struct dt_device_node *node); >> +/** >> + * dt_pci_remove_io_range - Remove the PCI I/O range property value. >> + * @fdt: Pointer to the file descriptor tree. >> + * @node: Device tree node. >> + * >> + * This function will remove the PCI IO range property from the PCI device tree >> + * node. >> + */ >> +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *node); >> + >> struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle); >> #ifdef CONFIG_DEVICE_TREE_DEBUG > > Cheers, > > -- > Julien Grall
© 2016 - 2024 Red Hat, Inc.