[PATCH v2] xen/arm: vpci: remove PCI I/O ranges property value

Rahul Singh posted 1 patch 2 years, 3 months ago
Test gitlab-ci failed
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/8ea25f00c8641bfd95a4d8444b82ca2ac3ee5ce0.1644939115.git.rahul.singh@arm.com
xen/arch/arm/domain_build.c   | 29 +++++++++++++++
xen/common/device_tree.c      | 69 +++++++++++++++++++++++++++++++++++
xen/include/xen/device_tree.h | 10 +++++
3 files changed, 108 insertions(+)
[PATCH v2] xen/arm: vpci: remove PCI I/O ranges property value
Posted by Rahul Singh 2 years, 3 months ago
PCI I/O space are not mapped to dom0 when PCI passthrough is enabled,
also there is no vpci trap handler register for IO bar.

Remove PCI I/O ranges property value from dom0 device tree node so that
dom0 linux will not allocate I/O space for PCI devices if
pci-passthrough is enabled.

Signed-off-by: Rahul Singh <rahul.singh@arm.com>
---
 xen/arch/arm/domain_build.c   | 29 +++++++++++++++
 xen/common/device_tree.c      | 69 +++++++++++++++++++++++++++++++++++
 xen/include/xen/device_tree.h | 10 +++++
 3 files changed, 108 insertions(+)

diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 6931c022a2..7cfe64fe97 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -648,6 +648,31 @@ static void __init allocate_static_memory(struct domain *d,
 }
 #endif
 
+/*
+ * PCI IO bar are not mapped to dom0 when PCI passthrough is enabled, also
+ * there is no trap handler registered for IO bar, therefore remove the IO
+ * range property from the device tree node for dom0.
+ */
+static int handle_linux_pci_io_ranges(struct kernel_info *kinfo,
+                                      const struct dt_device_node *node)
+{
+    if ( !is_pci_passthrough_enabled() )
+        return 0;
+
+    if ( !dt_device_type_is_equal(node, "pci") )
+        return 0;
+
+    /*
+     * The current heuristic assumes that a device is a host bridge
+     * if the type is "pci" and then parent type is not "pci".
+     */
+    if ( node->parent && dt_device_type_is_equal(node->parent, "pci") )
+        return 0;
+
+    return dt_pci_remove_io_ranges(kinfo->fdt, node);
+}
+
+
 /*
  * When PCI passthrough is available we want to keep the
  * "linux,pci-domain" in sync for every host bridge.
@@ -723,6 +748,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo,
     if ( iommu_node && device_get_class(iommu_node) != DEVICE_IOMMU )
         iommu_node = NULL;
 
+    res = handle_linux_pci_io_ranges(kinfo, node);
+    if ( res )
+        return res;
+
     dt_for_each_property_node (node, prop)
     {
         const void *prop_data = prop->value;
diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
index 4aae281e89..55a883e0f6 100644
--- a/xen/common/device_tree.c
+++ b/xen/common/device_tree.c
@@ -2195,6 +2195,75 @@ int dt_get_pci_domain_nr(struct dt_device_node *node)
     return (u16)domain;
 }
 
+int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *dev)
+{
+    const struct dt_device_node *parent = NULL;
+    const struct dt_bus *bus, *pbus;
+    unsigned int rlen;
+    int na, ns, pna, pns, rone;
+    const __be32 *ranges;
+    __be32 regs[((GUEST_ROOT_ADDRESS_CELLS * 2) + GUEST_ROOT_SIZE_CELLS + 1)
+               * 2];
+    __be32 *addr = &regs[0];
+
+    bus = dt_match_bus(dev);
+    if ( !bus )
+        return 0; /* device is not a bus */
+
+    parent = dt_get_parent(dev);
+    if ( !parent )
+        return -EINVAL;
+
+    ranges = dt_get_property(dev, "ranges", &rlen);
+    if ( !ranges )
+    {
+        printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n",
+               dev->full_name);
+        return -EINVAL;
+    }
+    if ( !rlen ) /* Nothing to do */
+        return 0;
+
+    bus->count_cells(dev, &na, &ns);
+    if ( !DT_CHECK_COUNTS(na, ns) )
+    {
+        printk(XENLOG_ERR "dt_parse: Bad cell count for device %s\n",
+               dev->full_name);
+        return -EINVAL;
+    }
+
+    pbus = dt_match_bus(parent);
+    if ( !pbus )
+    {
+        printk(XENLOG_ERR "DT: %s is not a valid bus\n", parent->full_name);
+        return -EINVAL;
+    }
+
+    pbus->count_cells(dev, &pna, &pns);
+    if ( !DT_CHECK_COUNTS(pna, pns) )
+    {
+        printk(XENLOG_ERR "dt_parse: Bad cell count for parent %s\n",
+               dev->full_name);
+        return -EINVAL;
+    }
+
+    /* Now walk through the ranges */
+    rlen /= 4;
+    rone = na + pna + ns;
+    for ( ; rlen >= rone; rlen -= rone, ranges += rone )
+    {
+        unsigned int flags = bus->get_flags(ranges);
+        if ( flags & IORESOURCE_IO )
+            continue;
+
+        memcpy(addr, ranges, 4 * rone);
+
+        addr += rone;
+    }
+
+    return fdt_property(fdt, "ranges", regs, sizeof(regs));
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
index fd6cd00b43..580231f872 100644
--- a/xen/include/xen/device_tree.h
+++ b/xen/include/xen/device_tree.h
@@ -849,6 +849,16 @@ int dt_count_phandle_with_args(const struct dt_device_node *np,
  */
 int dt_get_pci_domain_nr(struct dt_device_node *node);
 
+/**
+ * dt_pci_remove_io_range - Remove the PCI I/O range property value.
+ * @fdt: Pointer to the file descriptor tree.
+ * @node: Device tree node.
+ *
+ * This function will remove the PCI IO range property from the PCI device tree
+ * node.
+ */
+int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *node);
+
 struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
 
 #ifdef CONFIG_DEVICE_TREE_DEBUG
-- 
2.25.1


Re: [PATCH v2] xen/arm: vpci: remove PCI I/O ranges property value
Posted by Julien Grall 2 years, 2 months ago
Hi Rahul,

On 15/02/2022 15:36, Rahul Singh wrote:
> PCI I/O space are not mapped to dom0 when PCI passthrough is enabled,
> also there is no vpci trap handler register for IO bar.
> 
> Remove PCI I/O ranges property value from dom0 device tree node so that
> dom0 linux will not allocate I/O space for PCI devices if
> pci-passthrough is enabled.
> 
> Signed-off-by: Rahul Singh <rahul.singh@arm.com>
> ---
>   xen/arch/arm/domain_build.c   | 29 +++++++++++++++
>   xen/common/device_tree.c      | 69 +++++++++++++++++++++++++++++++++++
>   xen/include/xen/device_tree.h | 10 +++++
>   3 files changed, 108 insertions(+)

For future version, please add a changelog. This helps to figure out 
what changed more easily.

> 
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index 6931c022a2..7cfe64fe97 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -648,6 +648,31 @@ static void __init allocate_static_memory(struct domain *d,
>   }
>   #endif
>   
> +/*
> + * PCI IO bar are not mapped to dom0 when PCI passthrough is enabled, also
> + * there is no trap handler registered for IO bar, therefore remove the IO
> + * range property from the device tree node for dom0.
> + */
> +static int handle_linux_pci_io_ranges(struct kernel_info *kinfo,
> +                                      const struct dt_device_node *node)
> +{
> +    if ( !is_pci_passthrough_enabled() )
> +        return 0;
> +
> +    if ( !dt_device_type_is_equal(node, "pci") )
> +        return 0;
> +
> +    /*
> +     * The current heuristic assumes that a device is a host bridge
> +     * if the type is "pci" and then parent type is not "pci".
> +     */
> +    if ( node->parent && dt_device_type_is_equal(node->parent, "pci") )
> +        return 0;


The logic above is exactly the same as in handle_linux_pci_domain(). Can 
we create an helper that could be used by both functions? This would 
help to keep the logic synchronized.

> +
> +    return dt_pci_remove_io_ranges(kinfo->fdt, node);
> +}
> +
> +
>   /*
>    * When PCI passthrough is available we want to keep the
>    * "linux,pci-domain" in sync for every host bridge.
> @@ -723,6 +748,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo,
>       if ( iommu_node && device_get_class(iommu_node) != DEVICE_IOMMU )
>           iommu_node = NULL;
>   
> +    res = handle_linux_pci_io_ranges(kinfo, node);
> +    if ( res )
> +        return res;
> +
>       dt_for_each_property_node (node, prop)
>       {
>           const void *prop_data = prop->value;
> diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
> index 4aae281e89..55a883e0f6 100644
> --- a/xen/common/device_tree.c
> +++ b/xen/common/device_tree.c

If I am not mistaken, the file common/device_tree.c is so far only 
containing code to parse the host device-tree. But now...

> @@ -2195,6 +2195,75 @@ int dt_get_pci_domain_nr(struct dt_device_node *node)
>       return (u16)domain;
>   }
>   
> +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *dev)

you are introducing code to write the domain device-tree. I understand 
this is because dt_match_bus() is internal. However, I would rather 
prefer if we export dt_match_bus() & co and move this code to under 
arch/arm/pci/. Maybe we should introduce a file domain_build.c.

Furthermore, the name of the function doesn't really match what the 
function does. It will generate "ranges" for the hostbridge and remove 
the I/O. We may want to perform other modifications on the range. So I 
would name the function something like:

domain_build_generate_hostbridge_range()

> +    const struct dt_device_node *parent = NULL;
> +    const struct dt_bus *bus, *pbus;
> +    unsigned int rlen;
> +    int na, ns, pna, pns, rone;
> +    const __be32 *ranges;
> +    __be32 regs[((GUEST_ROOT_ADDRESS_CELLS * 2) + GUEST_ROOT_SIZE_CELLS + 1)

GUEST_ROOT_*_CELLS are only valid for domU. In theory, there are no 
guarantee this will be bigger that what the host device-tree supports.

So you want to use DT_MAX_ADDR_CELLS here.

> +               * 2];
Looking at the code below. I couldn't find any check guaranteing the 
static array will be big enough to store the ranges provided by the host DT.

> +    __be32 *addr = &regs[0];
> +
> +    bus = dt_match_bus(dev);
> +    if ( !bus )
> +        return 0; /* device is not a bus */
> +
> +    parent = dt_get_parent(dev);
> +    if ( !parent )
> +        return -EINVAL;
> +
> +    ranges = dt_get_property(dev, "ranges", &rlen);
> +    if ( !ranges )
> +    {
> +        printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n",
> +               dev->full_name);
> +        return -EINVAL;
> +    }
> +    if ( !rlen ) /* Nothing to do */
> +        return 0;
> +
> +    bus->count_cells(dev, &na, &ns);
> +    if ( !DT_CHECK_COUNTS(na, ns) )
> +    {
> +        printk(XENLOG_ERR "dt_parse: Bad cell count for device %s\n",
> +               dev->full_name);
> +        return -EINVAL;
> +    }
> +
> +    pbus = dt_match_bus(parent);
> +    if ( !pbus )
> +    {
> +        printk(XENLOG_ERR "DT: %s is not a valid bus\n", parent->full_name);
> +        return -EINVAL;
> +    }
> +
> +    pbus->count_cells(dev, &pna, &pns);
> +    if ( !DT_CHECK_COUNTS(pna, pns) )
> +    {
> +        printk(XENLOG_ERR "dt_parse: Bad cell count for parent %s\n",
> +               dev->full_name);
> +        return -EINVAL;
> +    }
> +
> +    /* Now walk through the ranges */
> +    rlen /= 4;
> +    rone = na + pna + ns;
> +    for ( ; rlen >= rone; rlen -= rone, ranges += rone )
> +    {

Most of the code in this function is the same as dt_for_each_range(). 
Can we refactor it to avoid code duplication?

> +        unsigned int flags = bus->get_flags(ranges);
> +        if ( flags & IORESOURCE_IO )
> +            continue;
> +
> +        memcpy(addr, ranges, 4 * rone);
> +
> +        addr += rone;
> +    }
> +
> +    return fdt_property(fdt, "ranges", regs, sizeof(regs));
> +}
> +
>   /*
>    * Local variables:
>    * mode: C
> diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
> index fd6cd00b43..580231f872 100644
> --- a/xen/include/xen/device_tree.h
> +++ b/xen/include/xen/device_tree.h
> @@ -849,6 +849,16 @@ int dt_count_phandle_with_args(const struct dt_device_node *np,
>    */
>   int dt_get_pci_domain_nr(struct dt_device_node *node);
>   
> +/**
> + * dt_pci_remove_io_range - Remove the PCI I/O range property value.
> + * @fdt: Pointer to the file descriptor tree.
> + * @node: Device tree node.
> + *
> + * This function will remove the PCI IO range property from the PCI device tree
> + * node.
> + */
> +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *node);
> +
>   struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
>   
>   #ifdef CONFIG_DEVICE_TREE_DEBUG

Cheers,

-- 
Julien Grall

Re: [PATCH v2] xen/arm: vpci: remove PCI I/O ranges property value
Posted by Rahul Singh 2 years, 2 months ago
Hi Julien,

Thanks for reviewing the code.

> On 25 Feb 2022, at 7:46 pm, Julien Grall <julien@xen.org> wrote:
> 
> Hi Rahul,
> 
> On 15/02/2022 15:36, Rahul Singh wrote:
>> PCI I/O space are not mapped to dom0 when PCI passthrough is enabled,
>> also there is no vpci trap handler register for IO bar.
>> Remove PCI I/O ranges property value from dom0 device tree node so that
>> dom0 linux will not allocate I/O space for PCI devices if
>> pci-passthrough is enabled.
>> Signed-off-by: Rahul Singh <rahul.singh@arm.com>
>> ---
>>  xen/arch/arm/domain_build.c   | 29 +++++++++++++++
>>  xen/common/device_tree.c      | 69 +++++++++++++++++++++++++++++++++++
>>  xen/include/xen/device_tree.h | 10 +++++
>>  3 files changed, 108 insertions(+)
> 
> For future version, please add a changelog. This helps to figure out what changed more easily.

Ok. I will add the changelog in next version.

> 
>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>> index 6931c022a2..7cfe64fe97 100644
>> --- a/xen/arch/arm/domain_build.c
>> +++ b/xen/arch/arm/domain_build.c
>> @@ -648,6 +648,31 @@ static void __init allocate_static_memory(struct domain *d,
>>  }
>>  #endif
>>  +/*
>> + * PCI IO bar are not mapped to dom0 when PCI passthrough is enabled, also
>> + * there is no trap handler registered for IO bar, therefore remove the IO
>> + * range property from the device tree node for dom0.
>> + */
>> +static int handle_linux_pci_io_ranges(struct kernel_info *kinfo,
>> +                                      const struct dt_device_node *node)
>> +{
>> +    if ( !is_pci_passthrough_enabled() )
>> +        return 0;
>> +
>> +    if ( !dt_device_type_is_equal(node, "pci") )
>> +        return 0;
>> +
>> +    /*
>> +     * The current heuristic assumes that a device is a host bridge
>> +     * if the type is "pci" and then parent type is not "pci".
>> +     */
>> +    if ( node->parent && dt_device_type_is_equal(node->parent, "pci") )
>> +        return 0;
> 
> 
> The logic above is exactly the same as in handle_linux_pci_domain(). Can we create an helper that could be used by both functions? This would help to keep the logic synchronized.

Ok. I will create the helper name “dt_node_check_pci_hostbridge(..)”.

> 
>> +
>> +    return dt_pci_remove_io_ranges(kinfo->fdt, node);
>> +}
>> +
>> +
>>  /*
>>   * When PCI passthrough is available we want to keep the
>>   * "linux,pci-domain" in sync for every host bridge.
>> @@ -723,6 +748,10 @@ static int __init write_properties(struct domain *d, struct kernel_info *kinfo,
>>      if ( iommu_node && device_get_class(iommu_node) != DEVICE_IOMMU )
>>          iommu_node = NULL;
>>  +    res = handle_linux_pci_io_ranges(kinfo, node);
>> +    if ( res )
>> +        return res;
>> +
>>      dt_for_each_property_node (node, prop)
>>      {
>>          const void *prop_data = prop->value;
>> diff --git a/xen/common/device_tree.c b/xen/common/device_tree.c
>> index 4aae281e89..55a883e0f6 100644
>> --- a/xen/common/device_tree.c
>> +++ b/xen/common/device_tree.c
> 
> If I am not mistaken, the file common/device_tree.c is so far only containing code to parse the host device-tree. But now...
> 
>> @@ -2195,6 +2195,75 @@ int dt_get_pci_domain_nr(struct dt_device_node *node)
>>      return (u16)domain;
>>  }
>>  +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *dev)
> 
> you are introducing code to write the domain device-tree. I understand this is because dt_match_bus() is internal. However, I would rather prefer if we export dt_match_bus() & co and move this code to under arch/arm/pci/. Maybe we should introduce a file domain_build.c.
> 
> Furthermore, the name of the function doesn't really match what the function does. It will generate "ranges" for the hostbridge and remove the I/O. We may want to perform other modifications on the range. So I would name the function something like:
> 
> domain_build_generate_hostbridge_range()

I will modify the code based on your comment in next version.

> 
>> +    const struct dt_device_node *parent = NULL;
>> +    const struct dt_bus *bus, *pbus;
>> +    unsigned int rlen;
>> +    int na, ns, pna, pns, rone;
>> +    const __be32 *ranges;
>> +    __be32 regs[((GUEST_ROOT_ADDRESS_CELLS * 2) + GUEST_ROOT_SIZE_CELLS + 1)
> 
> GUEST_ROOT_*_CELLS are only valid for domU. In theory, there are no guarantee this will be bigger that what the host device-tree supports.
> 
> So you want to use DT_MAX_ADDR_CELLS here.
> 
>> +               * 2];
> Looking at the code below. I couldn't find any check guaranteing the static array will be big enough to store the ranges provided by the host DT.

Let me fix this in next version.

> 
>> +    __be32 *addr = &regs[0];
>> +
>> +    bus = dt_match_bus(dev);
>> +    if ( !bus )
>> +        return 0; /* device is not a bus */
>> +
>> +    parent = dt_get_parent(dev);
>> +    if ( !parent )
>> +        return -EINVAL;
>> +
>> +    ranges = dt_get_property(dev, "ranges", &rlen);
>> +    if ( !ranges )
>> +    {
>> +        printk(XENLOG_ERR "DT: no ranges; cannot enumerate %s\n",
>> +               dev->full_name);
>> +        return -EINVAL;
>> +    }
>> +    if ( !rlen ) /* Nothing to do */
>> +        return 0;
>> +
>> +    bus->count_cells(dev, &na, &ns);
>> +    if ( !DT_CHECK_COUNTS(na, ns) )
>> +    {
>> +        printk(XENLOG_ERR "dt_parse: Bad cell count for device %s\n",
>> +               dev->full_name);
>> +        return -EINVAL;
>> +    }
>> +
>> +    pbus = dt_match_bus(parent);
>> +    if ( !pbus )
>> +    {
>> +        printk(XENLOG_ERR "DT: %s is not a valid bus\n", parent->full_name);
>> +        return -EINVAL;
>> +    }
>> +
>> +    pbus->count_cells(dev, &pna, &pns);
>> +    if ( !DT_CHECK_COUNTS(pna, pns) )
>> +    {
>> +        printk(XENLOG_ERR "dt_parse: Bad cell count for parent %s\n",
>> +               dev->full_name);
>> +        return -EINVAL;
>> +    }
>> +
>> +    /* Now walk through the ranges */
>> +    rlen /= 4;
>> +    rone = na + pna + ns;
>> +    for ( ; rlen >= rone; rlen -= rone, ranges += rone )
>> +    {
> 
> Most of the code in this function is the same as dt_for_each_range(). Can we refactor it to avoid code duplication?

Ok Let me try to refactor the code.

Regards,
Rahul
> 
>> +        unsigned int flags = bus->get_flags(ranges);
>> +        if ( flags & IORESOURCE_IO )
>> +            continue;
>> +
>> +        memcpy(addr, ranges, 4 * rone);
>> +
>> +        addr += rone;
>> +    }
>> +
>> +    return fdt_property(fdt, "ranges", regs, sizeof(regs));
>> +}
>> +
>>  /*
>>   * Local variables:
>>   * mode: C
>> diff --git a/xen/include/xen/device_tree.h b/xen/include/xen/device_tree.h
>> index fd6cd00b43..580231f872 100644
>> --- a/xen/include/xen/device_tree.h
>> +++ b/xen/include/xen/device_tree.h
>> @@ -849,6 +849,16 @@ int dt_count_phandle_with_args(const struct dt_device_node *np,
>>   */
>>  int dt_get_pci_domain_nr(struct dt_device_node *node);
>>  +/**
>> + * dt_pci_remove_io_range - Remove the PCI I/O range property value.
>> + * @fdt: Pointer to the file descriptor tree.
>> + * @node: Device tree node.
>> + *
>> + * This function will remove the PCI IO range property from the PCI device tree
>> + * node.
>> + */
>> +int dt_pci_remove_io_ranges(void *fdt, const struct dt_device_node *node);
>> +
>>  struct dt_device_node *dt_find_node_by_phandle(dt_phandle handle);
>>    #ifdef CONFIG_DEVICE_TREE_DEBUG
> 
> Cheers,
> 
> -- 
> Julien Grall