[PATCH v2 3/3] xen/pci: add discovered PCI device at boot

Mykyta Poturai posted 3 patches 2 months, 1 week ago
[PATCH v2 3/3] xen/pci: add discovered PCI device at boot
Posted by Mykyta Poturai 2 months, 1 week ago
From: Luca Fancellu <luca.fancellu@arm.com>

In dom0less mode, there is no dom0 that can call PCI physdev ops to
register PCI devices to iommu, so it needs to be done by Xen.
pci_add_device requires some default domain, we don't have hwdom, and
the guests are not yet created during the PCI init phase, so use dom_io
as a temporary sentinel before devices are assigned to their target
domains.

Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less
handling to it.

In pci_add_device there is a call to xsm that doesn't consider the
requester of the function to be Xen itself, so add a check to skip
the call if the owner domain is dom_io, since it means the call is
coming directly from Xen.

Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
---
(cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from
 the downstream branch poc/pci-passthrough from
 https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git)

v1->v2:
* integrate add_discovered_pci_devices into existing routines
* better explain the need for dom_io
---
 xen/arch/arm/pci/pci.c                      |  1 +
 xen/drivers/passthrough/amd/pci_amd_iommu.c |  2 +-
 xen/drivers/passthrough/pci.c               | 42 ++++++++++++++-------
 xen/drivers/passthrough/vtd/iommu.c         |  2 +-
 xen/include/xen/pci.h                       |  5 +--
 5 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/xen/arch/arm/pci/pci.c b/xen/arch/arm/pci/pci.c
index 1b34e17517..909fbdd694 100644
--- a/xen/arch/arm/pci/pci.c
+++ b/xen/arch/arm/pci/pci.c
@@ -124,6 +124,7 @@ static int __init pci_init(void)
         if ( ret < 0 )
             return ret;
 
+        setup_pci_devices(dom_io, NULL);
     }
 
     return 0;
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 3a14770855..f3a83a0ab7 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -401,7 +401,7 @@ static void __hwdom_init cf_check amd_iommu_hwdom_init(struct domain *d)
 
     /* Make sure workarounds are applied (if needed) before adding devices. */
     arch_iommu_hwdom_init(d);
-    setup_hwdom_pci_devices(d, amd_iommu_add_device);
+    setup_pci_devices(d, amd_iommu_add_device);
 }
 
 static void amd_iommu_disable_domain_device(const struct domain *domain,
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 09b424d1b3..6ddc6811df 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -35,6 +35,7 @@
 #include <xen/msi.h>
 #include <xsm/xsm.h>
 #include "ats.h"
+#include "xen/dom0less-build.h"
 
 struct pci_seg {
     struct list_head alldevs_list;
@@ -676,9 +677,12 @@ int pci_add_device(uint16_t seg, uint8_t bus, uint8_t devfn,
     else
         type = "device";
 
-    ret = xsm_resource_plug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
-    if ( ret )
-        return ret;
+    if ( d != dom_io )
+    {
+        ret = xsm_resource_plug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
+        if ( ret )
+            return ret;
+    }
 
     ret = -ENOMEM;
 
@@ -1181,19 +1185,21 @@ int __init scan_pci_devices(void)
     return ret;
 }
 
-struct setup_hwdom {
+struct setup_ctxt {
     struct domain *d;
     int (*handler)(uint8_t devfn, struct pci_dev *pdev);
 };
 
-static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
+static void __hwdom_init setup_one_pci_device(const struct setup_ctxt *ctxt,
                                                 struct pci_dev *pdev)
 {
     u8 devfn = pdev->devfn;
-    int err;
+    int err = 0;
 
     do {
-        err = ctxt->handler(devfn, pdev);
+        if ( ctxt->handler )
+            err = ctxt->handler(devfn, pdev);
+
         if ( err )
         {
             printk(XENLOG_ERR "setup %pp for d%d failed (%d)\n",
@@ -1213,10 +1219,10 @@ static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
                ctxt->d->domain_id, err);
 }
 
-static int __hwdom_init cf_check _setup_hwdom_pci_devices(
+static int __hwdom_init cf_check _setup_pci_devices(
     struct pci_seg *pseg, void *arg)
 {
-    struct setup_hwdom *ctxt = arg;
+    struct setup_ctxt *ctxt = arg;
     int bus, devfn;
 
     for ( bus = 0; bus < 256; bus++ )
@@ -1229,18 +1235,26 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
             if ( !pdev )
                 continue;
 
+            if ( is_dom0less_mode() ) {
+                int ret = pci_add_device(pdev->seg, pdev->bus, pdev->devfn, NULL,
+                                         NUMA_NO_NODE, ctxt->d);
+                if (ret)
+                    printk(XENLOG_ERR "Failed to add PCI device %pp: %d\n", &pdev->sbdf, ret);
+                continue;
+            }
+
             if ( !pdev->domain )
             {
                 pdev->domain = ctxt->d;
                 write_lock(&ctxt->d->pci_lock);
                 list_add(&pdev->domain_list, &ctxt->d->pdev_list);
                 write_unlock(&ctxt->d->pci_lock);
-                setup_one_hwdom_device(ctxt, pdev);
+                setup_one_pci_device(ctxt, pdev);
             }
             else if ( pdev->domain == dom_xen )
             {
                 pdev->domain = ctxt->d;
-                setup_one_hwdom_device(ctxt, pdev);
+                setup_one_pci_device(ctxt, pdev);
                 pdev->domain = dom_xen;
             }
             else if ( pdev->domain != ctxt->d )
@@ -1266,13 +1280,13 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
     return 0;
 }
 
-void __hwdom_init setup_hwdom_pci_devices(
+void __hwdom_init setup_pci_devices(
     struct domain *d, int (*handler)(uint8_t devfn, struct pci_dev *pdev))
 {
-    struct setup_hwdom ctxt = { .d = d, .handler = handler };
+    struct setup_ctxt ctxt = { .d = d, .handler = handler };
 
     pcidevs_lock();
-    pci_segments_iterate(_setup_hwdom_pci_devices, &ctxt);
+    pci_segments_iterate(_setup_pci_devices, &ctxt);
     pcidevs_unlock();
 }
 
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index c55f02c97e..1096c16327 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1451,7 +1451,7 @@ static void __hwdom_init cf_check intel_iommu_hwdom_init(struct domain *d)
 {
     struct acpi_drhd_unit *drhd;
 
-    setup_hwdom_pci_devices(d, setup_hwdom_device);
+    setup_pci_devices(d, setup_hwdom_device);
     setup_hwdom_rmrr(d);
     /* Make sure workarounds are applied before enabling the IOMMU(s). */
     arch_iommu_hwdom_init(d);
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 89f3281b7c..61f69a8a1b 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -220,9 +220,8 @@ int scan_pci_devices(void);
 enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn);
 int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus);
 
-void setup_hwdom_pci_devices(struct domain *d,
-                             int (*handler)(uint8_t devfn,
-                                            struct pci_dev *pdev));
+void setup_pci_devices(struct domain *d, int (*handler)(uint8_t devfn,
+                                                        struct pci_dev *pdev));
 int pci_release_devices(struct domain *d);
 int pci_add_segment(u16 seg);
 const unsigned long *pci_get_ro_map(u16 seg);
-- 
2.34.1
Re: [PATCH v2 3/3] xen/pci: add discovered PCI device at boot
Posted by Jan Beulich 2 months, 1 week ago
On 20.08.2025 14:28, Mykyta Poturai wrote:
> From: Luca Fancellu <luca.fancellu@arm.com>
> 
> In dom0less mode, there is no dom0 that can call PCI physdev ops to
> register PCI devices to iommu, so it needs to be done by Xen.
> pci_add_device requires some default domain, we don't have hwdom, and
> the guests are not yet created during the PCI init phase, so use dom_io
> as a temporary sentinel before devices are assigned to their target
> domains.
> 
> Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less
> handling to it.
> 
> In pci_add_device there is a call to xsm that doesn't consider the
> requester of the function to be Xen itself, so add a check to skip
> the call if the owner domain is dom_io, since it means the call is
> coming directly from Xen.
> 
> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
> ---
> (cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from
>  the downstream branch poc/pci-passthrough from
>  https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git)
> 
> v1->v2:
> * integrate add_discovered_pci_devices into existing routines
> * better explain the need for dom_io

What I continue to miss is an explanation of why devices can't go to their
ultimate "destination" domain right away (once those have been created),
i.e. why the dom_io intermediary is necessary in the first place.

> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -35,6 +35,7 @@
>  #include <xen/msi.h>
>  #include <xsm/xsm.h>
>  #include "ats.h"
> +#include "xen/dom0less-build.h"

No, please don't, at the very least not this way (using quotes rather than
angle brackets). I may guess that it's for is_dom0less_mode(), but even
then I wonder whether that declaration wouldn't better move elsewhere. It
simply feels somewhat wrong to include this header here.

> @@ -1181,19 +1185,21 @@ int __init scan_pci_devices(void)
>      return ret;
>  }
>  
> -struct setup_hwdom {
> +struct setup_ctxt {
>      struct domain *d;
>      int (*handler)(uint8_t devfn, struct pci_dev *pdev);
>  };
>  
> -static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
> +static void __hwdom_init setup_one_pci_device(const struct setup_ctxt *ctxt,
>                                                  struct pci_dev *pdev)

Nit: Indentation then also needds to change on this following line.

>  {
>      u8 devfn = pdev->devfn;
> -    int err;
> +    int err = 0;

This doesn't suffice, as ...

>      do {
> -        err = ctxt->handler(devfn, pdev);
> +        if ( ctxt->handler )
> +            err = ctxt->handler(devfn, pdev);
> +
>          if ( err )
>          {
>              printk(XENLOG_ERR "setup %pp for d%d failed (%d)\n",

... below here we may continue the loop even if we got an error. "err"
needs setting unconditionally in the loop body, and hence maybe better
with a conditional expression.

> @@ -1229,18 +1235,26 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
>              if ( !pdev )
>                  continue;
>  
> +            if ( is_dom0less_mode() ) {

We're in a __hwdom_init function. You can't call an __init one from here.

Also nit (style): Brace placement.

> +                int ret = pci_add_device(pdev->seg, pdev->bus, pdev->devfn, NULL,
> +                                         NUMA_NO_NODE, ctxt->d);
> +                if (ret)

Nit (style): Missing blanks.

> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -220,9 +220,8 @@ int scan_pci_devices(void);
>  enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn);
>  int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus);
>  
> -void setup_hwdom_pci_devices(struct domain *d,
> -                             int (*handler)(uint8_t devfn,
> -                                            struct pci_dev *pdev));
> +void setup_pci_devices(struct domain *d, int (*handler)(uint8_t devfn,
> +                                                        struct pci_dev *pdev));

I think in this case the 2nd parameter would better remain on the following
line, to limit overall indentation.

Jan
Re: [PATCH v2 3/3] xen/pci: add discovered PCI device at boot
Posted by Mykyta Poturai 2 months, 1 week ago
On 21.08.25 12:08, Jan Beulich wrote:
> On 20.08.2025 14:28, Mykyta Poturai wrote:
>> From: Luca Fancellu <luca.fancellu@arm.com>
>>
>> In dom0less mode, there is no dom0 that can call PCI physdev ops to
>> register PCI devices to iommu, so it needs to be done by Xen.
>> pci_add_device requires some default domain, we don't have hwdom, and
>> the guests are not yet created during the PCI init phase, so use dom_io
>> as a temporary sentinel before devices are assigned to their target
>> domains.
>>
>> Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less
>> handling to it.
>>
>> In pci_add_device there is a call to xsm that doesn't consider the
>> requester of the function to be Xen itself, so add a check to skip
>> the call if the owner domain is dom_io, since it means the call is
>> coming directly from Xen.
>>
>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
>> Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
>> ---
>> (cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from
>>   the downstream branch poc/pci-passthrough from
>>   https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git
>>
>> v1->v2:
>> * integrate add_discovered_pci_devices into existing routines
>> * better explain the need for dom_io
> 
> What I continue to miss is an explanation of why devices can't go to their
> ultimate "destination" domain right away (once those have been created),
> i.e. why the dom_io intermediary is necessary in the first place.
> 
> Jan

I've done some testing and indeed everything seems to work if we call 
pci_add_device directly during domain construction instead of 
reassigning them. Do you think this would be a better approach? If so 
then I guess this series needs to be dropped, and I will prepare a new 
one with direct device assignment to DomUs.

-- 
Mykyta
Re: [PATCH v2 3/3] xen/pci: add discovered PCI device at boot
Posted by Jan Beulich 2 months, 1 week ago
On 22.08.2025 10:03, Mykyta Poturai wrote:
> On 21.08.25 12:08, Jan Beulich wrote:
>> On 20.08.2025 14:28, Mykyta Poturai wrote:
>>> From: Luca Fancellu <luca.fancellu@arm.com>
>>>
>>> In dom0less mode, there is no dom0 that can call PCI physdev ops to
>>> register PCI devices to iommu, so it needs to be done by Xen.
>>> pci_add_device requires some default domain, we don't have hwdom, and
>>> the guests are not yet created during the PCI init phase, so use dom_io
>>> as a temporary sentinel before devices are assigned to their target
>>> domains.
>>>
>>> Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less
>>> handling to it.
>>>
>>> In pci_add_device there is a call to xsm that doesn't consider the
>>> requester of the function to be Xen itself, so add a check to skip
>>> the call if the owner domain is dom_io, since it means the call is
>>> coming directly from Xen.
>>>
>>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
>>> Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
>>> ---
>>> (cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from
>>>   the downstream branch poc/pci-passthrough from
>>>   https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git
>>>
>>> v1->v2:
>>> * integrate add_discovered_pci_devices into existing routines
>>> * better explain the need for dom_io
>>
>> What I continue to miss is an explanation of why devices can't go to their
>> ultimate "destination" domain right away (once those have been created),
>> i.e. why the dom_io intermediary is necessary in the first place.
> 
> I've done some testing and indeed everything seems to work if we call 
> pci_add_device directly during domain construction instead of 
> reassigning them. Do you think this would be a better approach?

I think so, yes, but first and foremost you'll need Arm maintainer buyoff
on either approach (or yet another one).

Jan

> If so 
> then I guess this series needs to be dropped, and I will prepare a new 
> one with direct device assignment to DomUs.
Re: [PATCH v2 3/3] xen/pci: add discovered PCI device at boot
Posted by Stefano Stabellini 2 months, 1 week ago
On Fri, 22 Aug 2025, Jan Beulich wrote:
> On 22.08.2025 10:03, Mykyta Poturai wrote:
> > On 21.08.25 12:08, Jan Beulich wrote:
> >> On 20.08.2025 14:28, Mykyta Poturai wrote:
> >>> From: Luca Fancellu <luca.fancellu@arm.com>
> >>>
> >>> In dom0less mode, there is no dom0 that can call PCI physdev ops to
> >>> register PCI devices to iommu, so it needs to be done by Xen.
> >>> pci_add_device requires some default domain, we don't have hwdom, and
> >>> the guests are not yet created during the PCI init phase, so use dom_io
> >>> as a temporary sentinel before devices are assigned to their target
> >>> domains.
> >>>
> >>> Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less
> >>> handling to it.
> >>>
> >>> In pci_add_device there is a call to xsm that doesn't consider the
> >>> requester of the function to be Xen itself, so add a check to skip
> >>> the call if the owner domain is dom_io, since it means the call is
> >>> coming directly from Xen.
> >>>
> >>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> >>> Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
> >>> ---
> >>> (cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from
> >>>   the downstream branch poc/pci-passthrough from
> >>>   https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git
> >>>
> >>> v1->v2:
> >>> * integrate add_discovered_pci_devices into existing routines
> >>> * better explain the need for dom_io
> >>
> >> What I continue to miss is an explanation of why devices can't go to their
> >> ultimate "destination" domain right away (once those have been created),
> >> i.e. why the dom_io intermediary is necessary in the first place.
> > 
> > I've done some testing and indeed everything seems to work if we call 
> > pci_add_device directly during domain construction instead of 
> > reassigning them. Do you think this would be a better approach?
> 
> I think so, yes, but first and foremost you'll need Arm maintainer buyoff
> on either approach (or yet another one).

I am OK with that in principle