From: Luca Fancellu <luca.fancellu@arm.com>
In dom0less mode, there is no dom0 that can call PCI physdev ops to
register PCI devices to iommu, so it needs to be done by Xen.
pci_add_device requires some default domain, we don't have hwdom, and
the guests are not yet created during the PCI init phase, so use dom_io
as a temporary sentinel before devices are assigned to their target
domains.
Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less
handling to it.
In pci_add_device there is a call to xsm that doesn't consider the
requester of the function to be Xen itself, so add a check to skip
the call if the owner domain is dom_io, since it means the call is
coming directly from Xen.
Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
---
(cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from
the downstream branch poc/pci-passthrough from
https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git)
v1->v2:
* integrate add_discovered_pci_devices into existing routines
* better explain the need for dom_io
---
xen/arch/arm/pci/pci.c | 1 +
xen/drivers/passthrough/amd/pci_amd_iommu.c | 2 +-
xen/drivers/passthrough/pci.c | 42 ++++++++++++++-------
xen/drivers/passthrough/vtd/iommu.c | 2 +-
xen/include/xen/pci.h | 5 +--
5 files changed, 33 insertions(+), 19 deletions(-)
diff --git a/xen/arch/arm/pci/pci.c b/xen/arch/arm/pci/pci.c
index 1b34e17517..909fbdd694 100644
--- a/xen/arch/arm/pci/pci.c
+++ b/xen/arch/arm/pci/pci.c
@@ -124,6 +124,7 @@ static int __init pci_init(void)
if ( ret < 0 )
return ret;
+ setup_pci_devices(dom_io, NULL);
}
return 0;
diff --git a/xen/drivers/passthrough/amd/pci_amd_iommu.c b/xen/drivers/passthrough/amd/pci_amd_iommu.c
index 3a14770855..f3a83a0ab7 100644
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -401,7 +401,7 @@ static void __hwdom_init cf_check amd_iommu_hwdom_init(struct domain *d)
/* Make sure workarounds are applied (if needed) before adding devices. */
arch_iommu_hwdom_init(d);
- setup_hwdom_pci_devices(d, amd_iommu_add_device);
+ setup_pci_devices(d, amd_iommu_add_device);
}
static void amd_iommu_disable_domain_device(const struct domain *domain,
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 09b424d1b3..6ddc6811df 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -35,6 +35,7 @@
#include <xen/msi.h>
#include <xsm/xsm.h>
#include "ats.h"
+#include "xen/dom0less-build.h"
struct pci_seg {
struct list_head alldevs_list;
@@ -676,9 +677,12 @@ int pci_add_device(uint16_t seg, uint8_t bus, uint8_t devfn,
else
type = "device";
- ret = xsm_resource_plug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
- if ( ret )
- return ret;
+ if ( d != dom_io )
+ {
+ ret = xsm_resource_plug_pci(XSM_PRIV, (seg << 16) | (bus << 8) | devfn);
+ if ( ret )
+ return ret;
+ }
ret = -ENOMEM;
@@ -1181,19 +1185,21 @@ int __init scan_pci_devices(void)
return ret;
}
-struct setup_hwdom {
+struct setup_ctxt {
struct domain *d;
int (*handler)(uint8_t devfn, struct pci_dev *pdev);
};
-static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
+static void __hwdom_init setup_one_pci_device(const struct setup_ctxt *ctxt,
struct pci_dev *pdev)
{
u8 devfn = pdev->devfn;
- int err;
+ int err = 0;
do {
- err = ctxt->handler(devfn, pdev);
+ if ( ctxt->handler )
+ err = ctxt->handler(devfn, pdev);
+
if ( err )
{
printk(XENLOG_ERR "setup %pp for d%d failed (%d)\n",
@@ -1213,10 +1219,10 @@ static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
ctxt->d->domain_id, err);
}
-static int __hwdom_init cf_check _setup_hwdom_pci_devices(
+static int __hwdom_init cf_check _setup_pci_devices(
struct pci_seg *pseg, void *arg)
{
- struct setup_hwdom *ctxt = arg;
+ struct setup_ctxt *ctxt = arg;
int bus, devfn;
for ( bus = 0; bus < 256; bus++ )
@@ -1229,18 +1235,26 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
if ( !pdev )
continue;
+ if ( is_dom0less_mode() ) {
+ int ret = pci_add_device(pdev->seg, pdev->bus, pdev->devfn, NULL,
+ NUMA_NO_NODE, ctxt->d);
+ if (ret)
+ printk(XENLOG_ERR "Failed to add PCI device %pp: %d\n", &pdev->sbdf, ret);
+ continue;
+ }
+
if ( !pdev->domain )
{
pdev->domain = ctxt->d;
write_lock(&ctxt->d->pci_lock);
list_add(&pdev->domain_list, &ctxt->d->pdev_list);
write_unlock(&ctxt->d->pci_lock);
- setup_one_hwdom_device(ctxt, pdev);
+ setup_one_pci_device(ctxt, pdev);
}
else if ( pdev->domain == dom_xen )
{
pdev->domain = ctxt->d;
- setup_one_hwdom_device(ctxt, pdev);
+ setup_one_pci_device(ctxt, pdev);
pdev->domain = dom_xen;
}
else if ( pdev->domain != ctxt->d )
@@ -1266,13 +1280,13 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
return 0;
}
-void __hwdom_init setup_hwdom_pci_devices(
+void __hwdom_init setup_pci_devices(
struct domain *d, int (*handler)(uint8_t devfn, struct pci_dev *pdev))
{
- struct setup_hwdom ctxt = { .d = d, .handler = handler };
+ struct setup_ctxt ctxt = { .d = d, .handler = handler };
pcidevs_lock();
- pci_segments_iterate(_setup_hwdom_pci_devices, &ctxt);
+ pci_segments_iterate(_setup_pci_devices, &ctxt);
pcidevs_unlock();
}
diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c
index c55f02c97e..1096c16327 100644
--- a/xen/drivers/passthrough/vtd/iommu.c
+++ b/xen/drivers/passthrough/vtd/iommu.c
@@ -1451,7 +1451,7 @@ static void __hwdom_init cf_check intel_iommu_hwdom_init(struct domain *d)
{
struct acpi_drhd_unit *drhd;
- setup_hwdom_pci_devices(d, setup_hwdom_device);
+ setup_pci_devices(d, setup_hwdom_device);
setup_hwdom_rmrr(d);
/* Make sure workarounds are applied before enabling the IOMMU(s). */
arch_iommu_hwdom_init(d);
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index 89f3281b7c..61f69a8a1b 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -220,9 +220,8 @@ int scan_pci_devices(void);
enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn);
int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus);
-void setup_hwdom_pci_devices(struct domain *d,
- int (*handler)(uint8_t devfn,
- struct pci_dev *pdev));
+void setup_pci_devices(struct domain *d, int (*handler)(uint8_t devfn,
+ struct pci_dev *pdev));
int pci_release_devices(struct domain *d);
int pci_add_segment(u16 seg);
const unsigned long *pci_get_ro_map(u16 seg);
--
2.34.1
On 20.08.2025 14:28, Mykyta Poturai wrote:
> From: Luca Fancellu <luca.fancellu@arm.com>
>
> In dom0less mode, there is no dom0 that can call PCI physdev ops to
> register PCI devices to iommu, so it needs to be done by Xen.
> pci_add_device requires some default domain, we don't have hwdom, and
> the guests are not yet created during the PCI init phase, so use dom_io
> as a temporary sentinel before devices are assigned to their target
> domains.
>
> Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less
> handling to it.
>
> In pci_add_device there is a call to xsm that doesn't consider the
> requester of the function to be Xen itself, so add a check to skip
> the call if the owner domain is dom_io, since it means the call is
> coming directly from Xen.
>
> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com>
> Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com>
> ---
> (cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from
> the downstream branch poc/pci-passthrough from
> https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git)
>
> v1->v2:
> * integrate add_discovered_pci_devices into existing routines
> * better explain the need for dom_io
What I continue to miss is an explanation of why devices can't go to their
ultimate "destination" domain right away (once those have been created),
i.e. why the dom_io intermediary is necessary in the first place.
> --- a/xen/drivers/passthrough/pci.c
> +++ b/xen/drivers/passthrough/pci.c
> @@ -35,6 +35,7 @@
> #include <xen/msi.h>
> #include <xsm/xsm.h>
> #include "ats.h"
> +#include "xen/dom0less-build.h"
No, please don't, at the very least not this way (using quotes rather than
angle brackets). I may guess that it's for is_dom0less_mode(), but even
then I wonder whether that declaration wouldn't better move elsewhere. It
simply feels somewhat wrong to include this header here.
> @@ -1181,19 +1185,21 @@ int __init scan_pci_devices(void)
> return ret;
> }
>
> -struct setup_hwdom {
> +struct setup_ctxt {
> struct domain *d;
> int (*handler)(uint8_t devfn, struct pci_dev *pdev);
> };
>
> -static void __hwdom_init setup_one_hwdom_device(const struct setup_hwdom *ctxt,
> +static void __hwdom_init setup_one_pci_device(const struct setup_ctxt *ctxt,
> struct pci_dev *pdev)
Nit: Indentation then also needds to change on this following line.
> {
> u8 devfn = pdev->devfn;
> - int err;
> + int err = 0;
This doesn't suffice, as ...
> do {
> - err = ctxt->handler(devfn, pdev);
> + if ( ctxt->handler )
> + err = ctxt->handler(devfn, pdev);
> +
> if ( err )
> {
> printk(XENLOG_ERR "setup %pp for d%d failed (%d)\n",
... below here we may continue the loop even if we got an error. "err"
needs setting unconditionally in the loop body, and hence maybe better
with a conditional expression.
> @@ -1229,18 +1235,26 @@ static int __hwdom_init cf_check _setup_hwdom_pci_devices(
> if ( !pdev )
> continue;
>
> + if ( is_dom0less_mode() ) {
We're in a __hwdom_init function. You can't call an __init one from here.
Also nit (style): Brace placement.
> + int ret = pci_add_device(pdev->seg, pdev->bus, pdev->devfn, NULL,
> + NUMA_NO_NODE, ctxt->d);
> + if (ret)
Nit (style): Missing blanks.
> --- a/xen/include/xen/pci.h
> +++ b/xen/include/xen/pci.h
> @@ -220,9 +220,8 @@ int scan_pci_devices(void);
> enum pdev_type pdev_type(u16 seg, u8 bus, u8 devfn);
> int find_upstream_bridge(u16 seg, u8 *bus, u8 *devfn, u8 *secbus);
>
> -void setup_hwdom_pci_devices(struct domain *d,
> - int (*handler)(uint8_t devfn,
> - struct pci_dev *pdev));
> +void setup_pci_devices(struct domain *d, int (*handler)(uint8_t devfn,
> + struct pci_dev *pdev));
I think in this case the 2nd parameter would better remain on the following
line, to limit overall indentation.
Jan
On 21.08.25 12:08, Jan Beulich wrote: > On 20.08.2025 14:28, Mykyta Poturai wrote: >> From: Luca Fancellu <luca.fancellu@arm.com> >> >> In dom0less mode, there is no dom0 that can call PCI physdev ops to >> register PCI devices to iommu, so it needs to be done by Xen. >> pci_add_device requires some default domain, we don't have hwdom, and >> the guests are not yet created during the PCI init phase, so use dom_io >> as a temporary sentinel before devices are assigned to their target >> domains. >> >> Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less >> handling to it. >> >> In pci_add_device there is a call to xsm that doesn't consider the >> requester of the function to be Xen itself, so add a check to skip >> the call if the owner domain is dom_io, since it means the call is >> coming directly from Xen. >> >> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com> >> Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com> >> --- >> (cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from >> the downstream branch poc/pci-passthrough from >> https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git >> >> v1->v2: >> * integrate add_discovered_pci_devices into existing routines >> * better explain the need for dom_io > > What I continue to miss is an explanation of why devices can't go to their > ultimate "destination" domain right away (once those have been created), > i.e. why the dom_io intermediary is necessary in the first place. > > Jan I've done some testing and indeed everything seems to work if we call pci_add_device directly during domain construction instead of reassigning them. Do you think this would be a better approach? If so then I guess this series needs to be dropped, and I will prepare a new one with direct device assignment to DomUs. -- Mykyta
On 22.08.2025 10:03, Mykyta Poturai wrote: > On 21.08.25 12:08, Jan Beulich wrote: >> On 20.08.2025 14:28, Mykyta Poturai wrote: >>> From: Luca Fancellu <luca.fancellu@arm.com> >>> >>> In dom0less mode, there is no dom0 that can call PCI physdev ops to >>> register PCI devices to iommu, so it needs to be done by Xen. >>> pci_add_device requires some default domain, we don't have hwdom, and >>> the guests are not yet created during the PCI init phase, so use dom_io >>> as a temporary sentinel before devices are assigned to their target >>> domains. >>> >>> Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less >>> handling to it. >>> >>> In pci_add_device there is a call to xsm that doesn't consider the >>> requester of the function to be Xen itself, so add a check to skip >>> the call if the owner domain is dom_io, since it means the call is >>> coming directly from Xen. >>> >>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com> >>> Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com> >>> --- >>> (cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from >>> the downstream branch poc/pci-passthrough from >>> https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git >>> >>> v1->v2: >>> * integrate add_discovered_pci_devices into existing routines >>> * better explain the need for dom_io >> >> What I continue to miss is an explanation of why devices can't go to their >> ultimate "destination" domain right away (once those have been created), >> i.e. why the dom_io intermediary is necessary in the first place. > > I've done some testing and indeed everything seems to work if we call > pci_add_device directly during domain construction instead of > reassigning them. Do you think this would be a better approach? I think so, yes, but first and foremost you'll need Arm maintainer buyoff on either approach (or yet another one). Jan > If so > then I guess this series needs to be dropped, and I will prepare a new > one with direct device assignment to DomUs.
On Fri, 22 Aug 2025, Jan Beulich wrote: > On 22.08.2025 10:03, Mykyta Poturai wrote: > > On 21.08.25 12:08, Jan Beulich wrote: > >> On 20.08.2025 14:28, Mykyta Poturai wrote: > >>> From: Luca Fancellu <luca.fancellu@arm.com> > >>> > >>> In dom0less mode, there is no dom0 that can call PCI physdev ops to > >>> register PCI devices to iommu, so it needs to be done by Xen. > >>> pci_add_device requires some default domain, we don't have hwdom, and > >>> the guests are not yet created during the PCI init phase, so use dom_io > >>> as a temporary sentinel before devices are assigned to their target > >>> domains. > >>> > >>> Rename setup_hwdom_pci_devices to setup_pci_devices and add dom0less > >>> handling to it. > >>> > >>> In pci_add_device there is a call to xsm that doesn't consider the > >>> requester of the function to be Xen itself, so add a check to skip > >>> the call if the owner domain is dom_io, since it means the call is > >>> coming directly from Xen. > >>> > >>> Signed-off-by: Luca Fancellu <luca.fancellu@arm.com> > >>> Signed-off-by: Mykyta Poturai <mykyta_poturai@epam.com> > >>> --- > >>> (cherry picked from commit eff51e50021b75f5a50533f7de681b2ce044f5bd from > >>> the downstream branch poc/pci-passthrough from > >>> https://gitlab.com/xen-project/people/bmarquis/xen-arm-poc.git > >>> > >>> v1->v2: > >>> * integrate add_discovered_pci_devices into existing routines > >>> * better explain the need for dom_io > >> > >> What I continue to miss is an explanation of why devices can't go to their > >> ultimate "destination" domain right away (once those have been created), > >> i.e. why the dom_io intermediary is necessary in the first place. > > > > I've done some testing and indeed everything seems to work if we call > > pci_add_device directly during domain construction instead of > > reassigning them. Do you think this would be a better approach? > > I think so, yes, but first and foremost you'll need Arm maintainer buyoff > on either approach (or yet another one). I am OK with that in principle
© 2016 - 2025 Red Hat, Inc.