[PATCH] x86/iommu: setup MMCFG ahead of IOMMU

Roger Pau Monne posted 1 patch 2 months, 1 week ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/20250819171826.60700-1-roger.pau@citrix.com
xen/arch/x86/acpi/boot.c              |  2 --
xen/arch/x86/include/asm/pci.h        |  2 ++
xen/arch/x86/pci.c                    | 16 ++++++++++++++++
xen/arch/x86/setup.c                  |  7 +++----
xen/arch/x86/x86_64/mmconfig-shared.c |  3 +++
5 files changed, 24 insertions(+), 6 deletions(-)
[PATCH] x86/iommu: setup MMCFG ahead of IOMMU
Posted by Roger Pau Monne 2 months, 1 week ago
Otherwise the PCI accesses to segments different than the first one done by
the IOMMU initialization code would silently fail by returning all ones.

Introduce a new helper, called pci_setup(), and move both the creation of
PCI segment 0 internal data structures, plus the parsing of ACPI MMCFG
table to it.

Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
---
 xen/arch/x86/acpi/boot.c              |  2 --
 xen/arch/x86/include/asm/pci.h        |  2 ++
 xen/arch/x86/pci.c                    | 16 ++++++++++++++++
 xen/arch/x86/setup.c                  |  7 +++----
 xen/arch/x86/x86_64/mmconfig-shared.c |  3 +++
 5 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/acpi/boot.c b/xen/arch/x86/acpi/boot.c
index 3901f9d9825b..1ca2360e0065 100644
--- a/xen/arch/x86/acpi/boot.c
+++ b/xen/arch/x86/acpi/boot.c
@@ -748,8 +748,6 @@ int __init acpi_boot_init(void)
 
 	acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet);
 
-	acpi_mmcfg_init();
-
 	erst_init();
 
 	acpi_hest_init();
diff --git a/xen/arch/x86/include/asm/pci.h b/xen/arch/x86/include/asm/pci.h
index 665b32116521..0b98081aeaa4 100644
--- a/xen/arch/x86/include/asm/pci.h
+++ b/xen/arch/x86/include/asm/pci.h
@@ -74,4 +74,6 @@ bool pci_check_bar(const struct pci_dev *pdev, mfn_t start, mfn_t end);
 struct rangeset;
 int pci_sanitize_bar_memory(struct rangeset *r);
 
+void pci_setup(void);
+
 #endif /* __X86_PCI_H__ */
diff --git a/xen/arch/x86/pci.c b/xen/arch/x86/pci.c
index 26bb7f6a3c3a..e75a29e851a7 100644
--- a/xen/arch/x86/pci.c
+++ b/xen/arch/x86/pci.c
@@ -6,7 +6,10 @@
 
 #include <xen/spinlock.h>
 #include <xen/pci.h>
+
+#include <asm/acpi.h>
 #include <asm/io.h>
+
 #include <xsm/xsm.h>
 
 static DEFINE_SPINLOCK(pci_config_lock);
@@ -139,6 +142,19 @@ int pci_sanitize_bar_memory(struct rangeset *r)
     return 0;
 }
 
+void __init pci_setup(void)
+{
+    /*
+     * Ahead of any ACPI table parsing make sure we have control structures
+     * for PCI segment 0.
+     */
+    if ( pci_add_segment(0) )
+        panic("Could not initialize PCI segment 0\n");
+
+    /* Parse ACPI MMCFG ahead of IOMMU, so accesses to segments > 0 is setup. */
+    acpi_mmcfg_init();
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 6fb42c5a5f95..bd648323bfed 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1938,11 +1938,10 @@ void asmlinkage __init noreturn __start_xen(void)
     setup_system_domains();
 
     /*
-     * Ahead of any ACPI table parsing make sure we have control structures
-     * for PCI segment 0.
+     * Initialize PCI (create segment 0, setup MMCFG access) ahead of IOMMU
+     * setup, as it requires access to the PCI config space.
      */
-    if ( pci_add_segment(0) )
-        panic("Could not initialize PCI segment 0\n");
+    pci_setup();
 
     /*
      * IOMMU-related ACPI table parsing has to happen before APIC probing, for
diff --git a/xen/arch/x86/x86_64/mmconfig-shared.c b/xen/arch/x86/x86_64/mmconfig-shared.c
index f1a3d42c5b21..fbe2676f8636 100644
--- a/xen/arch/x86/x86_64/mmconfig-shared.c
+++ b/xen/arch/x86/x86_64/mmconfig-shared.c
@@ -402,6 +402,9 @@ void __init acpi_mmcfg_init(void)
 {
     bool valid = true;
 
+    if ( acpi_disabled )
+        return;
+
     /* MMCONFIG disabled */
     if ((pci_probe & PCI_PROBE_MMCONF) == 0)
         return;
-- 
2.49.0


Re: [PATCH] x86/iommu: setup MMCFG ahead of IOMMU
Posted by Andrew Cooper 2 months, 1 week ago
On 19/08/2025 6:18 pm, Roger Pau Monne wrote:
> Otherwise the PCI accesses to segments different than the first one done by
> the IOMMU initialization code would silently fail by returning all ones.
>
> Introduce a new helper, called pci_setup(), and move both the creation of
> PCI segment 0 internal data structures, plus the parsing of ACPI MMCFG
> table to it.
>
> Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>

And moving acpi_mmcfg_init() slightly earlier from acpi_boot_init() into
pci_setup().

> diff --git a/xen/arch/x86/pci.c b/xen/arch/x86/pci.c
> index 26bb7f6a3c3a..e75a29e851a7 100644
> --- a/xen/arch/x86/pci.c
> +++ b/xen/arch/x86/pci.c
> @@ -139,6 +142,19 @@ int pci_sanitize_bar_memory(struct rangeset *r)
>      return 0;
>  }
>  
> +void __init pci_setup(void)
> +{
> +    /*
> +     * Ahead of any ACPI table parsing make sure we have control structures
> +     * for PCI segment 0.
> +     */
> +    if ( pci_add_segment(0) )
> +        panic("Could not initialize PCI segment 0\n");
> +
> +    /* Parse ACPI MMCFG ahead of IOMMU, so accesses to segments > 0 is setup. */

"ahead of IOMMU" isn't helpful here because the relevant context is in
the caller.  Instead, I'd just say:

/* Parse ACPI MMCFG to see if other segments are available. */

> +    acpi_mmcfg_init();
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index 6fb42c5a5f95..bd648323bfed 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -1938,11 +1938,10 @@ void asmlinkage __init noreturn __start_xen(void)
>      setup_system_domains();
>  
>      /*
> -     * Ahead of any ACPI table parsing make sure we have control structures
> -     * for PCI segment 0.
> +     * Initialize PCI (create segment 0, setup MMCFG access) ahead of IOMMU
> +     * setup, as it requires access to the PCI config space.
>       */

Again, this isn't terribly clear IMO.

"ahead of IOMMU setup, as the IOMMUs might not all live on segment 0." ?


> -    if ( pci_add_segment(0) )
> -        panic("Could not initialize PCI segment 0\n");
> +    pci_setup();
>  
>      /*
>       * IOMMU-related ACPI table parsing has to happen before APIC probing, for
> diff --git a/xen/arch/x86/x86_64/mmconfig-shared.c b/xen/arch/x86/x86_64/mmconfig-shared.c
> index f1a3d42c5b21..fbe2676f8636 100644
> --- a/xen/arch/x86/x86_64/mmconfig-shared.c
> +++ b/xen/arch/x86/x86_64/mmconfig-shared.c
> @@ -402,6 +402,9 @@ void __init acpi_mmcfg_init(void)
>  {
>      bool valid = true;
>  
> +    if ( acpi_disabled )
> +        return;

This is fine for the patch, making things consistent with the prior
behaviour.

However, I think it's well gone time we drop support for pre-APCI
systems, including things like acpi_disabled, etc.

Nothing good can possibly come of these codepaths these days.

~Andrew

Re: [PATCH] x86/iommu: setup MMCFG ahead of IOMMU
Posted by Roger Pau Monné 2 months, 1 week ago
On Tue, Aug 19, 2025 at 07:23:57PM +0100, Andrew Cooper wrote:
> On 19/08/2025 6:18 pm, Roger Pau Monne wrote:
> > Otherwise the PCI accesses to segments different than the first one done by
> > the IOMMU initialization code would silently fail by returning all ones.
> >
> > Introduce a new helper, called pci_setup(), and move both the creation of
> > PCI segment 0 internal data structures, plus the parsing of ACPI MMCFG
> > table to it.
> >
> > Signed-off-by: Roger Pau Monné <roger.pau@citrix.com>
> 
> And moving acpi_mmcfg_init() slightly earlier from acpi_boot_init() into
> pci_setup().
> 
> > diff --git a/xen/arch/x86/pci.c b/xen/arch/x86/pci.c
> > index 26bb7f6a3c3a..e75a29e851a7 100644
> > --- a/xen/arch/x86/pci.c
> > +++ b/xen/arch/x86/pci.c
> > @@ -139,6 +142,19 @@ int pci_sanitize_bar_memory(struct rangeset *r)
> >      return 0;
> >  }
> >  
> > +void __init pci_setup(void)
> > +{
> > +    /*
> > +     * Ahead of any ACPI table parsing make sure we have control structures
> > +     * for PCI segment 0.
> > +     */
> > +    if ( pci_add_segment(0) )
> > +        panic("Could not initialize PCI segment 0\n");
> > +
> > +    /* Parse ACPI MMCFG ahead of IOMMU, so accesses to segments > 0 is setup. */
> 
> "ahead of IOMMU" isn't helpful here because the relevant context is in
> the caller.  Instead, I'd just say:
> 
> /* Parse ACPI MMCFG to see if other segments are available. */

Sure.

> > +    acpi_mmcfg_init();
> > +}
> > +
> >  /*
> >   * Local variables:
> >   * mode: C
> > diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> > index 6fb42c5a5f95..bd648323bfed 100644
> > --- a/xen/arch/x86/setup.c
> > +++ b/xen/arch/x86/setup.c
> > @@ -1938,11 +1938,10 @@ void asmlinkage __init noreturn __start_xen(void)
> >      setup_system_domains();
> >  
> >      /*
> > -     * Ahead of any ACPI table parsing make sure we have control structures
> > -     * for PCI segment 0.
> > +     * Initialize PCI (create segment 0, setup MMCFG access) ahead of IOMMU
> > +     * setup, as it requires access to the PCI config space.
> >       */
> 
> Again, this isn't terribly clear IMO.
> 
> "ahead of IOMMU setup, as the IOMMUs might not all live on segment 0." ?

It's not just IOMMUs, but for example on VT-d we also need to poke at
the config space of bridges, and when such bridges live in segment > 0
that results in garbage being returned.

I'm not sure acpi_iommu_init() accesses the IOMMU PCI device config
space, but it does at least access the config space of bridges in
order to detect hierarchy.  See how acpi_parse_dev_scope() performs
PCI reads.

What about using:

/*
 * Initialize PCI (create segment 0, setup MMCFG access) ahead of IOMMU
 * setup, as devices in segment > 0 must also be discoverable.
 */

Thanks, Roger.

Re: [PATCH] x86/iommu: setup MMCFG ahead of IOMMU
Posted by Andrew Cooper 2 months, 1 week ago
On 20/08/2025 12:33 pm, Roger Pau Monné wrote:
> On Tue, Aug 19, 2025 at 07:23:57PM +0100, Andrew Cooper wrote:
>> On 19/08/2025 6:18 pm, Roger Pau Monne wrote:
>>> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
>>> index 6fb42c5a5f95..bd648323bfed 100644
>>> --- a/xen/arch/x86/setup.c
>>> +++ b/xen/arch/x86/setup.c
>>> @@ -1938,11 +1938,10 @@ void asmlinkage __init noreturn __start_xen(void)
>>>      setup_system_domains();
>>>  
>>>      /*
>>> -     * Ahead of any ACPI table parsing make sure we have control structures
>>> -     * for PCI segment 0.
>>> +     * Initialize PCI (create segment 0, setup MMCFG access) ahead of IOMMU
>>> +     * setup, as it requires access to the PCI config space.
>>>       */
>> Again, this isn't terribly clear IMO.
>>
>> "ahead of IOMMU setup, as the IOMMUs might not all live on segment 0." ?
> It's not just IOMMUs, but for example on VT-d we also need to poke at
> the config space of bridges, and when such bridges live in segment > 0
> that results in garbage being returned.
>
> I'm not sure acpi_iommu_init() accesses the IOMMU PCI device config
> space, but it does at least access the config space of bridges in
> order to detect hierarchy.  See how acpi_parse_dev_scope() performs
> PCI reads.
>
> What about using:
>
> /*
>  * Initialize PCI (create segment 0, setup MMCFG access) ahead of IOMMU
>  * setup, as devices in segment > 0 must also be discoverable.
>  */

Yeah, that works.

With those comment adjustments, Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com>

Re: [PATCH] x86/iommu: setup MMCFG ahead of IOMMU
Posted by Roger Pau Monné 2 months, 1 week ago
On Wed, Aug 20, 2025 at 12:37:51PM +0100, Andrew Cooper wrote:
> On 20/08/2025 12:33 pm, Roger Pau Monné wrote:
> > On Tue, Aug 19, 2025 at 07:23:57PM +0100, Andrew Cooper wrote:
> >> On 19/08/2025 6:18 pm, Roger Pau Monne wrote:
> >>> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> >>> index 6fb42c5a5f95..bd648323bfed 100644
> >>> --- a/xen/arch/x86/setup.c
> >>> +++ b/xen/arch/x86/setup.c
> >>> @@ -1938,11 +1938,10 @@ void asmlinkage __init noreturn __start_xen(void)
> >>>      setup_system_domains();
> >>>  
> >>>      /*
> >>> -     * Ahead of any ACPI table parsing make sure we have control structures
> >>> -     * for PCI segment 0.
> >>> +     * Initialize PCI (create segment 0, setup MMCFG access) ahead of IOMMU
> >>> +     * setup, as it requires access to the PCI config space.
> >>>       */
> >> Again, this isn't terribly clear IMO.
> >>
> >> "ahead of IOMMU setup, as the IOMMUs might not all live on segment 0." ?
> > It's not just IOMMUs, but for example on VT-d we also need to poke at
> > the config space of bridges, and when such bridges live in segment > 0
> > that results in garbage being returned.
> >
> > I'm not sure acpi_iommu_init() accesses the IOMMU PCI device config
> > space, but it does at least access the config space of bridges in
> > order to detect hierarchy.  See how acpi_parse_dev_scope() performs
> > PCI reads.
> >
> > What about using:
> >
> > /*
> >  * Initialize PCI (create segment 0, setup MMCFG access) ahead of IOMMU
> >  * setup, as devices in segment > 0 must also be discoverable.
> >  */
> 
> Yeah, that works.
> 
> With those comment adjustments, Reviewed-by: Andrew Cooper
> <andrew.cooper3@citrix.com>

Forgot to add it, this patch should have:

Fixes: 3950f2485bbc ('x86/x2APIC: defer probe until after IOMMU ACPI table parsing')

In the commit message.

Thanks, Roger.