x2apic_bsp_setup() gets called ahead of iommu_setup(), and since x2APIC
mode (physical vs clustered) depends on iommu_intremap, that variable
needs to be set to off as soon as we know we can't / won't enable
interrupt remapping, i.e. in particular when parsing of the respective
ACPI tables failed. Move the turning off of iommu_intremap from AMD
specific code into acpi_iommu_init(), accompanying it by clearing of
iommu_enable.
Take the opportunity and also fully skip ACPI table parsing logic on
VT-d when both "iommu=off" and "iommu=no-intremap" are in effect anyway,
like was already the case for AMD.
The tag below only references the commit uncovering a pre-existing
anomaly.
Fixes: d8bd82327b0f ("AMD/IOMMU: obtain IVHD type to use earlier")
Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
While the change here deals with apic_x2apic_probe() as called from
x2apic_bsp_setup(), the check_x2apic_preenabled() path looks to be
similarly affected. That call occurs before acpi_boot_init(), which is
what calls acpi_iommu_init(). The ordering in setup.c is in part
relatively fragile, which is why for the moment I'm still hesitant to
move the generic_apic_probe() call down. Plus I don't have easy access
to a suitable system to test this case. Thoughts?
---
v2: Treat iommu_enable and iommu_intremap as separate options.
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
@@ -183,9 +183,6 @@ int __init acpi_ivrs_init(void)
{
int rc;
- if ( !iommu_enable && !iommu_intremap )
- return 0;
-
rc = amd_iommu_get_supported_ivhd_type();
if ( rc < 0 )
return rc;
@@ -193,10 +190,7 @@ int __init acpi_ivrs_init(void)
ivhd_type = rc;
if ( (amd_iommu_detect_acpi() !=0) || (iommu_found() == 0) )
- {
- iommu_intremap = iommu_intremap_off;
return -ENODEV;
- }
iommu_init_ops = &_iommu_init_ops;
--- a/xen/drivers/passthrough/vtd/dmar.c
+++ b/xen/drivers/passthrough/vtd/dmar.c
@@ -777,11 +777,7 @@ static int __init acpi_parse_dmar(struct
dmar = (struct acpi_table_dmar *)table;
dmar_flags = dmar->flags;
- if ( !iommu_enable && !iommu_intremap )
- {
- ret = -EINVAL;
- goto out;
- }
+ ASSERT(iommu_enable || iommu_intremap);
if ( !dmar->width )
{
--- a/xen/drivers/passthrough/x86/iommu.c
+++ b/xen/drivers/passthrough/x86/iommu.c
@@ -41,6 +41,24 @@ enum iommu_intremap __read_mostly iommu_
bool __read_mostly iommu_intpost;
#endif
+void __init acpi_iommu_init(void)
+{
+ int ret;
+
+ if ( !iommu_enable && !iommu_intremap )
+ return;
+
+ ret = acpi_dmar_init();
+ if ( ret == -ENODEV )
+ ret = acpi_ivrs_init();
+
+ if ( ret )
+ {
+ iommu_enable = false;
+ iommu_intremap = iommu_intremap_off;
+ }
+}
+
int __init iommu_hardware_setup(void)
{
struct IO_APIC_route_entry **ioapic_entries = NULL;
--- a/xen/include/asm-x86/acpi.h
+++ b/xen/include/asm-x86/acpi.h
@@ -141,16 +141,10 @@ extern u32 x86_acpiid_to_apicid[];
extern u32 pmtmr_ioport;
extern unsigned int pmtmr_width;
+void acpi_iommu_init(void);
int acpi_dmar_init(void);
int acpi_ivrs_init(void);
-static inline int acpi_iommu_init(void)
-{
- int ret = acpi_dmar_init();
-
- return ret == -ENODEV ? acpi_ivrs_init() : ret;
-}
-
void acpi_mmcfg_init(void);
/* Incremented whenever we transition through S3. Value is 1 during boot. */
On Thu, Oct 21, 2021 at 11:58:18AM +0200, Jan Beulich wrote:
> x2apic_bsp_setup() gets called ahead of iommu_setup(), and since x2APIC
> mode (physical vs clustered) depends on iommu_intremap, that variable
> needs to be set to off as soon as we know we can't / won't enable
> interrupt remapping, i.e. in particular when parsing of the respective
> ACPI tables failed. Move the turning off of iommu_intremap from AMD
> specific code into acpi_iommu_init(), accompanying it by clearing of
> iommu_enable.
>
> Take the opportunity and also fully skip ACPI table parsing logic on
> VT-d when both "iommu=off" and "iommu=no-intremap" are in effect anyway,
> like was already the case for AMD.
>
> The tag below only references the commit uncovering a pre-existing
> anomaly.
>
> Fixes: d8bd82327b0f ("AMD/IOMMU: obtain IVHD type to use earlier")
> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
> ---
> While the change here deals with apic_x2apic_probe() as called from
> x2apic_bsp_setup(), the check_x2apic_preenabled() path looks to be
> similarly affected. That call occurs before acpi_boot_init(), which is
> what calls acpi_iommu_init(). The ordering in setup.c is in part
> relatively fragile, which is why for the moment I'm still hesitant to
> move the generic_apic_probe() call down. Plus I don't have easy access
> to a suitable system to test this case. Thoughts?
Indeed, that seems it could go quite wrong, as apic_x2apic_probe will
see iommu_intremap == iommu_intremap_full (the default value) and thus
could choose cluster mode without real interrupt remapping support.
At first sight it would seem possible to move lower down, but as you
say, this is all quite fragile. It's even made worse because we lack a
strict ordering discipline or any kind of dependency checking, so even
if we mess up the order it's likely to go unnoticed unless someone
tests on an affected system.
While we can try to solve this for the upcoming release, long term we
need a stricter ordering, either as a comment, or even better enforced
somehow in code. The x2APIC vs IOMMU ordering has bitten us multiple
times and we should see about implementing a more robust solution.
Thanks, Roger.
On 22.10.2021 17:52, Roger Pau Monné wrote:
> On Thu, Oct 21, 2021 at 11:58:18AM +0200, Jan Beulich wrote:
>> x2apic_bsp_setup() gets called ahead of iommu_setup(), and since x2APIC
>> mode (physical vs clustered) depends on iommu_intremap, that variable
>> needs to be set to off as soon as we know we can't / won't enable
>> interrupt remapping, i.e. in particular when parsing of the respective
>> ACPI tables failed. Move the turning off of iommu_intremap from AMD
>> specific code into acpi_iommu_init(), accompanying it by clearing of
>> iommu_enable.
>>
>> Take the opportunity and also fully skip ACPI table parsing logic on
>> VT-d when both "iommu=off" and "iommu=no-intremap" are in effect anyway,
>> like was already the case for AMD.
>>
>> The tag below only references the commit uncovering a pre-existing
>> anomaly.
>>
>> Fixes: d8bd82327b0f ("AMD/IOMMU: obtain IVHD type to use earlier")
>> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
>> Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> Reviewed-by: Roger Pau Monné <roger.pau@citrix.com>
Thanks.
>> ---
>> While the change here deals with apic_x2apic_probe() as called from
>> x2apic_bsp_setup(), the check_x2apic_preenabled() path looks to be
>> similarly affected. That call occurs before acpi_boot_init(), which is
>> what calls acpi_iommu_init(). The ordering in setup.c is in part
>> relatively fragile, which is why for the moment I'm still hesitant to
>> move the generic_apic_probe() call down. Plus I don't have easy access
>> to a suitable system to test this case. Thoughts?
>
> Indeed, that seems it could go quite wrong, as apic_x2apic_probe will
> see iommu_intremap == iommu_intremap_full (the default value) and thus
> could choose cluster mode without real interrupt remapping support.
>
> At first sight it would seem possible to move lower down, but as you
> say, this is all quite fragile. It's even made worse because we lack a
> strict ordering discipline or any kind of dependency checking, so even
> if we mess up the order it's likely to go unnoticed unless someone
> tests on an affected system.
>
> While we can try to solve this for the upcoming release, long term we
> need a stricter ordering, either as a comment, or even better enforced
> somehow in code. The x2APIC vs IOMMU ordering has bitten us multiple
> times and we should see about implementing a more robust solution.
So what's your thought then: Make the change (in another patch), or rather
leave the code as is? I'm slightly in favor of making the change seeing
that you agree that the rearrangement looks to be correct.
Jan
On 21.10.2021 11:58, Jan Beulich wrote:
> x2apic_bsp_setup() gets called ahead of iommu_setup(), and since x2APIC
> mode (physical vs clustered) depends on iommu_intremap, that variable
> needs to be set to off as soon as we know we can't / won't enable
> interrupt remapping, i.e. in particular when parsing of the respective
> ACPI tables failed. Move the turning off of iommu_intremap from AMD
> specific code into acpi_iommu_init(), accompanying it by clearing of
> iommu_enable.
>
> Take the opportunity and also fully skip ACPI table parsing logic on
> VT-d when both "iommu=off" and "iommu=no-intremap" are in effect anyway,
> like was already the case for AMD.
>
> The tag below only references the commit uncovering a pre-existing
> anomaly.
>
> Fixes: d8bd82327b0f ("AMD/IOMMU: obtain IVHD type to use earlier")
> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Ouch, forgot to Cc Kevin; now added.
Jan
> ---
> While the change here deals with apic_x2apic_probe() as called from
> x2apic_bsp_setup(), the check_x2apic_preenabled() path looks to be
> similarly affected. That call occurs before acpi_boot_init(), which is
> what calls acpi_iommu_init(). The ordering in setup.c is in part
> relatively fragile, which is why for the moment I'm still hesitant to
> move the generic_apic_probe() call down. Plus I don't have easy access
> to a suitable system to test this case. Thoughts?
> ---
> v2: Treat iommu_enable and iommu_intremap as separate options.
>
> --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> @@ -183,9 +183,6 @@ int __init acpi_ivrs_init(void)
> {
> int rc;
>
> - if ( !iommu_enable && !iommu_intremap )
> - return 0;
> -
> rc = amd_iommu_get_supported_ivhd_type();
> if ( rc < 0 )
> return rc;
> @@ -193,10 +190,7 @@ int __init acpi_ivrs_init(void)
> ivhd_type = rc;
>
> if ( (amd_iommu_detect_acpi() !=0) || (iommu_found() == 0) )
> - {
> - iommu_intremap = iommu_intremap_off;
> return -ENODEV;
> - }
>
> iommu_init_ops = &_iommu_init_ops;
>
> --- a/xen/drivers/passthrough/vtd/dmar.c
> +++ b/xen/drivers/passthrough/vtd/dmar.c
> @@ -777,11 +777,7 @@ static int __init acpi_parse_dmar(struct
> dmar = (struct acpi_table_dmar *)table;
> dmar_flags = dmar->flags;
>
> - if ( !iommu_enable && !iommu_intremap )
> - {
> - ret = -EINVAL;
> - goto out;
> - }
> + ASSERT(iommu_enable || iommu_intremap);
>
> if ( !dmar->width )
> {
> --- a/xen/drivers/passthrough/x86/iommu.c
> +++ b/xen/drivers/passthrough/x86/iommu.c
> @@ -41,6 +41,24 @@ enum iommu_intremap __read_mostly iommu_
> bool __read_mostly iommu_intpost;
> #endif
>
> +void __init acpi_iommu_init(void)
> +{
> + int ret;
> +
> + if ( !iommu_enable && !iommu_intremap )
> + return;
> +
> + ret = acpi_dmar_init();
> + if ( ret == -ENODEV )
> + ret = acpi_ivrs_init();
> +
> + if ( ret )
> + {
> + iommu_enable = false;
> + iommu_intremap = iommu_intremap_off;
> + }
> +}
> +
> int __init iommu_hardware_setup(void)
> {
> struct IO_APIC_route_entry **ioapic_entries = NULL;
> --- a/xen/include/asm-x86/acpi.h
> +++ b/xen/include/asm-x86/acpi.h
> @@ -141,16 +141,10 @@ extern u32 x86_acpiid_to_apicid[];
> extern u32 pmtmr_ioport;
> extern unsigned int pmtmr_width;
>
> +void acpi_iommu_init(void);
> int acpi_dmar_init(void);
> int acpi_ivrs_init(void);
>
> -static inline int acpi_iommu_init(void)
> -{
> - int ret = acpi_dmar_init();
> -
> - return ret == -ENODEV ? acpi_ivrs_init() : ret;
> -}
> -
> void acpi_mmcfg_init(void);
>
> /* Incremented whenever we transition through S3. Value is 1 during boot. */
>
>
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Friday, October 22, 2021 1:59 PM
>
> On 21.10.2021 11:58, Jan Beulich wrote:
> > x2apic_bsp_setup() gets called ahead of iommu_setup(), and since x2APIC
> > mode (physical vs clustered) depends on iommu_intremap, that variable
> > needs to be set to off as soon as we know we can't / won't enable
> > interrupt remapping, i.e. in particular when parsing of the respective
> > ACPI tables failed. Move the turning off of iommu_intremap from AMD
> > specific code into acpi_iommu_init(), accompanying it by clearing of
> > iommu_enable.
> >
> > Take the opportunity and also fully skip ACPI table parsing logic on
> > VT-d when both "iommu=off" and "iommu=no-intremap" are in effect
> anyway,
> > like was already the case for AMD.
> >
> > The tag below only references the commit uncovering a pre-existing
> > anomaly.
> >
> > Fixes: d8bd82327b0f ("AMD/IOMMU: obtain IVHD type to use earlier")
> > Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
> > Signed-off-by: Jan Beulich <jbeulich@suse.com>
>
> Ouch, forgot to Cc Kevin; now added.
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
>
> Jan
>
> > ---
> > While the change here deals with apic_x2apic_probe() as called from
> > x2apic_bsp_setup(), the check_x2apic_preenabled() path looks to be
> > similarly affected. That call occurs before acpi_boot_init(), which is
> > what calls acpi_iommu_init(). The ordering in setup.c is in part
> > relatively fragile, which is why for the moment I'm still hesitant to
> > move the generic_apic_probe() call down. Plus I don't have easy access
> > to a suitable system to test this case. Thoughts?
> > ---
> > v2: Treat iommu_enable and iommu_intremap as separate options.
> >
> > --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c
> > +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c
> > @@ -183,9 +183,6 @@ int __init acpi_ivrs_init(void)
> > {
> > int rc;
> >
> > - if ( !iommu_enable && !iommu_intremap )
> > - return 0;
> > -
> > rc = amd_iommu_get_supported_ivhd_type();
> > if ( rc < 0 )
> > return rc;
> > @@ -193,10 +190,7 @@ int __init acpi_ivrs_init(void)
> > ivhd_type = rc;
> >
> > if ( (amd_iommu_detect_acpi() !=0) || (iommu_found() == 0) )
> > - {
> > - iommu_intremap = iommu_intremap_off;
> > return -ENODEV;
> > - }
> >
> > iommu_init_ops = &_iommu_init_ops;
> >
> > --- a/xen/drivers/passthrough/vtd/dmar.c
> > +++ b/xen/drivers/passthrough/vtd/dmar.c
> > @@ -777,11 +777,7 @@ static int __init acpi_parse_dmar(struct
> > dmar = (struct acpi_table_dmar *)table;
> > dmar_flags = dmar->flags;
> >
> > - if ( !iommu_enable && !iommu_intremap )
> > - {
> > - ret = -EINVAL;
> > - goto out;
> > - }
> > + ASSERT(iommu_enable || iommu_intremap);
> >
> > if ( !dmar->width )
> > {
> > --- a/xen/drivers/passthrough/x86/iommu.c
> > +++ b/xen/drivers/passthrough/x86/iommu.c
> > @@ -41,6 +41,24 @@ enum iommu_intremap __read_mostly iommu_
> > bool __read_mostly iommu_intpost;
> > #endif
> >
> > +void __init acpi_iommu_init(void)
> > +{
> > + int ret;
> > +
> > + if ( !iommu_enable && !iommu_intremap )
> > + return;
> > +
> > + ret = acpi_dmar_init();
> > + if ( ret == -ENODEV )
> > + ret = acpi_ivrs_init();
> > +
> > + if ( ret )
> > + {
> > + iommu_enable = false;
> > + iommu_intremap = iommu_intremap_off;
> > + }
> > +}
> > +
> > int __init iommu_hardware_setup(void)
> > {
> > struct IO_APIC_route_entry **ioapic_entries = NULL;
> > --- a/xen/include/asm-x86/acpi.h
> > +++ b/xen/include/asm-x86/acpi.h
> > @@ -141,16 +141,10 @@ extern u32 x86_acpiid_to_apicid[];
> > extern u32 pmtmr_ioport;
> > extern unsigned int pmtmr_width;
> >
> > +void acpi_iommu_init(void);
> > int acpi_dmar_init(void);
> > int acpi_ivrs_init(void);
> >
> > -static inline int acpi_iommu_init(void)
> > -{
> > - int ret = acpi_dmar_init();
> > -
> > - return ret == -ENODEV ? acpi_ivrs_init() : ret;
> > -}
> > -
> > void acpi_mmcfg_init(void);
> >
> > /* Incremented whenever we transition through S3. Value is 1 during boot.
> */
> >
> >
© 2016 - 2026 Red Hat, Inc.