When vIOMMU is configured x-flts=on in scalable mode, first stage page table
is passed to host to construct nested page table for passthrough devices.
We need to check compatibility of some critical IOMMU capabilities between
vIOMMU and host IOMMU to ensure guest first stage page table could be used by
host.
For instance, vIOMMU supports first stage 1GB large page mapping, but host does
not, then this IOMMUFD backed device should fail.
Even if the checks pass, for now we deliberately reject the association because
not all the bits are there yet; this will be relaxed at the end of this series.
Note vIOMMU has exposed IOMMU_HWPT_ALLOC_NEST_PARENT flag to force VFIO core to
create nesting parent HWPT, if host doesn't support nested translation, the
creation will fail. So no need to check nested capability here.
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
MAINTAINERS | 1 +
hw/i386/intel_iommu_accel.h | 28 +++++++++++++++++++++++++
hw/i386/intel_iommu.c | 5 ++---
hw/i386/intel_iommu_accel.c | 42 +++++++++++++++++++++++++++++++++++++
hw/i386/Kconfig | 5 +++++
hw/i386/meson.build | 1 +
6 files changed, 79 insertions(+), 3 deletions(-)
create mode 100644 hw/i386/intel_iommu_accel.h
create mode 100644 hw/i386/intel_iommu_accel.c
diff --git a/MAINTAINERS b/MAINTAINERS
index f4a30c126b..bc1d2b6261 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3929,6 +3929,7 @@ R: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
S: Supported
F: hw/i386/intel_iommu.c
F: hw/i386/intel_iommu_internal.h
+F: hw/i386/intel_iommu_accel.*
F: include/hw/i386/intel_iommu.h
F: tests/functional/x86_64/test_intel_iommu.py
F: tests/qtest/intel-iommu-test.c
diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
new file mode 100644
index 0000000000..c5274e342c
--- /dev/null
+++ b/hw/i386/intel_iommu_accel.h
@@ -0,0 +1,28 @@
+/*
+ * Intel IOMMU acceleration with nested translation
+ *
+ * Copyright (C) 2025 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_I386_INTEL_IOMMU_ACCEL_H
+#define HW_I386_INTEL_IOMMU_ACCEL_H
+#include CONFIG_DEVICES
+
+#ifdef CONFIG_VTD_ACCEL
+bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+ Error **errp);
+#else
+static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
+ HostIOMMUDevice *hiod,
+ Error **errp)
+{
+ error_setg(errp,
+ "host IOMMU is incompatible with guest first stage translation");
+ return false;
+}
+#endif
+#endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3095d78321..d3c8a75878 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -26,6 +26,7 @@
#include "hw/sysbus.h"
#include "hw/iommu.h"
#include "intel_iommu_internal.h"
+#include "intel_iommu_accel.h"
#include "hw/pci/pci.h"
#include "hw/pci/pci_bus.h"
#include "hw/qdev-properties.h"
@@ -4596,9 +4597,7 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
return true;
}
- error_setg(errp,
- "host device is uncompatible with first stage translation");
- return false;
+ return vtd_check_hiod_accel(s, hiod, errp);
}
static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
new file mode 100644
index 0000000000..6846c6ec4d
--- /dev/null
+++ b/hw/i386/intel_iommu_accel.c
@@ -0,0 +1,42 @@
+/*
+ * Intel IOMMU acceleration with nested translation
+ *
+ * Copyright (C) 2025 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "system/iommufd.h"
+#include "intel_iommu_internal.h"
+#include "intel_iommu_accel.h"
+
+bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+ Error **errp)
+{
+ struct HostIOMMUDeviceCaps *caps = &hiod->caps;
+ struct iommu_hw_info_vtd *vtd = &caps->vendor_caps.vtd;
+
+ if (!object_dynamic_cast(OBJECT(hiod), TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
+ error_setg(errp, "Need IOMMUFD backend when x-flts=on");
+ return false;
+ }
+
+ if (caps->type != IOMMU_HW_INFO_TYPE_INTEL_VTD) {
+ error_setg(errp, "Incompatible host platform IOMMU type %d",
+ caps->type);
+ return false;
+ }
+
+ if (s->fs1gp && !(vtd->cap_reg & VTD_CAP_FS1GP)) {
+ error_setg(errp,
+ "First stage 1GB large page is unsupported by host IOMMU");
+ return false;
+ }
+
+ error_setg(errp,
+ "host IOMMU is incompatible with guest first stage translation");
+ return false;
+}
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index 6a0ab54bea..12473acaa7 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -150,8 +150,13 @@ config X86_IOMMU
config VTD
bool
+ imply VTD_ACCEL
select X86_IOMMU
+config VTD_ACCEL
+ bool
+ depends on VTD && IOMMUFD
+
config AMD_IOMMU
bool
select X86_IOMMU
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 436b3ce52d..63ae57baa5 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -21,6 +21,7 @@ i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c'))
i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c'))
i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c'))
i386_ss.add(when: 'CONFIG_VTD', if_true: files('intel_iommu.c'))
+i386_ss.add(when: 'CONFIG_VTD_ACCEL', if_true: files('intel_iommu_accel.c'))
i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'),
if_false: files('sgx-stub.c'))
--
2.47.1
Hi Zhenzhong,
On 11/17/25 10:37 AM, Zhenzhong Duan wrote:
> When vIOMMU is configured x-flts=on in scalable mode, first stage page table
> is passed to host to construct nested page table for passthrough devices.
>
> We need to check compatibility of some critical IOMMU capabilities between
> vIOMMU and host IOMMU to ensure guest first stage page table could be used by
> host.
>
> For instance, vIOMMU supports first stage 1GB large page mapping, but host does
> not, then this IOMMUFD backed device should fail.
>
> Even of the checks pass, for now we willingly reject the association because
> all the bits are not there yet, it will be relaxed in the end of this series.
>
> Note vIOMMU has exposed IOMMU_HWPT_ALLOC_NEST_PARENT flag to force VFIO core to
> create nesting parent HWPT, if host doesn't support nested translation, the
> creation will fail. So no need to check nested capability here.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> MAINTAINERS | 1 +
> hw/i386/intel_iommu_accel.h | 28 +++++++++++++++++++++++++
> hw/i386/intel_iommu.c | 5 ++---
> hw/i386/intel_iommu_accel.c | 42 +++++++++++++++++++++++++++++++++++++
> hw/i386/Kconfig | 5 +++++
> hw/i386/meson.build | 1 +
> 6 files changed, 79 insertions(+), 3 deletions(-)
> create mode 100644 hw/i386/intel_iommu_accel.h
> create mode 100644 hw/i386/intel_iommu_accel.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index f4a30c126b..bc1d2b6261 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3929,6 +3929,7 @@ R: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
> S: Supported
> F: hw/i386/intel_iommu.c
> F: hw/i386/intel_iommu_internal.h
> +F: hw/i386/intel_iommu_accel.*
> F: include/hw/i386/intel_iommu.h
> F: tests/functional/x86_64/test_intel_iommu.py
> F: tests/qtest/intel-iommu-test.c
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
> new file mode 100644
> index 0000000000..c5274e342c
> --- /dev/null
> +++ b/hw/i386/intel_iommu_accel.h
> @@ -0,0 +1,28 @@
> +/*
> + * Intel IOMMU acceleration with nested translation
> + *
> + * Copyright (C) 2025 Intel Corporation.
> + *
> + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef HW_I386_INTEL_IOMMU_ACCEL_H
> +#define HW_I386_INTEL_IOMMU_ACCEL_H
> +#include CONFIG_DEVICES
> +
> +#ifdef CONFIG_VTD_ACCEL
> +bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice *hiod,
> + Error **errp);
> +#else
> +static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
> + HostIOMMUDevice *hiod,
> + Error **errp)
> +{
> + error_setg(errp,
> + "host IOMMU is incompatible with guest first stage translation");
I would rather change the error msg to
"host IOMMU cannot be checked!"
and append a hint through error_append_hint(), e.g.
"CONFIG_VTD_ACCEL is not enabled", or something like that.
> + return false;
> +}
> +#endif
> +#endif
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 3095d78321..d3c8a75878 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -26,6 +26,7 @@
> #include "hw/sysbus.h"
> #include "hw/iommu.h"
> #include "intel_iommu_internal.h"
> +#include "intel_iommu_accel.h"
> #include "hw/pci/pci.h"
> #include "hw/pci/pci_bus.h"
> #include "hw/qdev-properties.h"
> @@ -4596,9 +4597,7 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
> return true;
> }
>
> - error_setg(errp,
> - "host device is uncompatible with first stage translation");
> - return false;
> + return vtd_check_hiod_accel(s, hiod, errp);
> }
>
> static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> new file mode 100644
> index 0000000000..6846c6ec4d
> --- /dev/null
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -0,0 +1,42 @@
> +/*
> + * Intel IOMMU acceleration with nested translation
> + *
> + * Copyright (C) 2025 Intel Corporation.
> + *
> + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +#include "system/iommufd.h"
> +#include "intel_iommu_internal.h"
> +#include "intel_iommu_accel.h"
> +
> +bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice *hiod,
> + Error **errp)
> +{
> + struct HostIOMMUDeviceCaps *caps = &hiod->caps;
> + struct iommu_hw_info_vtd *vtd = &caps->vendor_caps.vtd;
> +
> + if (!object_dynamic_cast(OBJECT(hiod), TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> + error_setg(errp, "Need IOMMUFD backend when x-flts=on");
> + return false;
> + }
> +
> + if (caps->type != IOMMU_HW_INFO_TYPE_INTEL_VTD) {
> + error_setg(errp, "Incompatible host platform IOMMU type %d",
> + caps->type);
> + return false;
> + }
> +
> + if (s->fs1gp && !(vtd->cap_reg & VTD_CAP_FS1GP)) {
> + error_setg(errp,
> + "First stage 1GB large page is unsupported by host IOMMU");
> + return false;
> + }
> +
> + error_setg(errp,
> + "host IOMMU is incompatible with guest first stage translation");
> + return false;
> +}
> diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
> index 6a0ab54bea..12473acaa7 100644
> --- a/hw/i386/Kconfig
> +++ b/hw/i386/Kconfig
> @@ -150,8 +150,13 @@ config X86_IOMMU
>
> config VTD
> bool
> + imply VTD_ACCEL
> select X86_IOMMU
>
> +config VTD_ACCEL
> + bool
> + depends on VTD && IOMMUFD
> +
> config AMD_IOMMU
> bool
> select X86_IOMMU
> diff --git a/hw/i386/meson.build b/hw/i386/meson.build
> index 436b3ce52d..63ae57baa5 100644
> --- a/hw/i386/meson.build
> +++ b/hw/i386/meson.build
> @@ -21,6 +21,7 @@ i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c'))
> i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c'))
> i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c'))
> i386_ss.add(when: 'CONFIG_VTD', if_true: files('intel_iommu.c'))
> +i386_ss.add(when: 'CONFIG_VTD_ACCEL', if_true: files('intel_iommu_accel.c'))
> i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'),
> if_false: files('sgx-stub.c'))
>
wrt comments made by Cédric in
https://lore.kernel.org/all/IA3PR11MB9136B13C0C48EF293D3B599D92FAA@IA3PR11MB9136.namprd11.prod.outlook.com/
I see you kept the original approach. I have no strong opinion on that.
I'll let Cédric comment if he strongly disagrees.
With my comment taken into account feel free to grab my
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Thanks
Eric
Hi Eric,
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH v8 09/23] intel_iommu_accel: Check for compatibility
>with IOMMUFD backed device when x-flts=on
>
>Hi Zhenzhong,
>On 11/17/25 10:37 AM, Zhenzhong Duan wrote:
>> When vIOMMU is configured x-flts=on in scalable mode, first stage page
>table
>> is passed to host to construct nested page table for passthrough devices.
>>
>> We need to check compatibility of some critical IOMMU capabilities
>between
>> vIOMMU and host IOMMU to ensure guest first stage page table could be
>used by
>> host.
>>
>> For instance, vIOMMU supports first stage 1GB large page mapping, but
>host does
>> not, then this IOMMUFD backed device should fail.
>>
>> Even of the checks pass, for now we willingly reject the association because
>> all the bits are not there yet, it will be relaxed in the end of this series.
>>
>> Note vIOMMU has exposed IOMMU_HWPT_ALLOC_NEST_PARENT flag to
>force VFIO core to
>> create nesting parent HWPT, if host doesn't support nested translation, the
>> creation will fail. So no need to check nested capability here.
>>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> MAINTAINERS | 1 +
>> hw/i386/intel_iommu_accel.h | 28 +++++++++++++++++++++++++
>> hw/i386/intel_iommu.c | 5 ++---
>> hw/i386/intel_iommu_accel.c | 42
>+++++++++++++++++++++++++++++++++++++
>> hw/i386/Kconfig | 5 +++++
>> hw/i386/meson.build | 1 +
>> 6 files changed, 79 insertions(+), 3 deletions(-)
>> create mode 100644 hw/i386/intel_iommu_accel.h
>> create mode 100644 hw/i386/intel_iommu_accel.c
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index f4a30c126b..bc1d2b6261 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -3929,6 +3929,7 @@ R: Clément Mathieu--Drif
><clement.mathieu--drif@eviden.com>
>> S: Supported
>> F: hw/i386/intel_iommu.c
>> F: hw/i386/intel_iommu_internal.h
>> +F: hw/i386/intel_iommu_accel.*
>> F: include/hw/i386/intel_iommu.h
>> F: tests/functional/x86_64/test_intel_iommu.py
>> F: tests/qtest/intel-iommu-test.c
>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>> new file mode 100644
>> index 0000000000..c5274e342c
>> --- /dev/null
>> +++ b/hw/i386/intel_iommu_accel.h
>> @@ -0,0 +1,28 @@
>> +/*
>> + * Intel IOMMU acceleration with nested translation
>> + *
>> + * Copyright (C) 2025 Intel Corporation.
>> + *
>> + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#ifndef HW_I386_INTEL_IOMMU_ACCEL_H
>> +#define HW_I386_INTEL_IOMMU_ACCEL_H
>> +#include CONFIG_DEVICES
Here to address Cédric's suggestion.
>> +
>> +#ifdef CONFIG_VTD_ACCEL
>> +bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice
>*hiod,
>> + Error **errp);
>> +#else
>> +static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>> + HostIOMMUDevice
>*hiod,
>> + Error **errp)
>> +{
>> + error_setg(errp,
>> + "host IOMMU is incompatible with guest first stage
>translation");
>I would rather change the error msg to
>
>host IOMMU cannot be checked!
>+ append a hint through error_append_hint,
>CONFIG_VTD_ACCEL is not enabled or smthg alike
Will do.
>
>> + return false;
>> +}
>> +#endif
>> +#endif
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 3095d78321..d3c8a75878 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -26,6 +26,7 @@
>> #include "hw/sysbus.h"
>> #include "hw/iommu.h"
>> #include "intel_iommu_internal.h"
>> +#include "intel_iommu_accel.h"
>> #include "hw/pci/pci.h"
>> #include "hw/pci/pci_bus.h"
>> #include "hw/qdev-properties.h"
>> @@ -4596,9 +4597,7 @@ static bool vtd_check_hiod(IntelIOMMUState *s,
>HostIOMMUDevice *hiod,
>> return true;
>> }
>>
>> - error_setg(errp,
>> - "host device is uncompatible with first stage
>translation");
>> - return false;
>> + return vtd_check_hiod_accel(s, hiod, errp);
>> }
>>
>> static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>devfn,
>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>> new file mode 100644
>> index 0000000000..6846c6ec4d
>> --- /dev/null
>> +++ b/hw/i386/intel_iommu_accel.c
>> @@ -0,0 +1,42 @@
>> +/*
>> + * Intel IOMMU acceleration with nested translation
>> + *
>> + * Copyright (C) 2025 Intel Corporation.
>> + *
>> + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include "system/iommufd.h"
>> +#include "intel_iommu_internal.h"
>> +#include "intel_iommu_accel.h"
>> +
>> +bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice
>*hiod,
>> + Error **errp)
>> +{
>> + struct HostIOMMUDeviceCaps *caps = &hiod->caps;
>> + struct iommu_hw_info_vtd *vtd = &caps->vendor_caps.vtd;
>> +
>> + if (!object_dynamic_cast(OBJECT(hiod),
>TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
>> + error_setg(errp, "Need IOMMUFD backend when x-flts=on");
>> + return false;
>> + }
>> +
>> + if (caps->type != IOMMU_HW_INFO_TYPE_INTEL_VTD) {
>> + error_setg(errp, "Incompatible host platform IOMMU type %d",
>> + caps->type);
>> + return false;
>> + }
>> +
>> + if (s->fs1gp && !(vtd->cap_reg & VTD_CAP_FS1GP)) {
>> + error_setg(errp,
>> + "First stage 1GB large page is unsupported by host
>IOMMU");
>> + return false;
>> + }
>> +
>> + error_setg(errp,
>> + "host IOMMU is incompatible with guest first stage
>translation");
>> + return false;
>> +}
>> diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
>> index 6a0ab54bea..12473acaa7 100644
>> --- a/hw/i386/Kconfig
>> +++ b/hw/i386/Kconfig
>> @@ -150,8 +150,13 @@ config X86_IOMMU
>>
>> config VTD
>> bool
>> + imply VTD_ACCEL
>> select X86_IOMMU
>>
>> +config VTD_ACCEL
>> + bool
>> + depends on VTD && IOMMUFD
>> +
>> config AMD_IOMMU
>> bool
>> select X86_IOMMU
>> diff --git a/hw/i386/meson.build b/hw/i386/meson.build
>> index 436b3ce52d..63ae57baa5 100644
>> --- a/hw/i386/meson.build
>> +++ b/hw/i386/meson.build
>> @@ -21,6 +21,7 @@ i386_ss.add(when: 'CONFIG_Q35', if_true:
>files('pc_q35.c'))
>> i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c'))
>> i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c'))
>> i386_ss.add(when: 'CONFIG_VTD', if_true: files('intel_iommu.c'))
>> +i386_ss.add(when: 'CONFIG_VTD_ACCEL', if_true:
>files('intel_iommu_accel.c'))
>> i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'),
>> if_false: files('sgx-stub.c'))
>>
>wrt comments made by Cédric in
>https://lore.kernel.org/all/IA3PR11MB9136B13C0C48EF293D3B599D92FAA@
>IA3PR11MB9136.namprd11.prod.outlook.com/
>I see you kept the original approach. I have no strong opinion on that.
>I let Cédric's comment if he strongly disagrees.
I guess you mean adding '#include CONFIG_DEVICES'?
I added it in hw/i386/intel_iommu_accel.h, see above. Since there is a reference
to CONFIG_VTD_ACCEL in intel_iommu_accel.h, I thought it was better to add it
there instead of in intel_iommu_accel.c.
Thanks
Zhenzhong
>
>With my comment taken into account feel free to grab my
>
>Reviewed-by: Eric Auger <eric.auger@redhat.com>
>
>Thanks
>
>Eric
On 12/11/25 7:49 AM, Duan, Zhenzhong wrote:
> Hi Eric,
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Subject: Re: [PATCH v8 09/23] intel_iommu_accel: Check for compatibility
>> with IOMMUFD backed device when x-flts=on
>>
>> Hi Zhenzhong,
>> On 11/17/25 10:37 AM, Zhenzhong Duan wrote:
>>> When vIOMMU is configured x-flts=on in scalable mode, first stage page
>> table
>>> is passed to host to construct nested page table for passthrough devices.
>>>
>>> We need to check compatibility of some critical IOMMU capabilities
>> between
>>> vIOMMU and host IOMMU to ensure guest first stage page table could be
>> used by
>>> host.
>>>
>>> For instance, vIOMMU supports first stage 1GB large page mapping, but
>> host does
>>> not, then this IOMMUFD backed device should fail.
>>>
>>> Even of the checks pass, for now we willingly reject the association because
>>> all the bits are not there yet, it will be relaxed in the end of this series.
>>>
>>> Note vIOMMU has exposed IOMMU_HWPT_ALLOC_NEST_PARENT flag to
>> force VFIO core to
>>> create nesting parent HWPT, if host doesn't support nested translation, the
>>> creation will fail. So no need to check nested capability here.
>>>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>> MAINTAINERS | 1 +
>>> hw/i386/intel_iommu_accel.h | 28 +++++++++++++++++++++++++
>>> hw/i386/intel_iommu.c | 5 ++---
>>> hw/i386/intel_iommu_accel.c | 42
>> +++++++++++++++++++++++++++++++++++++
>>> hw/i386/Kconfig | 5 +++++
>>> hw/i386/meson.build | 1 +
>>> 6 files changed, 79 insertions(+), 3 deletions(-)
>>> create mode 100644 hw/i386/intel_iommu_accel.h
>>> create mode 100644 hw/i386/intel_iommu_accel.c
>>>
>>> diff --git a/MAINTAINERS b/MAINTAINERS
>>> index f4a30c126b..bc1d2b6261 100644
>>> --- a/MAINTAINERS
>>> +++ b/MAINTAINERS
>>> @@ -3929,6 +3929,7 @@ R: Clément Mathieu--Drif
>> <clement.mathieu--drif@eviden.com>
>>> S: Supported
>>> F: hw/i386/intel_iommu.c
>>> F: hw/i386/intel_iommu_internal.h
>>> +F: hw/i386/intel_iommu_accel.*
>>> F: include/hw/i386/intel_iommu.h
>>> F: tests/functional/x86_64/test_intel_iommu.py
>>> F: tests/qtest/intel-iommu-test.c
>>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>>> new file mode 100644
>>> index 0000000000..c5274e342c
>>> --- /dev/null
>>> +++ b/hw/i386/intel_iommu_accel.h
>>> @@ -0,0 +1,28 @@
>>> +/*
>>> + * Intel IOMMU acceleration with nested translation
>>> + *
>>> + * Copyright (C) 2025 Intel Corporation.
>>> + *
>>> + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#ifndef HW_I386_INTEL_IOMMU_ACCEL_H
>>> +#define HW_I386_INTEL_IOMMU_ACCEL_H
>>> +#include CONFIG_DEVICES
> Here to address Cédric's suggestion.
>
>>> +
>>> +#ifdef CONFIG_VTD_ACCEL
>>> +bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice
>> *hiod,
>>> + Error **errp);
>>> +#else
>>> +static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>>> + HostIOMMUDevice
>> *hiod,
>>> + Error **errp)
>>> +{
>>> + error_setg(errp,
>>> + "host IOMMU is incompatible with guest first stage
>> translation");
>> I would rather change the error msg to
>>
>> host IOMMU cannot be checked!
>> + append a hint through error_append_hint,
>> CONFIG_VTD_ACCEL is not enabled or smthg alike
> Will do.
>
>>> + return false;
>>> +}
>>> +#endif
>>> +#endif
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 3095d78321..d3c8a75878 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -26,6 +26,7 @@
>>> #include "hw/sysbus.h"
>>> #include "hw/iommu.h"
>>> #include "intel_iommu_internal.h"
>>> +#include "intel_iommu_accel.h"
>>> #include "hw/pci/pci.h"
>>> #include "hw/pci/pci_bus.h"
>>> #include "hw/qdev-properties.h"
>>> @@ -4596,9 +4597,7 @@ static bool vtd_check_hiod(IntelIOMMUState *s,
>> HostIOMMUDevice *hiod,
>>> return true;
>>> }
>>>
>>> - error_setg(errp,
>>> - "host device is uncompatible with first stage
>> translation");
>>> - return false;
>>> + return vtd_check_hiod_accel(s, hiod, errp);
>>> }
>>>
>>> static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int
>> devfn,
>>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>>> new file mode 100644
>>> index 0000000000..6846c6ec4d
>>> --- /dev/null
>>> +++ b/hw/i386/intel_iommu_accel.c
>>> @@ -0,0 +1,42 @@
>>> +/*
>>> + * Intel IOMMU acceleration with nested translation
>>> + *
>>> + * Copyright (C) 2025 Intel Corporation.
>>> + *
>>> + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "system/iommufd.h"
>>> +#include "intel_iommu_internal.h"
>>> +#include "intel_iommu_accel.h"
>>> +
>>> +bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice
>> *hiod,
>>> + Error **errp)
>>> +{
>>> + struct HostIOMMUDeviceCaps *caps = &hiod->caps;
>>> + struct iommu_hw_info_vtd *vtd = &caps->vendor_caps.vtd;
>>> +
>>> + if (!object_dynamic_cast(OBJECT(hiod),
>> TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
>>> + error_setg(errp, "Need IOMMUFD backend when x-flts=on");
>>> + return false;
>>> + }
>>> +
>>> + if (caps->type != IOMMU_HW_INFO_TYPE_INTEL_VTD) {
>>> + error_setg(errp, "Incompatible host platform IOMMU type %d",
>>> + caps->type);
>>> + return false;
>>> + }
>>> +
>>> + if (s->fs1gp && !(vtd->cap_reg & VTD_CAP_FS1GP)) {
>>> + error_setg(errp,
>>> + "First stage 1GB large page is unsupported by host
>> IOMMU");
>>> + return false;
>>> + }
>>> +
>>> + error_setg(errp,
>>> + "host IOMMU is incompatible with guest first stage
>> translation");
>>> + return false;
>>> +}
>>> diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
>>> index 6a0ab54bea..12473acaa7 100644
>>> --- a/hw/i386/Kconfig
>>> +++ b/hw/i386/Kconfig
>>> @@ -150,8 +150,13 @@ config X86_IOMMU
>>>
>>> config VTD
>>> bool
>>> + imply VTD_ACCEL
>>> select X86_IOMMU
>>>
>>> +config VTD_ACCEL
>>> + bool
>>> + depends on VTD && IOMMUFD
>>> +
>>> config AMD_IOMMU
>>> bool
>>> select X86_IOMMU
>>> diff --git a/hw/i386/meson.build b/hw/i386/meson.build
>>> index 436b3ce52d..63ae57baa5 100644
>>> --- a/hw/i386/meson.build
>>> +++ b/hw/i386/meson.build
>>> @@ -21,6 +21,7 @@ i386_ss.add(when: 'CONFIG_Q35', if_true:
>> files('pc_q35.c'))
>>> i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c'))
>>> i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c'))
>>> i386_ss.add(when: 'CONFIG_VTD', if_true: files('intel_iommu.c'))
>>> +i386_ss.add(when: 'CONFIG_VTD_ACCEL', if_true:
>> files('intel_iommu_accel.c'))
>>> i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'),
>>> if_false: files('sgx-stub.c'))
>>>
>> wrt comments made by Cédric in
>> https://lore.kernel.org/all/IA3PR11MB9136B13C0C48EF293D3B599D92FAA@
>> IA3PR11MB9136.namprd11.prod.outlook.com/
>> I see you kept the original approach. I have no strong opinion on that.
>> I let Cédric's comment if he strongly disagrees.
> Guess you mean adding '#include CONFIG_DEVICES'?
> I added it in hw/i386/intel_iommu_accel.h, see above. There is reference to
> CONFIG_VTD_ACCEL in intel_iommu_accel.h, I thought it's better to add it
> there instead of intel_iommu_accel.c
No, I rather meant Cédric's comment on extending HostIOMMUDeviceClass
instead of using iommufd directly.
Eric
>
> Thanks
> Zhenzhong
>
>> With my comment taken into account feel free to grab my
>>
>> Reviewed-by: Eric Auger <eric.auger@redhat.com>
>>
>> Thanks
>>
>> Eric
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>>>> diff --git a/hw/i386/meson.build b/hw/i386/meson.build
>>>> index 436b3ce52d..63ae57baa5 100644
>>>> --- a/hw/i386/meson.build
>>>> +++ b/hw/i386/meson.build
>>>> @@ -21,6 +21,7 @@ i386_ss.add(when: 'CONFIG_Q35', if_true:
>>> files('pc_q35.c'))
>>>> i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c'))
>>>> i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c'))
>>>> i386_ss.add(when: 'CONFIG_VTD', if_true: files('intel_iommu.c'))
>>>> +i386_ss.add(when: 'CONFIG_VTD_ACCEL', if_true:
>>> files('intel_iommu_accel.c'))
>>>> i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'),
>>>> if_false: files('sgx-stub.c'))
>>>>
>>> wrt comments made by Cédric in
>>>
>https://lore.kernel.org/all/IA3PR11MB9136B13C0C48EF293D3B599D92FAA@
>>> IA3PR11MB9136.namprd11.prod.outlook.com/
>>> I see you kept the original approach. I have no strong opinion on that.
>>> I let Cédric's comment if he strongly disagrees.
>> Guess you mean adding '#include CONFIG_DEVICES'?
>> I added it in hw/i386/intel_iommu_accel.h, see above. There is reference to
>> CONFIG_VTD_ACCEL in intel_iommu_accel.h, I thought it's better to add it
>> there instead of intel_iommu_accel.c
>
>No I rather meant Cédric's comment on extending HostIOMMUDeviceClass
>instead of using iommufd directly Eric
I looked into Cédric's suggestion. Currently IOMMUFD is the only brand-new
accel-related IOMMU backend. I find it hard to foresee what a new accel
backend, e.g. IOMMUFD_v2, will look like, or what common abstraction to make
between them, just as there is a big difference between the legacy VFIO and
IOMMUFD backends. I think we can add the abstraction layer when IOMMUFD_v2
comes in the future, if Cédric agrees.
Thanks
Zhenzhong
© 2016 - 2025 Red Hat, Inc.