On 2026/1/6 14:12, Zhenzhong Duan wrote:
> When vIOMMU is configured x-flts=on in scalable mode, first stage page table
> is passed to host to construct nested page table for passthrough devices.
>
> We need to check compatibility of some critical IOMMU capabilities between
> vIOMMU and host IOMMU to ensure guest first stage page table could be used by
> host.
>
> For instance, if vIOMMU supports first stage 1GB large page mapping but host
> does not, then this IOMMUFD backed device should fail.
>
> Even if the checks pass, for now we willingly reject the association because
> not all the bits are there yet; this will be relaxed at the end of this series.
>
> Note vIOMMU has exposed IOMMU_HWPT_ALLOC_NEST_PARENT flag to force VFIO core to
a nit:
s/IOMMU_HWPT_ALLOC_NEST_PARENT/VIOMMU_FLAG_WANT_NESTING_PARENT/
> create nesting parent HWPT; if host doesn't support nested translation, the
> creation will fail. So there is no need to check nested capability here.
>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> Reviewed-by: Eric Auger <eric.auger@redhat.com>
> ---
> MAINTAINERS | 1 +
> hw/i386/intel_iommu_accel.h | 28 +++++++++++++++++++++++++
> hw/i386/intel_iommu.c | 5 ++---
> hw/i386/intel_iommu_accel.c | 42 +++++++++++++++++++++++++++++++++++++
> hw/i386/Kconfig | 5 +++++
> hw/i386/meson.build | 1 +
> 6 files changed, 79 insertions(+), 3 deletions(-)
> create mode 100644 hw/i386/intel_iommu_accel.h
> create mode 100644 hw/i386/intel_iommu_accel.c
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 3ff0d3a4da..a00539e650 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3931,6 +3931,7 @@ R: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
> S: Supported
> F: hw/i386/intel_iommu.c
> F: hw/i386/intel_iommu_internal.h
> +F: hw/i386/intel_iommu_accel.*
> F: include/hw/i386/intel_iommu.h
> F: tests/functional/x86_64/test_intel_iommu.py
> F: tests/qtest/intel-iommu-test.c
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
> new file mode 100644
> index 0000000000..472ae109e2
> --- /dev/null
> +++ b/hw/i386/intel_iommu_accel.h
> @@ -0,0 +1,28 @@
> +/*
> + * Intel IOMMU acceleration with nested translation
> + *
> + * Copyright (C) 2025 Intel Corporation.
> + *
> + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef HW_I386_INTEL_IOMMU_ACCEL_H
> +#define HW_I386_INTEL_IOMMU_ACCEL_H
> +#include CONFIG_DEVICES
> +
> +#ifdef CONFIG_VTD_ACCEL
> +bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice *hiod,
> + Error **errp);
> +#else
> +static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
> + HostIOMMUDevice *hiod,
> + Error **errp)
> +{
> + error_setg(errp, "host IOMMU cannot be checked!");
> + error_append_hint(errp, "CONFIG_VTD_ACCEL is not enabled");
> + return false;
> +}
> +#endif
> +#endif
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 3a3725e489..b11798d4b7 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -26,6 +26,7 @@
> #include "hw/core/sysbus.h"
> #include "hw/core/iommu.h"
> #include "intel_iommu_internal.h"
> +#include "intel_iommu_accel.h"
> #include "hw/pci/pci.h"
> #include "hw/pci/pci_bus.h"
> #include "hw/core/qdev-properties.h"
> @@ -4595,9 +4596,7 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
> return true;
> }
>
> - error_setg(errp,
> - "host device is uncompatible with first stage translation");
> - return false;
> + return vtd_check_hiod_accel(s, hiod, errp);
> }
>
> static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> new file mode 100644
> index 0000000000..6846c6ec4d
> --- /dev/null
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -0,0 +1,42 @@
> +/*
> + * Intel IOMMU acceleration with nested translation
> + *
> + * Copyright (C) 2025 Intel Corporation.
> + *
> + * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include "qemu/osdep.h"
> +#include "system/iommufd.h"
> +#include "intel_iommu_internal.h"
> +#include "intel_iommu_accel.h"
> +
> +bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice *hiod,
> + Error **errp)
> +{
> + struct HostIOMMUDeviceCaps *caps = &hiod->caps;
> + struct iommu_hw_info_vtd *vtd = &caps->vendor_caps.vtd;
> +
> + if (!object_dynamic_cast(OBJECT(hiod), TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> + error_setg(errp, "Need IOMMUFD backend when x-flts=on");
> + return false;
> + }
> +
> + if (caps->type != IOMMU_HW_INFO_TYPE_INTEL_VTD) {
> + error_setg(errp, "Incompatible host platform IOMMU type %d",
> + caps->type);
> + return false;
> + }
> +
> + if (s->fs1gp && !(vtd->cap_reg & VTD_CAP_FS1GP)) {
> + error_setg(errp,
> + "First stage 1GB large page is unsupported by host IOMMU");
> + return false;
> + }
> +
> + error_setg(errp,
> + "host IOMMU is incompatible with guest first stage translation");
> + return false;
> +}
> diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
> index 6a0ab54bea..12473acaa7 100644
> --- a/hw/i386/Kconfig
> +++ b/hw/i386/Kconfig
> @@ -150,8 +150,13 @@ config X86_IOMMU
>
> config VTD
> bool
> + imply VTD_ACCEL
> select X86_IOMMU
>
> +config VTD_ACCEL
> + bool
> + depends on VTD && IOMMUFD
> +
> config AMD_IOMMU
> bool
> select X86_IOMMU
> diff --git a/hw/i386/meson.build b/hw/i386/meson.build
> index 436b3ce52d..63ae57baa5 100644
> --- a/hw/i386/meson.build
> +++ b/hw/i386/meson.build
> @@ -21,6 +21,7 @@ i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c'))
> i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c'))
> i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c'))
> i386_ss.add(when: 'CONFIG_VTD', if_true: files('intel_iommu.c'))
> +i386_ss.add(when: 'CONFIG_VTD_ACCEL', if_true: files('intel_iommu_accel.c'))
> i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'),
> if_false: files('sgx-stub.c'))
>