[PULL 11/41] intel_iommu_accel: Check for compatibility with IOMMUFD backed device when x-flts=on

Cédric Le Goater posted 41 patches 3 weeks, 6 days ago
Maintainers: Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, "Michael S. Tsirkin" <mst@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Jason Wang <jasowang@redhat.com>, "Clément Mathieu--Drif" <clement.mathieu--drif@eviden.com>, Alexey Kardashevskiy <aik@ozlabs.ru>, John Levon <john.levon@nutanix.com>, Thanos Makatos <thanos.makatos@nutanix.com>, "Cédric Le Goater" <clg@redhat.com>, Alex Williamson <alex@shazbot.org>, Magnus Kulke <magnus.kulke@linux.microsoft.com>, Wei Liu <wei.liu@kernel.org>, Cornelia Huck <cohuck@redhat.com>, Palmer Dabbelt <palmer@dabbelt.com>, Alistair Francis <alistair.francis@wdc.com>, Weiwei Li <liwei1518@gmail.com>, Daniel Henrique Barboza <dbarboza@ventanamicro.com>, Liu Zhiwei <zhiwei_liu@linux.alibaba.com>, Fabiano Rosas <farosas@suse.de>, Laurent Vivier <lvivier@redhat.com>
There is a newer version of this series
[PULL 11/41] intel_iommu_accel: Check for compatibility with IOMMUFD backed device when x-flts=on
Posted by Cédric Le Goater 3 weeks, 6 days ago
From: Zhenzhong Duan <zhenzhong.duan@intel.com>

When vIOMMU is configured with x-flts=on in scalable mode, the guest first
stage page table is passed to the host to construct the nested page table for
passthrough devices.

We need to check compatibility of some critical IOMMU capabilities between
vIOMMU and host IOMMU to ensure guest first stage page table could be used by
host.

For instance, if vIOMMU supports first stage 1GB large page mapping but the
host does not, then associating this IOMMUFD backed device should fail.

Even if the checks pass, for now we willingly reject the association because
not all the bits are there yet; this will be relaxed at the end of this series.

Note vIOMMU has exposed the VIOMMU_FLAG_WANT_NESTING_PARENT flag to force
VFIO core to create a nesting parent HWPT. If the host doesn't support nested
translation, the creation will fail, so there is no need to check the nested
capability here.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20260106061304.314546-10-zhenzhong.duan@intel.com
[ clg: - hw/i386/intel_iommu_accel.[hc]: Changed Copyright date 2025 -> 2026
       - in commit log :
       	 IOMMU_HWPT_ALLOC_NEST_PARENT -> VIOMMU_FLAG_WANT_NESTING_PARENT  ]
Signed-off-by: Cédric Le Goater <clg@redhat.com>
---
 MAINTAINERS                 |  1 +
 hw/i386/intel_iommu_accel.h | 28 +++++++++++++++++++++++++
 hw/i386/intel_iommu.c       |  5 ++---
 hw/i386/intel_iommu_accel.c | 42 +++++++++++++++++++++++++++++++++++++
 hw/i386/Kconfig             |  5 +++++
 hw/i386/meson.build         |  1 +
 6 files changed, 79 insertions(+), 3 deletions(-)
 create mode 100644 hw/i386/intel_iommu_accel.h
 create mode 100644 hw/i386/intel_iommu_accel.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 620b184aa5f91f8f86879cf22eabe720a9fa33a3..4ddbfba9f0118190b7dd3a3d6400e34774d5e17a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3961,6 +3961,7 @@ R: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>
 S: Supported
 F: hw/i386/intel_iommu.c
 F: hw/i386/intel_iommu_internal.h
+F: hw/i386/intel_iommu_accel.*
 F: include/hw/i386/intel_iommu.h
 F: tests/functional/x86_64/test_intel_iommu.py
 F: tests/qtest/intel-iommu-test.c
diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
new file mode 100644
index 0000000000000000000000000000000000000000..79117b25a030a3d22d75b635725a6f78a21ec407
--- /dev/null
+++ b/hw/i386/intel_iommu_accel.h
@@ -0,0 +1,28 @@
+/*
+ * Intel IOMMU acceleration with nested translation
+ *
+ * Copyright (C) 2026 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_I386_INTEL_IOMMU_ACCEL_H
+#define HW_I386_INTEL_IOMMU_ACCEL_H
+#include CONFIG_DEVICES
+
+#ifdef CONFIG_VTD_ACCEL
+bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+                          Error **errp);
+#else /* !CONFIG_VTD_ACCEL: stub that always fails and sets @errp */
+static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
+                                        HostIOMMUDevice *hiod, Error **errp)
+{
+    ERRP_GUARD(); /* so the hint is kept even when errp is &error_fatal */
+    error_setg(errp, "host IOMMU cannot be checked!");
+    error_append_hint(errp, "CONFIG_VTD_ACCEL is not enabled\n");
+    return false;
+}
+#endif
+#endif
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 3a3725e489595121b1fdb0f38a1e85fa7f64c1f6..b11798d4b75b7fb6961a1b2bde8851667b0d07db 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -26,6 +26,7 @@
 #include "hw/core/sysbus.h"
 #include "hw/core/iommu.h"
 #include "intel_iommu_internal.h"
+#include "intel_iommu_accel.h"
 #include "hw/pci/pci.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/core/qdev-properties.h"
@@ -4595,9 +4596,7 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod,
         return true;
     }
 
-    error_setg(errp,
-               "host device is uncompatible with first stage translation");
-    return false;
+    return vtd_check_hiod_accel(s, hiod, errp);
 }
 
 static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
new file mode 100644
index 0000000000000000000000000000000000000000..2942eff100b9e7871326d27b58b71517ff705271
--- /dev/null
+++ b/hw/i386/intel_iommu_accel.c
@@ -0,0 +1,42 @@
+/*
+ * Intel IOMMU acceleration with nested translation
+ *
+ * Copyright (C) 2026 Intel Corporation.
+ *
+ * Authors: Zhenzhong Duan <zhenzhong.duan@intel.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "system/iommufd.h"
+#include "intel_iommu_internal.h"
+#include "intel_iommu_accel.h"
+
+/*
+ * Check whether @hiod can back a vIOMMU configured with x-flts=on.
+ *
+ * Sets @errp and returns false on the first failed check.  Even when every
+ * check passes the device is still rejected for now, so this never returns
+ * true.
+ */
+bool vtd_check_hiod_accel(IntelIOMMUState *s, HostIOMMUDevice *hiod,
+                          Error **errp)
+{
+    struct HostIOMMUDeviceCaps *host_caps = &hiod->caps;
+    struct iommu_hw_info_vtd *host_vtd = &host_caps->vendor_caps.vtd;
+
+    if (!object_dynamic_cast(OBJECT(hiod), TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
+        error_setg(errp, "Need IOMMUFD backend when x-flts=on");
+    } else if (host_caps->type != IOMMU_HW_INFO_TYPE_INTEL_VTD) {
+        error_setg(errp, "Incompatible host platform IOMMU type %d",
+                   host_caps->type);
+    } else if (s->fs1gp && !(host_vtd->cap_reg & VTD_CAP_FS1GP)) {
+        error_setg(errp,
+                   "First stage 1GB large page is unsupported by host IOMMU");
+    } else {
+        error_setg(errp,
+                   "host IOMMU is incompatible with guest first stage translation");
+    }
+    return false;
+}
diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig
index 6a0ab54bea4ab8599965a7b4dec60194f85877cb..12473acaa7344c36f6bf20eadd058d414dc6f945 100644
--- a/hw/i386/Kconfig
+++ b/hw/i386/Kconfig
@@ -150,8 +150,13 @@ config X86_IOMMU
 
 config VTD
     bool
+    imply VTD_ACCEL
     select X86_IOMMU
 
+config VTD_ACCEL
+    bool
+    depends on VTD && IOMMUFD
+
 config AMD_IOMMU
     bool
     select X86_IOMMU
diff --git a/hw/i386/meson.build b/hw/i386/meson.build
index 436b3ce52d6480457a84c796ee5ed79e3a0bec36..63ae57baa511e6e29b0e6a276352cd61df6602a6 100644
--- a/hw/i386/meson.build
+++ b/hw/i386/meson.build
@@ -21,6 +21,7 @@ i386_ss.add(when: 'CONFIG_Q35', if_true: files('pc_q35.c'))
 i386_ss.add(when: 'CONFIG_VMMOUSE', if_true: files('vmmouse.c'))
 i386_ss.add(when: 'CONFIG_VMPORT', if_true: files('vmport.c'))
 i386_ss.add(when: 'CONFIG_VTD', if_true: files('intel_iommu.c'))
+i386_ss.add(when: 'CONFIG_VTD_ACCEL', if_true: files('intel_iommu_accel.c'))
 i386_ss.add(when: 'CONFIG_SGX', if_true: files('sgx-epc.c','sgx.c'),
                                 if_false: files('sgx-stub.c'))
 
-- 
2.52.0