[PATCH 2/2] intel_iommu: Simplify caching mode check with VFIO device

Zhenzhong Duan posted 2 patches 1 month, 3 weeks ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, Yi Liu <yi.l.liu@intel.com>, "Clément Mathieu--Drif" <clement.mathieu--drif@eviden.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>
There is a newer version of this series
[PATCH 2/2] intel_iommu: Simplify caching mode check with VFIO device
Posted by Zhenzhong Duan 1 month, 3 weeks ago
In the early days, we had different tricks to ensure caching-mode=on with VFIO
devices:

28cf553afe ("intel_iommu: Sanity check vfio-pci config on machine init done")
c6cbc29d36 ("pc/q35: Disallow vfio-pci hotplug without VT-d caching mode")
b8d78277c0 ("intel-iommu: fail MAP notifier without caching mode")

These checks exist because, without caching mode, the MAP notifier won't work
correctly: the guest won't send an IOTLB update event when it establishes new
mappings in the I/O page tables.

Now, with the host IOMMU device interface between VFIO and vIOMMU, we can
simplify this to a small check in set_iommu_device(). This also works for a
future VDPA implementation, which may also need caching mode on.

For coldplug VFIO device:

qemu-system-x86_64: -device vfio-pci,host=0000:3b:00.0,id=hostdev3,bus=root0,iommufd=iommufd0: vfio 0000:3b:00.0: Failed to set vIOMMU: Device assignment is not allowed without enabling caching-mode=on for Intel IOMMU.

For hotplug VFIO device:

Error: vfio 0000:3b:00.0: Failed to set vIOMMU: Device assignment is not allowed without enabling caching-mode=on for Intel IOMMU.

Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu.c | 47 ++++++-------------------------------------
 hw/i386/pc.c          | 20 ------------------
 2 files changed, 6 insertions(+), 61 deletions(-)

diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index f04300022e..5c67b42dde 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -85,13 +85,6 @@ struct vtd_iotlb_key {
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
-static void vtd_panic_require_caching_mode(void)
-{
-    error_report("We need to set caching-mode=on for intel-iommu to enable "
-                 "device assignment with IOMMU protection.");
-    exit(1);
-}
-
 static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
                             uint64_t wmask, uint64_t w1cmask)
 {
@@ -3731,13 +3724,6 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
                          "Snoop Control with vhost or VFIO is not supported");
         return -ENOTSUP;
     }
-    if (!s->caching_mode && (new & IOMMU_NOTIFIER_MAP)) {
-        error_setg_errno(errp, ENOTSUP,
-                         "device %02x.%02x.%x requires caching mode",
-                         pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),
-                         PCI_FUNC(vtd_as->devfn));
-        return -ENOTSUP;
-    }
     if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) {
         error_setg_errno(errp, ENOTSUP,
                          "device %02x.%02x.%x requires device IOTLB mode",
@@ -4378,6 +4364,12 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
 
     assert(hiod);
 
+    if (!s->caching_mode) {
+        error_setg(errp, "Device assignment is not allowed without enabling "
+                   "caching-mode=on for Intel IOMMU.");
+        return false;
+    }
+
     vtd_iommu_lock(s);
 
     if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
@@ -4910,32 +4902,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
     return true;
 }
 
-static int vtd_machine_done_notify_one(Object *child, void *unused)
-{
-    IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
-
-    /*
-     * We hard-coded here because vfio-pci is the only special case
-     * here.  Let's be more elegant in the future when we can, but so
-     * far there seems to be no better way.
-     */
-    if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
-        vtd_panic_require_caching_mode();
-    }
-
-    return 0;
-}
-
-static void vtd_machine_done_hook(Notifier *notifier, void *unused)
-{
-    object_child_foreach_recursive(object_get_root(),
-                                   vtd_machine_done_notify_one, NULL);
-}
-
-static Notifier vtd_machine_done_notify = {
-    .notify = vtd_machine_done_hook,
-};
-
 static void vtd_realize(DeviceState *dev, Error **errp)
 {
     MachineState *ms = MACHINE(qdev_get_machine());
@@ -4990,7 +4956,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
     pci_setup_iommu(bus, &vtd_iommu_ops, dev);
     /* Pseudo address space under root PCI bus. */
     x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
-    qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
 }
 
 static void vtd_class_init(ObjectClass *klass, const void *data)
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index bc048a6d13..01cd9a67db 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1720,25 +1720,6 @@ static void pc_machine_wakeup(MachineState *machine)
     cpu_synchronize_all_post_reset();
 }
 
-static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
-{
-    X86IOMMUState *iommu = x86_iommu_get_default();
-    IntelIOMMUState *intel_iommu;
-
-    if (iommu &&
-        object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) &&
-        object_dynamic_cast((Object *)dev, "vfio-pci")) {
-        intel_iommu = INTEL_IOMMU_DEVICE(iommu);
-        if (!intel_iommu->caching_mode) {
-            error_setg(errp, "Device assignment is not allowed without "
-                       "enabling caching-mode=on for Intel IOMMU.");
-            return false;
-        }
-    }
-
-    return true;
-}
-
 static void pc_machine_class_init(ObjectClass *oc, const void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
@@ -1758,7 +1739,6 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data)
     x86mc->apic_xrupt_override = true;
     assert(!mc->get_hotplug_handler);
     mc->get_hotplug_handler = pc_get_hotplug_handler;
-    mc->hotplug_allowed = pc_hotplug_allowed;
     mc->auto_enable_numa_with_memhp = true;
     mc->auto_enable_numa_with_memdev = true;
     mc->has_hotpluggable_cpus = true;
-- 
2.47.1
RE: [PATCH 2/2] intel_iommu: Simplify caching mode check with VFIO device
Posted by Duan, Zhenzhong 1 month, 2 weeks ago

>-----Original Message-----
>From: Duan, Zhenzhong <zhenzhong.duan@intel.com>
>Subject: [PATCH 2/2] intel_iommu: Simplify caching mode check with VFIO
>device
>
>In early days, we have different tricks to ensure caching-mode=on with VFIO
>device:
>
>28cf553afe ("intel_iommu: Sanity check vfio-pci config on machine init done")
>c6cbc29d36 ("pc/q35: Disallow vfio-pci hotplug without VT-d caching mode")
>b8d78277c0 ("intel-iommu: fail MAP notifier without caching mode")
>
>Because without caching mode, MAP notifier won't work correctly since
>guest
>won't send IOTLB update event when it establishes new mappings in the I/O
>page
>tables.
>
>Now with host IOMMU device interface between VFIO and vIOMMU, we can
>simplify
>it with a small check in set_iommu_device(). This also works for future VDPA
>implementation which may also need caching mode on.

I made a wrong assumption about VDPA: currently VDPA doesn't use the hiod interface,
so commit b8d78277c0 is still needed for VDPA. I'll have to send a v2; sorry for the noise.

BRs,
Zhenzhong

>
>For coldplug VFIO device:
>
>qemu-system-x86_64: -device
>vfio-pci,host=0000:3b:00.0,id=hostdev3,bus=root0,iommufd=iommufd0: vfio
>0000:3b:00.0: Failed to set vIOMMU: Device assignment is not allowed
>without enabling caching-mode=on for Intel IOMMU.
>
>For hotplug VFIO device:
>
>Error: vfio 0000:3b:00.0: Failed to set vIOMMU: Device assignment is not
>allowed without enabling caching-mode=on for Intel IOMMU.
>
>Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>---
> hw/i386/intel_iommu.c | 47 ++++++-------------------------------------
> hw/i386/pc.c          | 20 ------------------
> 2 files changed, 6 insertions(+), 61 deletions(-)
>
>diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>index f04300022e..5c67b42dde 100644
>--- a/hw/i386/intel_iommu.c
>+++ b/hw/i386/intel_iommu.c
>@@ -85,13 +85,6 @@ struct vtd_iotlb_key {
> static void vtd_address_space_refresh_all(IntelIOMMUState *s);
> static void vtd_address_space_unmap(VTDAddressSpace *as,
>IOMMUNotifier *n);
>
>-static void vtd_panic_require_caching_mode(void)
>-{
>-    error_report("We need to set caching-mode=on for intel-iommu to
>enable "
>-                 "device assignment with IOMMU protection.");
>-    exit(1);
>-}
>-
> static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
>                             uint64_t wmask, uint64_t w1cmask)
> {
>@@ -3731,13 +3724,6 @@ static int
>vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
>                          "Snoop Control with vhost or VFIO is not
>supported");
>         return -ENOTSUP;
>     }
>-    if (!s->caching_mode && (new & IOMMU_NOTIFIER_MAP)) {
>-        error_setg_errno(errp, ENOTSUP,
>-                         "device %02x.%02x.%x requires caching
>mode",
>-                         pci_bus_num(vtd_as->bus),
>PCI_SLOT(vtd_as->devfn),
>-                         PCI_FUNC(vtd_as->devfn));
>-        return -ENOTSUP;
>-    }
>     if (!x86_iommu->dt_supported && (new &
>IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) {
>         error_setg_errno(errp, ENOTSUP,
>                          "device %02x.%02x.%x requires device IOTLB
>mode",
>@@ -4378,6 +4364,12 @@ static bool vtd_dev_set_iommu_device(PCIBus
>*bus, void *opaque, int devfn,
>
>     assert(hiod);
>
>+    if (!s->caching_mode) {
>+        error_setg(errp, "Device assignment is not allowed without
>enabling "
>+                   "caching-mode=on for Intel IOMMU.");
>+        return false;
>+    }
>+
>     vtd_iommu_lock(s);
>
>     if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
>@@ -4910,32 +4902,6 @@ static bool vtd_decide_config(IntelIOMMUState
>*s, Error **errp)
>     return true;
> }
>
>-static int vtd_machine_done_notify_one(Object *child, void *unused)
>-{
>-    IntelIOMMUState *iommu =
>INTEL_IOMMU_DEVICE(x86_iommu_get_default());
>-
>-    /*
>-     * We hard-coded here because vfio-pci is the only special case
>-     * here.  Let's be more elegant in the future when we can, but so
>-     * far there seems to be no better way.
>-     */
>-    if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
>-        vtd_panic_require_caching_mode();
>-    }
>-
>-    return 0;
>-}
>-
>-static void vtd_machine_done_hook(Notifier *notifier, void *unused)
>-{
>-    object_child_foreach_recursive(object_get_root(),
>-                                   vtd_machine_done_notify_one,
>NULL);
>-}
>-
>-static Notifier vtd_machine_done_notify = {
>-    .notify = vtd_machine_done_hook,
>-};
>-
> static void vtd_realize(DeviceState *dev, Error **errp)
> {
>     MachineState *ms = MACHINE(qdev_get_machine());
>@@ -4990,7 +4956,6 @@ static void vtd_realize(DeviceState *dev, Error
>**errp)
>     pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>     /* Pseudo address space under root PCI bus. */
>     x86ms->ioapic_as = vtd_host_dma_iommu(bus, s,
>Q35_PSEUDO_DEVFN_IOAPIC);
>-    qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
> }
>
> static void vtd_class_init(ObjectClass *klass, const void *data)
>diff --git a/hw/i386/pc.c b/hw/i386/pc.c
>index bc048a6d13..01cd9a67db 100644
>--- a/hw/i386/pc.c
>+++ b/hw/i386/pc.c
>@@ -1720,25 +1720,6 @@ static void pc_machine_wakeup(MachineState
>*machine)
>     cpu_synchronize_all_post_reset();
> }
>
>-static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error
>**errp)
>-{
>-    X86IOMMUState *iommu = x86_iommu_get_default();
>-    IntelIOMMUState *intel_iommu;
>-
>-    if (iommu &&
>-        object_dynamic_cast((Object *)iommu,
>TYPE_INTEL_IOMMU_DEVICE) &&
>-        object_dynamic_cast((Object *)dev, "vfio-pci")) {
>-        intel_iommu = INTEL_IOMMU_DEVICE(iommu);
>-        if (!intel_iommu->caching_mode) {
>-            error_setg(errp, "Device assignment is not allowed without "
>-                       "enabling caching-mode=on for Intel IOMMU.");
>-            return false;
>-        }
>-    }
>-
>-    return true;
>-}
>-
> static void pc_machine_class_init(ObjectClass *oc, const void *data)
> {
>     MachineClass *mc = MACHINE_CLASS(oc);
>@@ -1758,7 +1739,6 @@ static void pc_machine_class_init(ObjectClass *oc,
>const void *data)
>     x86mc->apic_xrupt_override = true;
>     assert(!mc->get_hotplug_handler);
>     mc->get_hotplug_handler = pc_get_hotplug_handler;
>-    mc->hotplug_allowed = pc_hotplug_allowed;
>     mc->auto_enable_numa_with_memhp = true;
>     mc->auto_enable_numa_with_memdev = true;
>     mc->has_hotpluggable_cpus = true;
>--
>2.47.1
Re: [PATCH 2/2] intel_iommu: Simplify caching mode check with VFIO device
Posted by CLEMENT MATHIEU--DRIF 1 month, 2 weeks ago
Hi Zhenzhong,

Nice!

Reviewed-by: Clément Mathieu--Drif <clement.mathieu--drif@eviden.com>

Thanks

On Fri, 2025-09-19 at 03:06 -0400, Zhenzhong Duan wrote:
> Caution: External email. Do not open attachments or click links, unless this email comes from a known sender and you know the content is safe.
> 
> 
> In early days, we have different tricks to ensure caching-mode=on with VFIO  
> device:
> 
> 28cf553afe ("intel_iommu: Sanity check vfio-pci config on machine init done")  
> c6cbc29d36 ("pc/q35: Disallow vfio-pci hotplug without VT-d caching mode")  
> b8d78277c0 ("intel-iommu: fail MAP notifier without caching mode")
> 
> Because without caching mode, MAP notifier won't work correctly since guest  
> won't send IOTLB update event when it establishes new mappings in the I/O page  
> tables.
> 
> Now with host IOMMU device interface between VFIO and vIOMMU, we can simplify  
> it with a small check in set_iommu_device(). This also works for future VDPA  
> implementation which may also need caching mode on.
> 
> For coldplug VFIO device:
> 
> qemu-system-x86_64: -device vfio-pci,host=0000:3b:00.0,id=hostdev3,bus=root0,iommufd=iommufd0: vfio 0000:3b:00.0: Failed to set vIOMMU: Device assignment is not allowed without enabling caching-mode=on for Intel IOMMU.
> 
> For hotplug VFIO device:
> 
> Error: vfio 0000:3b:00.0: Failed to set vIOMMU: Device assignment is not allowed without enabling caching-mode=on for Intel IOMMU.
> 
> Signed-off-by: Zhenzhong Duan <[zhenzhong.duan@intel.com](mailto:zhenzhong.duan@intel.com)>  
> ---  
>  hw/i386/intel_iommu.c | 47 ++++++-------------------------------------  
>  hw/i386/pc.c          | 20 ------------------  
>  2 files changed, 6 insertions(+), 61 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c  
> index f04300022e..5c67b42dde 100644  
> --- a/hw/i386/intel_iommu.c  
> +++ b/hw/i386/intel_iommu.c  
> @@ -85,13 +85,6 @@ struct vtd_iotlb_key {  
>  static void vtd_address_space_refresh_all(IntelIOMMUState *s);  
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
> 
> -static void vtd_panic_require_caching_mode(void)  
> -{  
> -    error_report("We need to set caching-mode=on for intel-iommu to enable "  
> -                 "device assignment with IOMMU protection.");  
> -    exit(1);  
> -}  
> -  
>  static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,  
>                              uint64_t wmask, uint64_t w1cmask)  
>  {  
> @@ -3731,13 +3724,6 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,  
>                           "Snoop Control with vhost or VFIO is not supported");  
>          return -ENOTSUP;  
>      }  
> -    if (!s->caching_mode && (new & IOMMU_NOTIFIER_MAP)) {  
> -        error_setg_errno(errp, ENOTSUP,  
> -                         "device %02x.%02x.%x requires caching mode",  
> -                         pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),  
> -                         PCI_FUNC(vtd_as->devfn));  
> -        return -ENOTSUP;  
> -    }  
>      if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) {  
>          error_setg_errno(errp, ENOTSUP,  
>                           "device %02x.%02x.%x requires device IOTLB mode",  
> @@ -4378,6 +4364,12 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
> 
>      assert(hiod);
> 
> +    if (!s->caching_mode) {  
> +        error_setg(errp, "Device assignment is not allowed without enabling "  
> +                   "caching-mode=on for Intel IOMMU.");  
> +        return false;  
> +    }  
> +  
>      vtd_iommu_lock(s);
> 
>      if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {  
> @@ -4910,32 +4902,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)  
>      return true;  
>  }
> 
> -static int vtd_machine_done_notify_one(Object *child, void *unused)  
> -{  
> -    IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());  
> -  
> -    /*  
> -     * We hard-coded here because vfio-pci is the only special case  
> -     * here.  Let's be more elegant in the future when we can, but so  
> -     * far there seems to be no better way.  
> -     */  
> -    if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {  
> -        vtd_panic_require_caching_mode();  
> -    }  
> -  
> -    return 0;  
> -}  
> -  
> -static void vtd_machine_done_hook(Notifier *notifier, void *unused)  
> -{  
> -    object_child_foreach_recursive(object_get_root(),  
> -                                   vtd_machine_done_notify_one, NULL);  
> -}  
> -  
> -static Notifier vtd_machine_done_notify = {  
> -    .notify = vtd_machine_done_hook,  
> -};  
> -  
>  static void vtd_realize(DeviceState *dev, Error **errp)  
>  {  
>      MachineState *ms = MACHINE(qdev_get_machine());  
> @@ -4990,7 +4956,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)  
>      pci_setup_iommu(bus, &vtd_iommu_ops, dev);  
>      /* Pseudo address space under root PCI bus. */  
>      x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);  
> -    qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);  
>  }
> 
>  static void vtd_class_init(ObjectClass *klass, const void *data)  
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c  
> index bc048a6d13..01cd9a67db 100644  
> --- a/hw/i386/pc.c  
> +++ b/hw/i386/pc.c  
> @@ -1720,25 +1720,6 @@ static void pc_machine_wakeup(MachineState *machine)  
>      cpu_synchronize_all_post_reset();  
>  }
> 
> -static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)  
> -{  
> -    X86IOMMUState *iommu = x86_iommu_get_default();  
> -    IntelIOMMUState *intel_iommu;  
> -  
> -    if (iommu &&  
> -        object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) &&  
> -        object_dynamic_cast((Object *)dev, "vfio-pci")) {  
> -        intel_iommu = INTEL_IOMMU_DEVICE(iommu);  
> -        if (!intel_iommu->caching_mode) {  
> -            error_setg(errp, "Device assignment is not allowed without "  
> -                       "enabling caching-mode=on for Intel IOMMU.");  
> -            return false;  
> -        }  
> -    }  
> -  
> -    return true;  
> -}  
> -  
>  static void pc_machine_class_init(ObjectClass *oc, const void *data)  
>  {  
>      MachineClass *mc = MACHINE_CLASS(oc);  
> @@ -1758,7 +1739,6 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data)  
>      x86mc->apic_xrupt_override = true;  
>      assert(!mc->get_hotplug_handler);  
>      mc->get_hotplug_handler = pc_get_hotplug_handler;  
> -    mc->hotplug_allowed = pc_hotplug_allowed;  
>      mc->auto_enable_numa_with_memhp = true;  
>      mc->auto_enable_numa_with_memdev = true;  
>      mc->has_hotpluggable_cpus = true;  
> --  
> 2.47.1
> 
Re: [PATCH 2/2] intel_iommu: Simplify caching mode check with VFIO device
Posted by Cédric Le Goater 1 month, 2 weeks ago
On 9/19/25 09:06, Zhenzhong Duan wrote:
> In early days, we have different tricks to ensure caching-mode=on with VFIO
> device:
> 
> 28cf553afe ("intel_iommu: Sanity check vfio-pci config on machine init done")
> c6cbc29d36 ("pc/q35: Disallow vfio-pci hotplug without VT-d caching mode")
> b8d78277c0 ("intel-iommu: fail MAP notifier without caching mode")
> 
> Because without caching mode, MAP notifier won't work correctly since guest
> won't send IOTLB update event when it establishes new mappings in the I/O page
> tables.
> 
> Now with host IOMMU device interface between VFIO and vIOMMU, we can simplify
> it with a small check in set_iommu_device(). This also works for future VDPA
> implementation which may also need caching mode on.
> 
> For coldplug VFIO device:
> 
> qemu-system-x86_64: -device vfio-pci,host=0000:3b:00.0,id=hostdev3,bus=root0,iommufd=iommufd0: vfio 0000:3b:00.0: Failed to set vIOMMU: Device assignment is not allowed without enabling caching-mode=on for Intel IOMMU.
> 
> For hotplug VFIO device:
> 
> Error: vfio 0000:3b:00.0: Failed to set vIOMMU: Device assignment is not allowed without enabling caching-mode=on for Intel IOMMU.
> 
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu.c | 47 ++++++-------------------------------------
>   hw/i386/pc.c          | 20 ------------------
>   2 files changed, 6 insertions(+), 61 deletions(-)

This is a nice cleanup.

Acked-by: Cédric Le Goater <clg@redhat.com>

Thanks,

C.



> 
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index f04300022e..5c67b42dde 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -85,13 +85,6 @@ struct vtd_iotlb_key {
>   static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>   static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
>   
> -static void vtd_panic_require_caching_mode(void)
> -{
> -    error_report("We need to set caching-mode=on for intel-iommu to enable "
> -                 "device assignment with IOMMU protection.");
> -    exit(1);
> -}
> -
>   static void vtd_define_quad(IntelIOMMUState *s, hwaddr addr, uint64_t val,
>                               uint64_t wmask, uint64_t w1cmask)
>   {
> @@ -3731,13 +3724,6 @@ static int vtd_iommu_notify_flag_changed(IOMMUMemoryRegion *iommu,
>                            "Snoop Control with vhost or VFIO is not supported");
>           return -ENOTSUP;
>       }
> -    if (!s->caching_mode && (new & IOMMU_NOTIFIER_MAP)) {
> -        error_setg_errno(errp, ENOTSUP,
> -                         "device %02x.%02x.%x requires caching mode",
> -                         pci_bus_num(vtd_as->bus), PCI_SLOT(vtd_as->devfn),
> -                         PCI_FUNC(vtd_as->devfn));
> -        return -ENOTSUP;
> -    }
>       if (!x86_iommu->dt_supported && (new & IOMMU_NOTIFIER_DEVIOTLB_UNMAP)) {
>           error_setg_errno(errp, ENOTSUP,
>                            "device %02x.%02x.%x requires device IOTLB mode",
> @@ -4378,6 +4364,12 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>   
>       assert(hiod);
>   
> +    if (!s->caching_mode) {
> +        error_setg(errp, "Device assignment is not allowed without enabling "
> +                   "caching-mode=on for Intel IOMMU.");
> +        return false;
> +    }
> +
>       vtd_iommu_lock(s);
>   
>       if (g_hash_table_lookup(s->vtd_host_iommu_dev, &key)) {
> @@ -4910,32 +4902,6 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp)
>       return true;
>   }
>   
> -static int vtd_machine_done_notify_one(Object *child, void *unused)
> -{
> -    IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
> -
> -    /*
> -     * We hard-coded here because vfio-pci is the only special case
> -     * here.  Let's be more elegant in the future when we can, but so
> -     * far there seems to be no better way.
> -     */
> -    if (object_dynamic_cast(child, "vfio-pci") && !iommu->caching_mode) {
> -        vtd_panic_require_caching_mode();
> -    }
> -
> -    return 0;
> -}
> -
> -static void vtd_machine_done_hook(Notifier *notifier, void *unused)
> -{
> -    object_child_foreach_recursive(object_get_root(),
> -                                   vtd_machine_done_notify_one, NULL);
> -}
> -
> -static Notifier vtd_machine_done_notify = {
> -    .notify = vtd_machine_done_hook,
> -};
> -
>   static void vtd_realize(DeviceState *dev, Error **errp)
>   {
>       MachineState *ms = MACHINE(qdev_get_machine());
> @@ -4990,7 +4956,6 @@ static void vtd_realize(DeviceState *dev, Error **errp)
>       pci_setup_iommu(bus, &vtd_iommu_ops, dev);
>       /* Pseudo address space under root PCI bus. */
>       x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, Q35_PSEUDO_DEVFN_IOAPIC);
> -    qemu_add_machine_init_done_notifier(&vtd_machine_done_notify);
>   }
>   
>   static void vtd_class_init(ObjectClass *klass, const void *data)
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index bc048a6d13..01cd9a67db 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -1720,25 +1720,6 @@ static void pc_machine_wakeup(MachineState *machine)
>       cpu_synchronize_all_post_reset();
>   }
>   
> -static bool pc_hotplug_allowed(MachineState *ms, DeviceState *dev, Error **errp)
> -{
> -    X86IOMMUState *iommu = x86_iommu_get_default();
> -    IntelIOMMUState *intel_iommu;
> -
> -    if (iommu &&
> -        object_dynamic_cast((Object *)iommu, TYPE_INTEL_IOMMU_DEVICE) &&
> -        object_dynamic_cast((Object *)dev, "vfio-pci")) {
> -        intel_iommu = INTEL_IOMMU_DEVICE(iommu);
> -        if (!intel_iommu->caching_mode) {
> -            error_setg(errp, "Device assignment is not allowed without "
> -                       "enabling caching-mode=on for Intel IOMMU.");
> -            return false;
> -        }
> -    }
> -
> -    return true;
> -}
> -
>   static void pc_machine_class_init(ObjectClass *oc, const void *data)
>   {
>       MachineClass *mc = MACHINE_CLASS(oc);
> @@ -1758,7 +1739,6 @@ static void pc_machine_class_init(ObjectClass *oc, const void *data)
>       x86mc->apic_xrupt_override = true;
>       assert(!mc->get_hotplug_handler);
>       mc->get_hotplug_handler = pc_get_hotplug_handler;
> -    mc->hotplug_allowed = pc_hotplug_allowed;
>       mc->auto_enable_numa_with_memhp = true;
>       mc->auto_enable_numa_with_memdev = true;
>       mc->has_hotpluggable_cpus = true;