[PATCH v7 13/23] intel_iommu: Bind/unbind guest page table to host

Zhenzhong Duan posted 23 patches 3 months, 2 weeks ago
There is a newer version of this series
[PATCH v7 13/23] intel_iommu: Bind/unbind guest page table to host
Posted by Zhenzhong Duan 3 months, 2 weeks ago
This captures the guest PASID table entry modifications and propagates
the changes to host to attach a hwpt with type determined per guest IOMMU
PGTT configuration.

When PGTT=PT, attach PASID_0 to a second stage HWPT(GPA->HPA).
When PGTT=FST, attach PASID_0 to nested HWPT with nesting parent HWPT
coming from VFIO.

Co-Authored-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 include/hw/i386/intel_iommu.h |   1 +
 hw/i386/intel_iommu.c         | 150 +++++++++++++++++++++++++++++++++-
 hw/i386/trace-events          |   3 +
 3 files changed, 151 insertions(+), 3 deletions(-)

diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 3758ac239c..b5f8a9fc29 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -104,6 +104,7 @@ struct VTDAddressSpace {
     PCIBus *bus;
     uint8_t devfn;
     uint32_t pasid;
+    uint32_t fs_hwpt;
     AddressSpace as;
     IOMMUMemoryRegion iommu;
     MemoryRegion root;          /* The root container of the device */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 871e6aad19..3789a36147 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -20,6 +20,7 @@
  */
 
 #include "qemu/osdep.h"
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
 #include "qemu/error-report.h"
 #include "qemu/main-loop.h"
 #include "qapi/error.h"
@@ -42,6 +43,9 @@
 #include "migration/vmstate.h"
 #include "trace.h"
 #include "system/iommufd.h"
+#ifdef CONFIG_IOMMUFD
+#include <linux/iommufd.h>
+#endif
 
 /* context entry operations */
 #define PASID_0    0
@@ -87,6 +91,7 @@ struct vtd_iotlb_key {
 
 static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
+static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
 
 static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
 {
@@ -98,7 +103,11 @@ static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
     g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
     while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
         VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
-        pc_entry->valid = false;
+        if (pc_entry->valid) {
+            pc_entry->valid = false;
+            /* It's fatal to get failure during reset */
+            vtd_bind_guest_pasid(vtd_as, &error_fatal);
+        }
     }
 }
 
@@ -2380,6 +2389,128 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
     vtd_iommu_replay_all(s);
 }
 
+#ifdef CONFIG_IOMMUFD
+static int vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+                              VTDPASIDEntry *pe, uint32_t *fs_hwpt,
+                              Error **errp)
+{
+    struct iommu_hwpt_vtd_s1 vtd = {};
+
+    vtd.flags = (VTD_SM_PASID_ENTRY_SRE_BIT(pe) ? IOMMU_VTD_S1_SRE : 0) |
+                (VTD_SM_PASID_ENTRY_WPE_BIT(pe) ? IOMMU_VTD_S1_WPE : 0) |
+                (VTD_SM_PASID_ENTRY_EAFE_BIT(pe) ? IOMMU_VTD_S1_EAFE : 0);
+    vtd.addr_width = vtd_pe_get_fs_aw(pe);
+    vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
+
+    return !iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
+                                       idev->hwpt_id, 0, IOMMU_HWPT_DATA_VTD_S1,
+                                       sizeof(vtd), &vtd, fs_hwpt, errp);
+}
+
+static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
+                                    VTDAddressSpace *vtd_as)
+{
+    if (!vtd_as->fs_hwpt) {
+        return;
+    }
+    iommufd_backend_free_id(idev->iommufd, vtd_as->fs_hwpt);
+    vtd_as->fs_hwpt = 0;
+}
+
+static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
+                                     VTDAddressSpace *vtd_as, Error **errp)
+{
+    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
+    VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
+    uint32_t hwpt_id;
+    bool ret;
+
+    /*
+     * We can get here only if flts=on, the supported PGTT is FST and PT.
+     * Catch invalid PGTT when processing invalidation request to avoid
+     * attaching to wrong hwpt.
+     */
+    if (!vtd_pe_pgtt_is_fst(pe) && !vtd_pe_pgtt_is_pt(pe)) {
+        error_setg(errp, "Invalid PGTT type");
+        return -EINVAL;
+    }
+
+    if (vtd_pe_pgtt_is_pt(pe)) {
+        hwpt_id = idev->hwpt_id;
+    } else if (vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
+        return -EINVAL;
+    }
+
+    ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp);
+    trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, !ret);
+    if (ret) {
+        /* Destroy old fs_hwpt if it's a replacement */
+        vtd_destroy_old_fs_hwpt(idev, vtd_as);
+        if (vtd_pe_pgtt_is_fst(pe)) {
+            vtd_as->fs_hwpt = hwpt_id;
+        }
+    } else if (vtd_pe_pgtt_is_fst(pe)) {
+        iommufd_backend_free_id(idev->iommufd, hwpt_id);
+    }
+
+    return !ret;
+}
+
+static int vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
+                                     VTDAddressSpace *vtd_as, Error **errp)
+{
+    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    uint32_t pasid = vtd_as->pasid;
+    bool ret;
+
+    if (s->dmar_enabled && s->root_scalable) {
+        ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
+        trace_vtd_device_detach_hwpt(idev->devid, pasid, !ret);
+    } else {
+        /*
+         * If DMAR remapping is disabled or guest switches to legacy mode,
+         * we fallback to the default HWPT which contains shadow page table.
+         * So guest DMA could still work.
+         */
+        ret = host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, errp);
+        trace_vtd_device_reattach_def_hwpt(idev->devid, pasid, idev->hwpt_id,
+                                           !ret);
+    }
+
+    if (ret) {
+        vtd_destroy_old_fs_hwpt(idev, vtd_as);
+    }
+
+    return !ret;
+}
+
+static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
+{
+    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
+    VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
+    int ret;
+
+    /* Ignore emulated device or legacy VFIO backed device */
+    if (!vtd_hiod) {
+        return 0;
+    }
+
+    if (pc_entry->valid) {
+        ret = vtd_device_attach_iommufd(vtd_hiod, vtd_as, errp);
+    } else {
+        ret = vtd_device_detach_iommufd(vtd_hiod, vtd_as, errp);
+    }
+
+    return ret;
+}
+#else
+static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
+{
+    return 0;
+}
+#endif
+
 /* Do a context-cache device-selective invalidation.
  * @func_mask: FM field after shifting
  */
@@ -3134,6 +3265,8 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
     VTDPASIDEntry pe;
     IOMMUNotifier *n;
     uint16_t did;
+    const char *err_prefix;
+    Error *local_err = NULL;
 
     if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
         if (!pc_entry->valid) {
@@ -3154,7 +3287,9 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
             vtd_address_space_unmap(vtd_as, n);
         }
         vtd_switch_address_space(vtd_as);
-        return;
+
+        err_prefix = "Detaching from HWPT failed: ";
+        goto do_bind_unbind;
     }
 
     /*
@@ -3182,12 +3317,21 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
     if (!pc_entry->valid) {
         pc_entry->pasid_entry = pe;
         pc_entry->valid = true;
-    } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
+        err_prefix = "Attaching to HWPT failed: ";
+    } else if (vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
+        err_prefix = "Replacing HWPT attachment failed: ";
+    } else {
         return;
     }
 
     vtd_switch_address_space(vtd_as);
     vtd_address_space_sync(vtd_as);
+
+do_bind_unbind:
+    /* TODO: Fault event injection into guest, report error to QEMU for now */
+    if (vtd_bind_guest_pasid(vtd_as, &local_err)) {
+        error_reportf_err(local_err, "%s", err_prefix);
+    }
 }
 
 static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index b704f4f90c..5a3ee1cf64 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -73,6 +73,9 @@ vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
 vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
 vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
 vtd_reset_exit(void) ""
+vtd_device_attach_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
+vtd_device_detach_hwpt(uint32_t dev_id, uint32_t pasid, int ret) "dev_id %d pasid %d ret: %d"
+vtd_device_reattach_def_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
 
 # amd_iommu.c
 amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
-- 
2.47.1
Re: [PATCH v7 13/23] intel_iommu: Bind/unbind guest page table to host
Posted by Eric Auger 3 months, 1 week ago
Hi Zhenzhong,

On 10/24/25 10:43 AM, Zhenzhong Duan wrote:
> This captures the guest PASID table entry modifications and propagates
> the changes to host to attach a hwpt with type determined per guest IOMMU
> PGTT configuration.
>
> When PGTT=PT, attach PASID_0 to a second stage HWPT(GPA->HPA).
> When PGTT=FST, attach PASID_0 to nested HWPT with nesting parent HWPT
> coming from VFIO.
>
> Co-Authored-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/i386/intel_iommu.h |   1 +
>  hw/i386/intel_iommu.c         | 150 +++++++++++++++++++++++++++++++++-
>  hw/i386/trace-events          |   3 +
>  3 files changed, 151 insertions(+), 3 deletions(-)
>
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 3758ac239c..b5f8a9fc29 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -104,6 +104,7 @@ struct VTDAddressSpace {
>      PCIBus *bus;
>      uint8_t devfn;
>      uint32_t pasid;
> +    uint32_t fs_hwpt;
>      AddressSpace as;
>      IOMMUMemoryRegion iommu;
>      MemoryRegion root;          /* The root container of the device */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 871e6aad19..3789a36147 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -20,6 +20,7 @@
>   */
>  
>  #include "qemu/osdep.h"
> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>  #include "qemu/error-report.h"
>  #include "qemu/main-loop.h"
>  #include "qapi/error.h"
> @@ -42,6 +43,9 @@
>  #include "migration/vmstate.h"
>  #include "trace.h"
>  #include "system/iommufd.h"
> +#ifdef CONFIG_IOMMUFD
> +#include <linux/iommufd.h>
> +#endif
>  
>  /* context entry operations */
>  #define PASID_0    0
> @@ -87,6 +91,7 @@ struct vtd_iotlb_key {
>  
>  static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>  
>  static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>  {
> @@ -98,7 +103,11 @@ static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>      g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
>      while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
>          VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> -        pc_entry->valid = false;
> +        if (pc_entry->valid) {
> +            pc_entry->valid = false;
> +            /* It's fatal to get failure during reset */
> +            vtd_bind_guest_pasid(vtd_as, &error_fatal);
> +        }
>      }
>  }
>  
> @@ -2380,6 +2389,128 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
>      vtd_iommu_replay_all(s);
>  }
>  
> +#ifdef CONFIG_IOMMUFD
> +static int vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +                              VTDPASIDEntry *pe, uint32_t *fs_hwpt,
> +                              Error **errp)
> +{
> +    struct iommu_hwpt_vtd_s1 vtd = {};
> +
> +    vtd.flags = (VTD_SM_PASID_ENTRY_SRE_BIT(pe) ? IOMMU_VTD_S1_SRE : 0) |
> +                (VTD_SM_PASID_ENTRY_WPE_BIT(pe) ? IOMMU_VTD_S1_WPE : 0) |
> +                (VTD_SM_PASID_ENTRY_EAFE_BIT(pe) ? IOMMU_VTD_S1_EAFE : 0);
> +    vtd.addr_width = vtd_pe_get_fs_aw(pe);
> +    vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
> +
> +    return !iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> +                                       idev->hwpt_id, 0, IOMMU_HWPT_DATA_VTD_S1,
> +                                       sizeof(vtd), &vtd, fs_hwpt, errp);
see comments below
> +}
> +
> +static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +                                    VTDAddressSpace *vtd_as)
> +{
> +    if (!vtd_as->fs_hwpt) {
> +        return;
> +    }
> +    iommufd_backend_free_id(idev->iommufd, vtd_as->fs_hwpt);
> +    vtd_as->fs_hwpt = 0;
> +}
> +
> +static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> +                                     VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +    VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
> +    uint32_t hwpt_id;
> +    bool ret;
> +
> +    /*
> +     * We can get here only if flts=on, the supported PGTT is FST and PT.
is FST or PT
> +     * Catch invalid PGTT when processing invalidation request to avoid
> +     * attaching to wrong hwpt.
> +     */
> +    if (!vtd_pe_pgtt_is_fst(pe) && !vtd_pe_pgtt_is_pt(pe)) {
> +        error_setg(errp, "Invalid PGTT type");
print the wrong PGTT value?
> +        return -EINVAL;
> +    }
> +
> +    if (vtd_pe_pgtt_is_pt(pe)) {
> +        hwpt_id = idev->hwpt_id;
> +    } else if (vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
> +        return -EINVAL;
> +    }
> +
> +    ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp);
> +    trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, !ret);
why !ret in the trace? I think we chose to return true on success for
host_iommu_device_iommufd_attach_hwpt(). Let's keep this consistent,
otherwise we end up not knowing what the success value is.
> +    if (ret) {
> +        /* Destroy old fs_hwpt if it's a replacement */
> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
> +        if (vtd_pe_pgtt_is_fst(pe)) {
> +            vtd_as->fs_hwpt = hwpt_id;
> +        }
> +    } else if (vtd_pe_pgtt_is_fst(pe)) {
> +        iommufd_backend_free_id(idev->iommufd, hwpt_id);
> +    }
> +
> +    return !ret;
argh. Eventually it is difficult to follow ;-)
> +}
> +
> +static int vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> +                                     VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +    IntelIOMMUState *s = vtd_as->iommu_state;
> +    uint32_t pasid = vtd_as->pasid;
> +    bool ret;
> +
> +    if (s->dmar_enabled && s->root_scalable) {
> +        ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
> +        trace_vtd_device_detach_hwpt(idev->devid, pasid, !ret);
same
> +    } else {
> +        /*
> +         * If DMAR remapping is disabled or guest switches to legacy mode,
> +         * we fallback to the default HWPT which contains shadow page table.
> +         * So guest DMA could still work.
> +         */
> +        ret = host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, errp);
> +        trace_vtd_device_reattach_def_hwpt(idev->devid, pasid, idev->hwpt_id,
> +                                           !ret);
same
> +    }
> +
> +    if (ret) {
> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
> +    }
> +
> +    return !ret;
> +}
> +
> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> +    VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
> +    int ret;
> +
> +    /* Ignore emulated device or legacy VFIO backed device */
> +    if (!vtd_hiod) {
> +        return 0;
> +    }
> +
> +    if (pc_entry->valid) {
> +        ret = vtd_device_attach_iommufd(vtd_hiod, vtd_as, errp);
return directly
> +    } else {
remove the else and return directly?
> +        ret = vtd_device_detach_iommufd(vtd_hiod, vtd_as, errp);
> +    }
> +
> +    return ret;
> +}
> +#else
> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    return 0;
> +}
> +#endif
> +
>  /* Do a context-cache device-selective invalidation.
>   * @func_mask: FM field after shifting
>   */
> @@ -3134,6 +3265,8 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>      VTDPASIDEntry pe;
>      IOMMUNotifier *n;
>      uint16_t did;
> +    const char *err_prefix;
> +    Error *local_err = NULL;
>  
>      if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>          if (!pc_entry->valid) {
> @@ -3154,7 +3287,9 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>              vtd_address_space_unmap(vtd_as, n);
>          }
>          vtd_switch_address_space(vtd_as);
> -        return;
> +
> +        err_prefix = "Detaching from HWPT failed: ";
> +        goto do_bind_unbind;
>      }
>  
>      /*
> @@ -3182,12 +3317,21 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>      if (!pc_entry->valid) {
>          pc_entry->pasid_entry = pe;
>          pc_entry->valid = true;
> -    } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> +        err_prefix = "Attaching to HWPT failed: ";
> +    } else if (vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> +        err_prefix = "Replacing HWPT attachment failed: ";
> +    } else {
>          return;
>      }
>  
>      vtd_switch_address_space(vtd_as);
>      vtd_address_space_sync(vtd_as);
> +
> +do_bind_unbind:
> +    /* TODO: Fault event injection into guest, report error to QEMU for now */
> +    if (vtd_bind_guest_pasid(vtd_as, &local_err)) {
> +        error_reportf_err(local_err, "%s", err_prefix);
> +    }
>  }
>  
>  static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index b704f4f90c..5a3ee1cf64 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -73,6 +73,9 @@ vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
>  vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
>  vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
>  vtd_reset_exit(void) ""
> +vtd_device_attach_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
> +vtd_device_detach_hwpt(uint32_t dev_id, uint32_t pasid, int ret) "dev_id %d pasid %d ret: %d"
> +vtd_device_reattach_def_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
>  
>  # amd_iommu.c
>  amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
Eric
Re: [PATCH v7 13/23] intel_iommu: Bind/unbind guest page table to host
Posted by Cédric Le Goater 3 months, 2 weeks ago
On 10/24/25 10:43, Zhenzhong Duan wrote:
> This captures the guest PASID table entry modifications and propagates
> the changes to host to attach a hwpt with type determined per guest IOMMU
> PGTT configuration.
> 
> When PGTT=PT, attach PASID_0 to a second stage HWPT(GPA->HPA).
> When PGTT=FST, attach PASID_0 to nested HWPT with nesting parent HWPT
> coming from VFIO.
> 
> Co-Authored-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   include/hw/i386/intel_iommu.h |   1 +
>   hw/i386/intel_iommu.c         | 150 +++++++++++++++++++++++++++++++++-
>   hw/i386/trace-events          |   3 +
>   3 files changed, 151 insertions(+), 3 deletions(-)
> 
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 3758ac239c..b5f8a9fc29 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -104,6 +104,7 @@ struct VTDAddressSpace {
>       PCIBus *bus;
>       uint8_t devfn;
>       uint32_t pasid;
> +    uint32_t fs_hwpt;
>       AddressSpace as;
>       IOMMUMemoryRegion iommu;
>       MemoryRegion root;          /* The root container of the device */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 871e6aad19..3789a36147 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -20,6 +20,7 @@
>    */
>   
>   #include "qemu/osdep.h"
> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>   #include "qemu/error-report.h"
>   #include "qemu/main-loop.h"
>   #include "qapi/error.h"
> @@ -42,6 +43,9 @@
>   #include "migration/vmstate.h"
>   #include "trace.h"
>   #include "system/iommufd.h"
> +#ifdef CONFIG_IOMMUFD
> +#include <linux/iommufd.h>
> +#endif
>   
>   /* context entry operations */
>   #define PASID_0    0
> @@ -87,6 +91,7 @@ struct vtd_iotlb_key {
>   
>   static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>   static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>   
>   static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>   {
> @@ -98,7 +103,11 @@ static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>       g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
>       while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
>           VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> -        pc_entry->valid = false;
> +        if (pc_entry->valid) {
> +            pc_entry->valid = false;
> +            /* It's fatal to get failure during reset */
> +            vtd_bind_guest_pasid(vtd_as, &error_fatal);
> +        }
>       }
>   }
>   
> @@ -2380,6 +2389,128 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
>       vtd_iommu_replay_all(s);
>   }
>   
> +#ifdef CONFIG_IOMMUFD
> +static int vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +                              VTDPASIDEntry *pe, uint32_t *fs_hwpt,
> +                              Error **errp)

Returning a bool is better. Same for the routines below.

> +{
> +    struct iommu_hwpt_vtd_s1 vtd = {};
> +
> +    vtd.flags = (VTD_SM_PASID_ENTRY_SRE_BIT(pe) ? IOMMU_VTD_S1_SRE : 0) |
> +                (VTD_SM_PASID_ENTRY_WPE_BIT(pe) ? IOMMU_VTD_S1_WPE : 0) |
> +                (VTD_SM_PASID_ENTRY_EAFE_BIT(pe) ? IOMMU_VTD_S1_EAFE : 0);
> +    vtd.addr_width = vtd_pe_get_fs_aw(pe);
> +    vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
> +
> +    return !iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> +                                       idev->hwpt_id, 0, IOMMU_HWPT_DATA_VTD_S1,
> +                                       sizeof(vtd), &vtd, fs_hwpt, errp);
> +}
> +
> +static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +                                    VTDAddressSpace *vtd_as)
> +{
> +    if (!vtd_as->fs_hwpt) {
> +        return;
> +    }
> +    iommufd_backend_free_id(idev->iommufd, vtd_as->fs_hwpt);
> +    vtd_as->fs_hwpt = 0;
> +}
> +
> +static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> +                                     VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +    VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
> +    uint32_t hwpt_id;
> +    bool ret;
> +
> +    /*
> +     * We can get here only if flts=on, the supported PGTT is FST and PT.
> +     * Catch invalid PGTT when processing invalidation request to avoid
> +     * attaching to wrong hwpt.
> +     */
> +    if (!vtd_pe_pgtt_is_fst(pe) && !vtd_pe_pgtt_is_pt(pe)) {
> +        error_setg(errp, "Invalid PGTT type");
> +        return -EINVAL;
> +    }
> +
> +    if (vtd_pe_pgtt_is_pt(pe)) {
> +        hwpt_id = idev->hwpt_id;
> +    } else if (vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
> +        return -EINVAL;
> +    }
> +
> +    ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp);
> +    trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, !ret);
> +    if (ret) {
> +        /* Destroy old fs_hwpt if it's a replacement */
> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
> +        if (vtd_pe_pgtt_is_fst(pe)) {
> +            vtd_as->fs_hwpt = hwpt_id;
> +        }
> +    } else if (vtd_pe_pgtt_is_fst(pe)) {
> +        iommufd_backend_free_id(idev->iommufd, hwpt_id);
> +    }
> +
> +    return !ret;
> +}
> +
> +static int vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> +                                     VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +    IntelIOMMUState *s = vtd_as->iommu_state;
> +    uint32_t pasid = vtd_as->pasid;
> +    bool ret;
> +
> +    if (s->dmar_enabled && s->root_scalable) {
> +        ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
> +        trace_vtd_device_detach_hwpt(idev->devid, pasid, !ret);
> +    } else {
> +        /*
> +         * If DMAR remapping is disabled or guest switches to legacy mode,
> +         * we fallback to the default HWPT which contains shadow page table.
> +         * So guest DMA could still work.
> +         */
> +        ret = host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, errp);
> +        trace_vtd_device_reattach_def_hwpt(idev->devid, pasid, idev->hwpt_id,
> +                                           !ret);
> +    }
> +
> +    if (ret) {
> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
> +    }
> +
> +    return !ret;
> +}
> +
> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> +    VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
> +    int ret;
> +
> +    /* Ignore emulated device or legacy VFIO backed device */
> +    if (!vtd_hiod) {
> +        return 0;
> +    }
> +
> +    if (pc_entry->valid) {
> +        ret = vtd_device_attach_iommufd(vtd_hiod, vtd_as, errp);
> +    } else {
> +        ret = vtd_device_detach_iommufd(vtd_hiod, vtd_as, errp);
> +    }
> +
> +    return ret;
> +}
> +#else
> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    return 0;
> +}
> +#endif
> +
>   /* Do a context-cache device-selective invalidation.
>    * @func_mask: FM field after shifting
>    */
> @@ -3134,6 +3265,8 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>       VTDPASIDEntry pe;
>       IOMMUNotifier *n;
>       uint16_t did;
> +    const char *err_prefix;

Setting this prefix looks a bit fragile. Maybe add a default value here.


Thanks,

C.


> +    Error *local_err = NULL;
>   
>       if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>           if (!pc_entry->valid) {
> @@ -3154,7 +3287,9 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>               vtd_address_space_unmap(vtd_as, n);
>           }
>           vtd_switch_address_space(vtd_as);
> -        return;
> +
> +        err_prefix = "Detaching from HWPT failed: ";
> +        goto do_bind_unbind;
>       }
>   
>       /*
> @@ -3182,12 +3317,21 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>       if (!pc_entry->valid) {
>           pc_entry->pasid_entry = pe;
>           pc_entry->valid = true;
> -    } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> +        err_prefix = "Attaching to HWPT failed: ";
> +    } else if (vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> +        err_prefix = "Replacing HWPT attachment failed: ";
> +    } else {
>           return;
>       }
>   
>       vtd_switch_address_space(vtd_as);
>       vtd_address_space_sync(vtd_as);
> +
> +do_bind_unbind:
> +    /* TODO: Fault event injection into guest, report error to QEMU for now */
> +    if (vtd_bind_guest_pasid(vtd_as, &local_err)) {
> +        error_reportf_err(local_err, "%s", err_prefix);
> +    }
>   }
>   
>   static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index b704f4f90c..5a3ee1cf64 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -73,6 +73,9 @@ vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
>   vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
>   vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
>   vtd_reset_exit(void) ""
> +vtd_device_attach_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
> +vtd_device_detach_hwpt(uint32_t dev_id, uint32_t pasid, int ret) "dev_id %d pasid %d ret: %d"
> +vtd_device_reattach_def_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
>   
>   # amd_iommu.c
>   amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
RE: [PATCH v7 13/23] intel_iommu: Bind/unbind guest page table to host
Posted by Duan, Zhenzhong 3 months, 1 week ago

>-----Original Message-----
>From: Cédric Le Goater <clg@redhat.com>
>Subject: Re: [PATCH v7 13/23] intel_iommu: Bind/unbind guest page table to
>host
>
>On 10/24/25 10:43, Zhenzhong Duan wrote:
>> This captures the guest PASID table entry modifications and propagates
>> the changes to host to attach a hwpt with type determined per guest
>IOMMU
>> PGTT configuration.
>>
>> When PGTT=PT, attach PASID_0 to a second stage HWPT(GPA->HPA).
>> When PGTT=FST, attach PASID_0 to nested HWPT with nesting parent HWPT
>> coming from VFIO.
>>
>> Co-Authored-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   include/hw/i386/intel_iommu.h |   1 +
>>   hw/i386/intel_iommu.c         | 150
>+++++++++++++++++++++++++++++++++-
>>   hw/i386/trace-events          |   3 +
>>   3 files changed, 151 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 3758ac239c..b5f8a9fc29 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -104,6 +104,7 @@ struct VTDAddressSpace {
>>       PCIBus *bus;
>>       uint8_t devfn;
>>       uint32_t pasid;
>> +    uint32_t fs_hwpt;
>>       AddressSpace as;
>>       IOMMUMemoryRegion iommu;
>>       MemoryRegion root;          /* The root container of the
>device */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 871e6aad19..3789a36147 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -20,6 +20,7 @@
>>    */
>>
>>   #include "qemu/osdep.h"
>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>>   #include "qemu/error-report.h"
>>   #include "qemu/main-loop.h"
>>   #include "qapi/error.h"
>> @@ -42,6 +43,9 @@
>>   #include "migration/vmstate.h"
>>   #include "trace.h"
>>   #include "system/iommufd.h"
>> +#ifdef CONFIG_IOMMUFD
>> +#include <linux/iommufd.h>
>> +#endif
>>
>>   /* context entry operations */
>>   #define PASID_0    0
>> @@ -87,6 +91,7 @@ struct vtd_iotlb_key {
>>
>>   static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>>   static void vtd_address_space_unmap(VTDAddressSpace *as,
>IOMMUNotifier *n);
>> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>>
>>   static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>>   {
>> @@ -98,7 +103,11 @@ static void
>vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>>       g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
>>       while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
>>           VTDPASIDCacheEntry *pc_entry =
>&vtd_as->pasid_cache_entry;
>> -        pc_entry->valid = false;
>> +        if (pc_entry->valid) {
>> +            pc_entry->valid = false;
>> +            /* It's fatal to get failure during reset */
>> +            vtd_bind_guest_pasid(vtd_as, &error_fatal);
>> +        }
>>       }
>>   }
>>
>> @@ -2380,6 +2389,128 @@ static void
>vtd_context_global_invalidate(IntelIOMMUState *s)
>>       vtd_iommu_replay_all(s);
>>   }
>>
>> +#ifdef CONFIG_IOMMUFD
>> +static int vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> +                              VTDPASIDEntry *pe, uint32_t
>*fs_hwpt,
>> +                              Error **errp)
>
>Returning a bool is better. Same for the routines below.

We will need the returned error code to determine which fault event to inject
into the guest in a future fault event series.

>
>> +{
>> +    struct iommu_hwpt_vtd_s1 vtd = {};
>> +
>> +    vtd.flags = (VTD_SM_PASID_ENTRY_SRE_BIT(pe) ?
>IOMMU_VTD_S1_SRE : 0) |
>> +                (VTD_SM_PASID_ENTRY_WPE_BIT(pe) ?
>IOMMU_VTD_S1_WPE : 0) |
>> +                (VTD_SM_PASID_ENTRY_EAFE_BIT(pe) ?
>IOMMU_VTD_S1_EAFE : 0);
>> +    vtd.addr_width = vtd_pe_get_fs_aw(pe);
>> +    vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
>> +
>> +    return !iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
>> +                                       idev->hwpt_id, 0,
>IOMMU_HWPT_DATA_VTD_S1,
>> +                                       sizeof(vtd), &vtd, fs_hwpt,
>errp);
>> +}
>> +
>> +static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> +                                    VTDAddressSpace *vtd_as)
>> +{
>> +    if (!vtd_as->fs_hwpt) {
>> +        return;
>> +    }
>> +    iommufd_backend_free_id(idev->iommufd, vtd_as->fs_hwpt);
>> +    vtd_as->fs_hwpt = 0;
>> +}
>> +
>> +static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
>> +                                     VTDAddressSpace *vtd_as,
>Error **errp)
>> +{
>> +    HostIOMMUDeviceIOMMUFD *idev =
>HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
>> +    VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
>> +    uint32_t hwpt_id;
>> +    bool ret;
>> +
>> +    /*
>> +     * We can get here only if flts=on, the supported PGTT is FST and PT.
>> +     * Catch invalid PGTT when processing invalidation request to avoid
>> +     * attaching to wrong hwpt.
>> +     */
>> +    if (!vtd_pe_pgtt_is_fst(pe) && !vtd_pe_pgtt_is_pt(pe)) {
>> +        error_setg(errp, "Invalid PGTT type");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (vtd_pe_pgtt_is_pt(pe)) {
>> +        hwpt_id = idev->hwpt_id;
>> +    } else if (vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id,
>errp);
>> +    trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid,
>hwpt_id, !ret);
>> +    if (ret) {
>> +        /* Destroy old fs_hwpt if it's a replacement */
>> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
>> +        if (vtd_pe_pgtt_is_fst(pe)) {
>> +            vtd_as->fs_hwpt = hwpt_id;
>> +        }
>> +    } else if (vtd_pe_pgtt_is_fst(pe)) {
>> +        iommufd_backend_free_id(idev->iommufd, hwpt_id);
>> +    }
>> +
>> +    return !ret;
>> +}
>> +
>> +static int vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
>> +                                     VTDAddressSpace *vtd_as,
>Error **errp)
>> +{
>> +    HostIOMMUDeviceIOMMUFD *idev =
>HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    uint32_t pasid = vtd_as->pasid;
>> +    bool ret;
>> +
>> +    if (s->dmar_enabled && s->root_scalable) {
>> +        ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
>> +        trace_vtd_device_detach_hwpt(idev->devid, pasid, !ret);
>> +    } else {
>> +        /*
>> +         * If DMAR remapping is disabled or guest switches to legacy
>mode,
>> +         * we fallback to the default HWPT which contains shadow page
>table.
>> +         * So guest DMA could still work.
>> +         */
>> +        ret = host_iommu_device_iommufd_attach_hwpt(idev,
>idev->hwpt_id, errp);
>> +        trace_vtd_device_reattach_def_hwpt(idev->devid, pasid,
>idev->hwpt_id,
>> +                                           !ret);
>> +    }
>> +
>> +    if (ret) {
>> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
>> +    }
>> +
>> +    return !ret;
>> +}
>> +
>> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
>> +{
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +    VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
>> +    int ret;
>> +
>> +    /* Ignore emulated device or legacy VFIO backed device */
>> +    if (!vtd_hiod) {
>> +        return 0;
>> +    }
>> +
>> +    if (pc_entry->valid) {
>> +        ret = vtd_device_attach_iommufd(vtd_hiod, vtd_as, errp);
>> +    } else {
>> +        ret = vtd_device_detach_iommufd(vtd_hiod, vtd_as, errp);
>> +    }
>> +
>> +    return ret;
>> +}
>> +#else
>> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
>> +{
>> +    return 0;
>> +}
>> +#endif
>> +
>>   /* Do a context-cache device-selective invalidation.
>>    * @func_mask: FM field after shifting
>>    */
>> @@ -3134,6 +3265,8 @@ static void
>vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>       VTDPASIDEntry pe;
>>       IOMMUNotifier *n;
>>       uint16_t did;
>> +    const char *err_prefix;
>
>Setting this prefix looks a bit fragile. Maybe add a default value here.

OK, like:

    const char *err_prefix = "Attaching to HWPT failed: ";

Thanks
Zhenzhong
Re: [PATCH v7 13/23] intel_iommu: Bind/unbind guest page table to host
Posted by Cédric Le Goater 3 months, 2 weeks ago
On 10/24/25 10:43, Zhenzhong Duan wrote:
> This captures the guest PASID table entry modifications and propagates
> the changes to host to attach a hwpt with type determined per guest IOMMU
> PGTT configuration.
> 
> When PGTT=PT, attach PASID_0 to a second stage HWPT(GPA->HPA).
> When PGTT=FST, attach PASID_0 to nested HWPT with nesting parent HWPT
> coming from VFIO.
> 
> Co-Authored-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   include/hw/i386/intel_iommu.h |   1 +
>   hw/i386/intel_iommu.c         | 150 +++++++++++++++++++++++++++++++++-
>   hw/i386/trace-events          |   3 +
>   3 files changed, 151 insertions(+), 3 deletions(-)
> 
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 3758ac239c..b5f8a9fc29 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -104,6 +104,7 @@ struct VTDAddressSpace {
>       PCIBus *bus;
>       uint8_t devfn;
>       uint32_t pasid;
> +    uint32_t fs_hwpt;
>       AddressSpace as;
>       IOMMUMemoryRegion iommu;
>       MemoryRegion root;          /* The root container of the device */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 871e6aad19..3789a36147 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -20,6 +20,7 @@
>    */
>   
>   #include "qemu/osdep.h"
> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>   #include "qemu/error-report.h"
>   #include "qemu/main-loop.h"
>   #include "qapi/error.h"
> @@ -42,6 +43,9 @@
>   #include "migration/vmstate.h"
>   #include "trace.h"
>   #include "system/iommufd.h"
> +#ifdef CONFIG_IOMMUFD
> +#include <linux/iommufd.h>
> +#endif


Exposing IOMMUFD in the Intel vIOMMU is unexpected. Initially, we
introduced HostIOMMUDeviceClass to avoid exposing the IOMMU backends.
Are we OK with bypassing this abstraction layer now?


Thanks,

C.



>   /* context entry operations */
>   #define PASID_0    0
> @@ -87,6 +91,7 @@ struct vtd_iotlb_key {
>   
>   static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>   static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>   
>   static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>   {
> @@ -98,7 +103,11 @@ static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>       g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
>       while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
>           VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> -        pc_entry->valid = false;
> +        if (pc_entry->valid) {
> +            pc_entry->valid = false;
> +            /* It's fatal to get failure during reset */
> +            vtd_bind_guest_pasid(vtd_as, &error_fatal);
> +        }
>       }
>   }
>   
> @@ -2380,6 +2389,128 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
>       vtd_iommu_replay_all(s);
>   }
>   
> +#ifdef CONFIG_IOMMUFD
> +static int vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +                              VTDPASIDEntry *pe, uint32_t *fs_hwpt,
> +                              Error **errp)
> +{
> +    struct iommu_hwpt_vtd_s1 vtd = {};
> +
> +    vtd.flags = (VTD_SM_PASID_ENTRY_SRE_BIT(pe) ? IOMMU_VTD_S1_SRE : 0) |
> +                (VTD_SM_PASID_ENTRY_WPE_BIT(pe) ? IOMMU_VTD_S1_WPE : 0) |
> +                (VTD_SM_PASID_ENTRY_EAFE_BIT(pe) ? IOMMU_VTD_S1_EAFE : 0);
> +    vtd.addr_width = vtd_pe_get_fs_aw(pe);
> +    vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
> +
> +    return !iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
> +                                       idev->hwpt_id, 0, IOMMU_HWPT_DATA_VTD_S1,
> +                                       sizeof(vtd), &vtd, fs_hwpt, errp);
> +}
> +
> +static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
> +                                    VTDAddressSpace *vtd_as)
> +{
> +    if (!vtd_as->fs_hwpt) {
> +        return;
> +    }
> +    iommufd_backend_free_id(idev->iommufd, vtd_as->fs_hwpt);
> +    vtd_as->fs_hwpt = 0;
> +}
> +
> +static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> +                                     VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +    VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
> +    uint32_t hwpt_id;
> +    bool ret;
> +
> +    /*
> +     * We can get here only if flts=on, the supported PGTT is FST and PT.
> +     * Catch invalid PGTT when processing invalidation request to avoid
> +     * attaching to wrong hwpt.
> +     */
> +    if (!vtd_pe_pgtt_is_fst(pe) && !vtd_pe_pgtt_is_pt(pe)) {
> +        error_setg(errp, "Invalid PGTT type");
> +        return -EINVAL;
> +    }
> +
> +    if (vtd_pe_pgtt_is_pt(pe)) {
> +        hwpt_id = idev->hwpt_id;
> +    } else if (vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
> +        return -EINVAL;
> +    }
> +
> +    ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp);
> +    trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid, hwpt_id, !ret);
> +    if (ret) {
> +        /* Destroy old fs_hwpt if it's a replacement */
> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
> +        if (vtd_pe_pgtt_is_fst(pe)) {
> +            vtd_as->fs_hwpt = hwpt_id;
> +        }
> +    } else if (vtd_pe_pgtt_is_fst(pe)) {
> +        iommufd_backend_free_id(idev->iommufd, hwpt_id);
> +    }
> +
> +    return !ret;
> +}
> +
> +static int vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
> +                                     VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    HostIOMMUDeviceIOMMUFD *idev = HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
> +    IntelIOMMUState *s = vtd_as->iommu_state;
> +    uint32_t pasid = vtd_as->pasid;
> +    bool ret;
> +
> +    if (s->dmar_enabled && s->root_scalable) {
> +        ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
> +        trace_vtd_device_detach_hwpt(idev->devid, pasid, !ret);
> +    } else {
> +        /*
> +         * If DMAR remapping is disabled or guest switches to legacy mode,
> +         * we fallback to the default HWPT which contains shadow page table.
> +         * So guest DMA could still work.
> +         */
> +        ret = host_iommu_device_iommufd_attach_hwpt(idev, idev->hwpt_id, errp);
> +        trace_vtd_device_reattach_def_hwpt(idev->devid, pasid, idev->hwpt_id,
> +                                           !ret);
> +    }
> +
> +    if (ret) {
> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
> +    }
> +
> +    return !ret;
> +}
> +
> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> +    VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
> +    int ret;
> +
> +    /* Ignore emulated device or legacy VFIO backed device */
> +    if (!vtd_hiod) {
> +        return 0;
> +    }
> +
> +    if (pc_entry->valid) {
> +        ret = vtd_device_attach_iommufd(vtd_hiod, vtd_as, errp);
> +    } else {
> +        ret = vtd_device_detach_iommufd(vtd_hiod, vtd_as, errp);
> +    }
> +
> +    return ret;
> +}
> +#else
> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
> +{
> +    return 0;
> +}
> +#endif
> +
>   /* Do a context-cache device-selective invalidation.
>    * @func_mask: FM field after shifting
>    */
> @@ -3134,6 +3265,8 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>       VTDPASIDEntry pe;
>       IOMMUNotifier *n;
>       uint16_t did;
> +    const char *err_prefix;
> +    Error *local_err = NULL;
>   
>       if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>           if (!pc_entry->valid) {
> @@ -3154,7 +3287,9 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>               vtd_address_space_unmap(vtd_as, n);
>           }
>           vtd_switch_address_space(vtd_as);
> -        return;
> +
> +        err_prefix = "Detaching from HWPT failed: ";
> +        goto do_bind_unbind;
>       }
>   
>       /*
> @@ -3182,12 +3317,21 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>       if (!pc_entry->valid) {
>           pc_entry->pasid_entry = pe;
>           pc_entry->valid = true;
> -    } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> +        err_prefix = "Attaching to HWPT failed: ";
> +    } else if (vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> +        err_prefix = "Replacing HWPT attachment failed: ";
> +    } else {
>           return;
>       }
>   
>       vtd_switch_address_space(vtd_as);
>       vtd_address_space_sync(vtd_as);
> +
> +do_bind_unbind:
> +    /* TODO: Fault event injection into guest, report error to QEMU for now */
> +    if (vtd_bind_guest_pasid(vtd_as, &local_err)) {
> +        error_reportf_err(local_err, "%s", err_prefix);
> +    }
>   }
>   
>   static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index b704f4f90c..5a3ee1cf64 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -73,6 +73,9 @@ vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16
>   vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
>   vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
>   vtd_reset_exit(void) ""
> +vtd_device_attach_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
> +vtd_device_detach_hwpt(uint32_t dev_id, uint32_t pasid, int ret) "dev_id %d pasid %d ret: %d"
> +vtd_device_reattach_def_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
>   
>   # amd_iommu.c
>   amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
Re: [PATCH v7 13/23] intel_iommu: Bind/unbind guest page table to host
Posted by Eric Auger 3 months, 1 week ago
Hi Cédric, Zhenzhong,

On 10/24/25 7:01 PM, Cédric Le Goater wrote:
> On 10/24/25 10:43, Zhenzhong Duan wrote:
>> This captures the guest PASID table entry modifications and propagates
>> the changes to host to attach a hwpt with type determined per guest
>> IOMMU
>> PGTT configuration.
>>
>> When PGTT=PT, attach PASID_0 to a second stage HWPT(GPA->HPA).
>> When PGTT=FST, attach PASID_0 to nested HWPT with nesting parent HWPT
>> coming from VFIO.
>>
>> Co-Authored-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   include/hw/i386/intel_iommu.h |   1 +
>>   hw/i386/intel_iommu.c         | 150 +++++++++++++++++++++++++++++++++-
>>   hw/i386/trace-events          |   3 +
>>   3 files changed, 151 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/hw/i386/intel_iommu.h
>> b/include/hw/i386/intel_iommu.h
>> index 3758ac239c..b5f8a9fc29 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -104,6 +104,7 @@ struct VTDAddressSpace {
>>       PCIBus *bus;
>>       uint8_t devfn;
>>       uint32_t pasid;
>> +    uint32_t fs_hwpt;
>>       AddressSpace as;
>>       IOMMUMemoryRegion iommu;
>>       MemoryRegion root;          /* The root container of the device */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 871e6aad19..3789a36147 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -20,6 +20,7 @@
>>    */
>>     #include "qemu/osdep.h"
>> +#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
>>   #include "qemu/error-report.h"
>>   #include "qemu/main-loop.h"
>>   #include "qapi/error.h"
>> @@ -42,6 +43,9 @@
>>   #include "migration/vmstate.h"
>>   #include "trace.h"
>>   #include "system/iommufd.h"
>> +#ifdef CONFIG_IOMMUFD
>> +#include <linux/iommufd.h>
>> +#endif
>
>
> Exposing IOMMUFD in the Intel vIOMMU is unexpected. Initially, we
> introduced HostIOMMUDeviceClass to avoid exposing the IOMMU backends.
> Are we OK to bypass this abstract layer now ?
HostIOMMUDeviceClass was rather introduced to hide the differences
between the VFIO and IOMMUFD backend implementations.
This feature is only implemented on top of iommufd. Besides, it is
specialized for VTD and ARM, so to me it looks like a different class
derivation.

Maybe you should put that code in a separate file (-accel.c) as
Shameer/Nicolin are doing for SMMU?

Thanks

Eric
>
>
> Thanks,
>
> C.
>
>
>
>  >   /* context entry operations */
>>   #define PASID_0    0
>> @@ -87,6 +91,7 @@ struct vtd_iotlb_key {
>>     static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>>   static void vtd_address_space_unmap(VTDAddressSpace *as,
>> IOMMUNotifier *n);
>> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>>     static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>>   {
>> @@ -98,7 +103,11 @@ static void
>> vtd_pasid_cache_reset_locked(IntelIOMMUState *s)
>>       g_hash_table_iter_init(&as_it, s->vtd_address_spaces);
>>       while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_as)) {
>>           VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> -        pc_entry->valid = false;
>> +        if (pc_entry->valid) {
>> +            pc_entry->valid = false;
>> +            /* It's fatal to get failure during reset */
>> +            vtd_bind_guest_pasid(vtd_as, &error_fatal);
>> +        }
>>       }
>>   }
>>   @@ -2380,6 +2389,128 @@ static void
>> vtd_context_global_invalidate(IntelIOMMUState *s)
>>       vtd_iommu_replay_all(s);
>>   }
>>   +#ifdef CONFIG_IOMMUFD
>> +static int vtd_create_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> +                              VTDPASIDEntry *pe, uint32_t *fs_hwpt,
>> +                              Error **errp)
>> +{
>> +    struct iommu_hwpt_vtd_s1 vtd = {};
>> +
>> +    vtd.flags = (VTD_SM_PASID_ENTRY_SRE_BIT(pe) ? IOMMU_VTD_S1_SRE :
>> 0) |
>> +                (VTD_SM_PASID_ENTRY_WPE_BIT(pe) ? IOMMU_VTD_S1_WPE :
>> 0) |
>> +                (VTD_SM_PASID_ENTRY_EAFE_BIT(pe) ? IOMMU_VTD_S1_EAFE
>> : 0);
>> +    vtd.addr_width = vtd_pe_get_fs_aw(pe);
>> +    vtd.pgtbl_addr = (uint64_t)vtd_pe_get_fspt_base(pe);
>> +
>> +    return !iommufd_backend_alloc_hwpt(idev->iommufd, idev->devid,
>> +                                       idev->hwpt_id, 0,
>> IOMMU_HWPT_DATA_VTD_S1,
>> +                                       sizeof(vtd), &vtd, fs_hwpt,
>> errp);
>> +}
>> +
>> +static void vtd_destroy_old_fs_hwpt(HostIOMMUDeviceIOMMUFD *idev,
>> +                                    VTDAddressSpace *vtd_as)
>> +{
>> +    if (!vtd_as->fs_hwpt) {
>> +        return;
>> +    }
>> +    iommufd_backend_free_id(idev->iommufd, vtd_as->fs_hwpt);
>> +    vtd_as->fs_hwpt = 0;
>> +}
>> +
>> +static int vtd_device_attach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
>> +                                     VTDAddressSpace *vtd_as, Error
>> **errp)
>> +{
>> +    HostIOMMUDeviceIOMMUFD *idev =
>> HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
>> +    VTDPASIDEntry *pe = &vtd_as->pasid_cache_entry.pasid_entry;
>> +    uint32_t hwpt_id;
>> +    bool ret;
>> +
>> +    /*
>> +     * We can get here only if flts=on, the supported PGTT is FST
>> and PT.
>> +     * Catch invalid PGTT when processing invalidation request to avoid
>> +     * attaching to wrong hwpt.
>> +     */
>> +    if (!vtd_pe_pgtt_is_fst(pe) && !vtd_pe_pgtt_is_pt(pe)) {
>> +        error_setg(errp, "Invalid PGTT type");
>> +        return -EINVAL;
>> +    }
>> +
>> +    if (vtd_pe_pgtt_is_pt(pe)) {
>> +        hwpt_id = idev->hwpt_id;
>> +    } else if (vtd_create_fs_hwpt(idev, pe, &hwpt_id, errp)) {
>> +        return -EINVAL;
>> +    }
>> +
>> +    ret = host_iommu_device_iommufd_attach_hwpt(idev, hwpt_id, errp);
>> +    trace_vtd_device_attach_hwpt(idev->devid, vtd_as->pasid,
>> hwpt_id, !ret);
>> +    if (ret) {
>> +        /* Destroy old fs_hwpt if it's a replacement */
>> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
>> +        if (vtd_pe_pgtt_is_fst(pe)) {
>> +            vtd_as->fs_hwpt = hwpt_id;
>> +        }
>> +    } else if (vtd_pe_pgtt_is_fst(pe)) {
>> +        iommufd_backend_free_id(idev->iommufd, hwpt_id);
>> +    }
>> +
>> +    return !ret;
>> +}
>> +
>> +static int vtd_device_detach_iommufd(VTDHostIOMMUDevice *vtd_hiod,
>> +                                     VTDAddressSpace *vtd_as, Error
>> **errp)
>> +{
>> +    HostIOMMUDeviceIOMMUFD *idev =
>> HOST_IOMMU_DEVICE_IOMMUFD(vtd_hiod->hiod);
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    uint32_t pasid = vtd_as->pasid;
>> +    bool ret;
>> +
>> +    if (s->dmar_enabled && s->root_scalable) {
>> +        ret = host_iommu_device_iommufd_detach_hwpt(idev, errp);
>> +        trace_vtd_device_detach_hwpt(idev->devid, pasid, !ret);
>> +    } else {
>> +        /*
>> +         * If DMAR remapping is disabled or guest switches to legacy
>> mode,
>> +         * we fallback to the default HWPT which contains shadow
>> page table.
>> +         * So guest DMA could still work.
>> +         */
>> +        ret = host_iommu_device_iommufd_attach_hwpt(idev,
>> idev->hwpt_id, errp);
>> +        trace_vtd_device_reattach_def_hwpt(idev->devid, pasid,
>> idev->hwpt_id,
>> +                                           !ret);
>> +    }
>> +
>> +    if (ret) {
>> +        vtd_destroy_old_fs_hwpt(idev, vtd_as);
>> +    }
>> +
>> +    return !ret;
>> +}
>> +
>> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
>> +{
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +    VTDHostIOMMUDevice *vtd_hiod = vtd_find_hiod_iommufd(vtd_as);
>> +    int ret;
>> +
>> +    /* Ignore emulated device or legacy VFIO backed device */
>> +    if (!vtd_hiod) {
>> +        return 0;
>> +    }
>> +
>> +    if (pc_entry->valid) {
>> +        ret = vtd_device_attach_iommufd(vtd_hiod, vtd_as, errp);
>> +    } else {
>> +        ret = vtd_device_detach_iommufd(vtd_hiod, vtd_as, errp);
>> +    }
>> +
>> +    return ret;
>> +}
>> +#else
>> +static int vtd_bind_guest_pasid(VTDAddressSpace *vtd_as, Error **errp)
>> +{
>> +    return 0;
>> +}
>> +#endif
>> +
>>   /* Do a context-cache device-selective invalidation.
>>    * @func_mask: FM field after shifting
>>    */
>> @@ -3134,6 +3265,8 @@ static void
>> vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>       VTDPASIDEntry pe;
>>       IOMMUNotifier *n;
>>       uint16_t did;
>> +    const char *err_prefix;
>> +    Error *local_err = NULL;
>>         if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>>           if (!pc_entry->valid) {
>> @@ -3154,7 +3287,9 @@ static void
>> vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>               vtd_address_space_unmap(vtd_as, n);
>>           }
>>           vtd_switch_address_space(vtd_as);
>> -        return;
>> +
>> +        err_prefix = "Detaching from HWPT failed: ";
>> +        goto do_bind_unbind;
>>       }
>>         /*
>> @@ -3182,12 +3317,21 @@ static void
>> vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>       if (!pc_entry->valid) {
>>           pc_entry->pasid_entry = pe;
>>           pc_entry->valid = true;
>> -    } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
>> +        err_prefix = "Attaching to HWPT failed: ";
>> +    } else if (vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
>> +        err_prefix = "Replacing HWPT attachment failed: ";
>> +    } else {
>>           return;
>>       }
>>         vtd_switch_address_space(vtd_as);
>>       vtd_address_space_sync(vtd_as);
>> +
>> +do_bind_unbind:
>> +    /* TODO: Fault event injection into guest, report error to QEMU
>> for now */
>> +    if (vtd_bind_guest_pasid(vtd_as, &local_err)) {
>> +        error_reportf_err(local_err, "%s", err_prefix);
>> +    }
>>   }
>>     static void vtd_pasid_cache_sync(IntelIOMMUState *s,
>> VTDPASIDCacheInfo *pc_info)
>> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
>> index b704f4f90c..5a3ee1cf64 100644
>> --- a/hw/i386/trace-events
>> +++ b/hw/i386/trace-events
>> @@ -73,6 +73,9 @@ vtd_warn_invalid_qi_tail(uint16_t tail) "tail
>> 0x%"PRIx16
>>   vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target)
>> "sid 0x%"PRIx16" index %d vec %d (should be: %d)"
>>   vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target)
>> "sid 0x%"PRIx16" index %d trigger %d (should be: %d)"
>>   vtd_reset_exit(void) ""
>> +vtd_device_attach_hwpt(uint32_t dev_id, uint32_t pasid, uint32_t
>> hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
>> +vtd_device_detach_hwpt(uint32_t dev_id, uint32_t pasid, int ret)
>> "dev_id %d pasid %d ret: %d"
>> +vtd_device_reattach_def_hwpt(uint32_t dev_id, uint32_t pasid,
>> uint32_t hwpt_id, int ret) "dev_id %d pasid %d hwpt_id %d, ret: %d"
>>     # amd_iommu.c
>>   amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to
>> write at addr 0x%"PRIx64" +  offset 0x%"PRIx32
>