[PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation

Zhenzhong Duan posted 22 patches 1 month, 3 weeks ago
Maintainers: Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, "Clément Mathieu--Drif" <clement.mathieu--drif@eviden.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Fabiano Rosas <farosas@suse.de>, Laurent Vivier <lvivier@redhat.com>
There is a newer version of this series
[PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation
Posted by Zhenzhong Duan 1 month, 3 weeks ago
This adds PASID cache sync for RID_PASID; non-RID_PASID isn't supported.

Adds a new entry, VTDPASIDCacheEntry, in VTDAddressSpace to cache the pasid
entry and track PASID usage, and to support future PASID-tagged DMA address
translation in vIOMMU.

When guest triggers pasid cache invalidation, QEMU will capture it and
update or invalidate pasid cache.

The vIOMMU emulator can figure out the reason by fetching the latest guest
pasid entry from memory and comparing it with the cached PASID entry, if the
cached entry is valid.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h |  19 +++-
 include/hw/i386/intel_iommu.h  |   6 ++
 hw/i386/intel_iommu.c          | 157 ++++++++++++++++++++++++++++++---
 hw/i386/trace-events           |   3 +
 4 files changed, 173 insertions(+), 12 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 9cdc8d5dbb..d400bcee21 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -316,6 +316,7 @@ typedef enum VTDFaultReason {
                                   * request while disabled */
     VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
 
+    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
     /* PASID directory entry access failure */
     VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
     /* The Present(P) field of pasid directory entry is 0 */
@@ -493,6 +494,15 @@ typedef union VTDInvDesc VTDInvDesc;
 #define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
 #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
 
+/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
+#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4, 2)
+#define VTD_INV_DESC_PASIDC_G_DSI       0
+#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
+#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
+#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16, 16)
+#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32, 20)
+#define VTD_INV_DESC_PASIDC_RSVD_VAL0   0xfff000000000f1c0ULL
+
 /* Information about page-selective IOTLB invalidate */
 struct VTDIOTLBPageInvInfo {
     uint16_t domain_id;
@@ -552,6 +562,13 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
 
+typedef struct VTDPASIDCacheInfo {
+    uint8_t type;
+    uint16_t did;
+    uint32_t pasid;
+    bool reset;
+} VTDPASIDCacheInfo;
+
 /* PASID Table Related Definitions */
 #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
 #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
@@ -573,7 +590,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
 
 #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
-#define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
+#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0, 16)
 
 #define VTD_SM_PASID_ENTRY_FSPM          3ULL
 #define VTD_SM_PASID_ENTRY_FSPTPTR       (~0xfffULL)
diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index 3351892da0..ff01e5c82d 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -95,6 +95,11 @@ struct VTDPASIDEntry {
     uint64_t val[8];
 };
 
+typedef struct VTDPASIDCacheEntry {
+    struct VTDPASIDEntry pasid_entry;
+    bool valid;
+} VTDPASIDCacheEntry;
+
 struct VTDAddressSpace {
     PCIBus *bus;
     uint8_t devfn;
@@ -107,6 +112,7 @@ struct VTDAddressSpace {
     MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
     IntelIOMMUState *iommu_state;
     VTDContextCacheEntry context_cache_entry;
+    VTDPASIDCacheEntry pasid_cache_entry;
     QLIST_ENTRY(VTDAddressSpace) next;
     /* Superset of notifier flags that this address space has */
     IOMMUNotifierFlag notifier_flags;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index d37d47115a..24061f6dc6 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -1614,7 +1614,7 @@ static uint16_t vtd_get_domain_id(IntelIOMMUState *s,
 
     if (s->root_scalable) {
         vtd_ce_get_pasid_entry(s, ce, &pe, pasid);
-        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
+        return VTD_SM_PASID_ENTRY_DID(&pe);
     }
 
     return VTD_CONTEXT_ENTRY_DID(ce->hi);
@@ -3074,6 +3074,144 @@ static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
     return true;
 }
 
+static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
+                                            VTDPASIDEntry *pe)
+{
+    IntelIOMMUState *s = vtd_as->iommu_state;
+    VTDContextEntry ce;
+    int ret;
+
+    if (!s->root_scalable) {
+        return -VTD_FR_RTADDR_INV_TTM;
+    }
+
+    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn,
+                                   &ce);
+    if (ret) {
+        return ret;
+    }
+
+    return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid);
+}
+
+/*
+ * For each IOMMUFD backed device, update or invalidate pasid cache based on
+ * the value in memory.
+ */
+static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
+                                        gpointer user_data)
+{
+    VTDPASIDCacheInfo *pc_info = user_data;
+    VTDAddressSpace *vtd_as = value;
+    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
+    VTDPASIDEntry pe;
+    uint16_t did;
+
+    /* Ignore emulated device or legacy VFIO backed device */
+    if (!vtd_find_hiod_iommufd(vtd_as)) {
+        return;
+    }
+
+    /* non-RID_PASID isn't supported yet */
+    assert(vtd_as->pasid == PCI_NO_PASID);
+
+    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
+        /*
+         * No valid pasid entry in guest memory. e.g. pasid entry was modified
+         * to be either all-zero or non-present. Either case means existing
+         * pasid cache should be invalidated.
+         */
+        pc_entry->valid = false;
+        return;
+    }
+
+    /*
+     * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require
+     * DID check. If DID doesn't match the value in cache or memory, then
+     * it's not a pasid entry we want to invalidate.
+     */
+    switch (pc_info->type) {
+    case VTD_INV_DESC_PASIDC_G_PASID_SI:
+    case VTD_INV_DESC_PASIDC_G_DSI:
+        if (pc_entry->valid) {
+            did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
+            if (pc_info->did == did) {
+                break;
+            }
+        }
+        did = VTD_SM_PASID_ENTRY_DID(&pe);
+        if (pc_info->did == did) {
+            break;
+        }
+        return;
+    }
+
+    pc_entry->pasid_entry = pe;
+    pc_entry->valid = true;
+}
+
+static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
+{
+    if (!s->fsts || !s->root_scalable || !s->dmar_enabled) {
+        return;
+    }
+
+    vtd_iommu_lock(s);
+    g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
+                         pc_info);
+    vtd_iommu_unlock(s);
+}
+
+static bool vtd_process_pasid_desc(IntelIOMMUState *s,
+                                   VTDInvDesc *inv_desc)
+{
+    uint16_t did;
+    uint32_t pasid;
+    VTDPASIDCacheInfo pc_info = {};
+    uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0, VTD_INV_DESC_ALL_ONE,
+                        VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
+
+    if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
+                                     __func__, "pasid cache inv")) {
+        return false;
+    }
+
+    did = VTD_INV_DESC_PASIDC_DID(inv_desc);
+    pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc);
+    pc_info.type = VTD_INV_DESC_PASIDC_G(inv_desc);
+
+    switch (pc_info.type) {
+    case VTD_INV_DESC_PASIDC_G_DSI:
+        trace_vtd_inv_desc_pasid_cache_dsi(did);
+        pc_info.did = did;
+        break;
+
+    case VTD_INV_DESC_PASIDC_G_PASID_SI:
+        /* PASID selective implies a DID selective */
+        trace_vtd_inv_desc_pasid_cache_psi(did, pasid);
+        /* Currently non-RID_PASID invalidation requests are ignored */
+        if (pasid != RID_PASID) {
+            return true;
+        }
+        pc_info.did = did;
+        pc_info.pasid = pasid;
+        break;
+
+    case VTD_INV_DESC_PASIDC_G_GLOBAL:
+        trace_vtd_inv_desc_pasid_cache_gsi();
+        break;
+
+    default:
+        error_report_once("invalid granularity field in PASID-cache invalidate "
+                          "descriptor, hi: 0x%"PRIx64" lo: 0x%" PRIx64,
+                           inv_desc->val[1], inv_desc->val[0]);
+        return false;
+    }
+
+    vtd_pasid_cache_sync(s, &pc_info);
+    return true;
+}
+
 static bool vtd_process_inv_iec_desc(IntelIOMMUState *s,
                                      VTDInvDesc *inv_desc)
 {
@@ -3236,6 +3374,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
+    case VTD_INV_DESC_PC:
+        trace_vtd_inv_desc("pasid-cache", inv_desc.val[1], inv_desc.val[0]);
+        if (!vtd_process_pasid_desc(s, &inv_desc)) {
+            return false;
+        }
+        break;
+
     case VTD_INV_DESC_PIOTLB:
         trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]);
         if (!vtd_process_piotlb_desc(s, &inv_desc)) {
@@ -3271,16 +3416,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s)
         }
         break;
 
-    /*
-     * TODO: the entity of below two cases will be implemented in future series.
-     * To make guest (which integrates scalable mode support patch set in
-     * iommu driver) work, just return true is enough so far.
-     */
-    case VTD_INV_DESC_PC:
-        if (s->scalable_mode) {
-            break;
-        }
-    /* fallthrough */
     default:
         error_report_once("%s: invalid inv desc: hi=%"PRIx64", lo=%"PRIx64
                           " (unknown type)", __func__, inv_desc.hi,
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index ac9e1a10aa..298addb24d 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -24,6 +24,9 @@ vtd_inv_qi_head(uint16_t head) "read head %d"
 vtd_inv_qi_tail(uint16_t head) "write tail %d"
 vtd_inv_qi_fetch(void) ""
 vtd_context_cache_reset(void) ""
+vtd_inv_desc_pasid_cache_gsi(void) ""
+vtd_inv_desc_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation domain 0x%"PRIx16
+vtd_inv_desc_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
 vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
 vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
-- 
2.47.1
Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation
Posted by Yi Liu 1 month ago
On 2025/9/18 16:57, Zhenzhong Duan wrote:
> This adds PASID cache sync for RID_PASID, non-RID_PASID isn't supported.
> 
> Adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the pasid
> entry and track PASID usage and future PASID tagged DMA address translation
> support in vIOMMU.
> 
> When guest triggers pasid cache invalidation, QEMU will capture it and
> update or invalidate pasid cache.
> 
> vIOMMU emulator could figure out the reason by fetching latest guest pasid
> entry in memory and compare it with cached PASID entry if it's valid.
> 
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu_internal.h |  19 +++-
>   include/hw/i386/intel_iommu.h  |   6 ++
>   hw/i386/intel_iommu.c          | 157 ++++++++++++++++++++++++++++++---
>   hw/i386/trace-events           |   3 +
>   4 files changed, 173 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 9cdc8d5dbb..d400bcee21 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -316,6 +316,7 @@ typedef enum VTDFaultReason {
>                                     * request while disabled */
>       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>   
> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>       /* PASID directory entry access failure */
>       VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>       /* The Present(P) field of pasid directory entry is 0 */
> @@ -493,6 +494,15 @@ typedef union VTDInvDesc VTDInvDesc;
>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL0     0xfff000000000f1c0ULL
>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>   
> +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
> +#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4, 2)
> +#define VTD_INV_DESC_PASIDC_G_DSI       0
> +#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
> +#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
> +#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16, 16)
> +#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32, 20)
> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0   0xfff000000000f1c0ULL
> +
>   /* Information about page-selective IOTLB invalidate */
>   struct VTDIOTLBPageInvInfo {
>       uint16_t domain_id;
> @@ -552,6 +562,13 @@ typedef struct VTDRootEntry VTDRootEntry;
>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>   
> +typedef struct VTDPASIDCacheInfo {
> +    uint8_t type;
> +    uint16_t did;
> +    uint32_t pasid;
> +    bool reset;
> +} VTDPASIDCacheInfo;
> +
>   /* PASID Table Related Definitions */
>   #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>   #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
> @@ -573,7 +590,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>   #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
>   
>   #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted guest-address-width */
> -#define VTD_SM_PASID_ENTRY_DID(val)    ((val) & VTD_DOMAIN_ID_MASK)
> +#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0, 16)
>   
>   #define VTD_SM_PASID_ENTRY_FSPM          3ULL
>   #define VTD_SM_PASID_ENTRY_FSPTPTR       (~0xfffULL)
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index 3351892da0..ff01e5c82d 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>       uint64_t val[8];
>   };
>   
> +typedef struct VTDPASIDCacheEntry {
> +    struct VTDPASIDEntry pasid_entry;
> +    bool valid;
> +} VTDPASIDCacheEntry;
> +
>   struct VTDAddressSpace {
>       PCIBus *bus;
>       uint8_t devfn;
> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>       MemoryRegion iommu_ir_fault; /* Interrupt region for catching fault */
>       IntelIOMMUState *iommu_state;
>       VTDContextCacheEntry context_cache_entry;
> +    VTDPASIDCacheEntry pasid_cache_entry;
>       QLIST_ENTRY(VTDAddressSpace) next;
>       /* Superset of notifier flags that this address space has */
>       IOMMUNotifierFlag notifier_flags;
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index d37d47115a..24061f6dc6 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -1614,7 +1614,7 @@ static uint16_t vtd_get_domain_id(IntelIOMMUState *s,
>   
>       if (s->root_scalable) {
>           vtd_ce_get_pasid_entry(s, ce, &pe, pasid);
> -        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
> +        return VTD_SM_PASID_ENTRY_DID(&pe);
>       }
>   
>       return VTD_CONTEXT_ENTRY_DID(ce->hi);
> @@ -3074,6 +3074,144 @@ static bool vtd_process_piotlb_desc(IntelIOMMUState *s,
>       return true;
>   }
>   
> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
> +                                            VTDPASIDEntry *pe)
> +{
> +    IntelIOMMUState *s = vtd_as->iommu_state;
> +    VTDContextEntry ce;
> +    int ret;
> +
> +    if (!s->root_scalable) {
> +        return -VTD_FR_RTADDR_INV_TTM;
> +    }
> +
> +    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn,
> +                                   &ce);
> +    if (ret) {
> +        return ret;
> +    }
> +
> +    return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid);
> +}
> +
> +/*
> + * For each IOMMUFD backed device, update or invalidate pasid cache based on
> + * the value in memory.
> + */
> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
> +                                        gpointer user_data)
> +{
> +    VTDPASIDCacheInfo *pc_info = user_data;
> +    VTDAddressSpace *vtd_as = value;
> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> +    VTDPASIDEntry pe;
> +    uint16_t did;
> +
> +    /* Ignore emulated device or legacy VFIO backed device */
> +    if (!vtd_find_hiod_iommufd(vtd_as)) {
> +        return;
> +    }
> +
> +    /* non-RID_PASID isn't supported yet */
> +    assert(vtd_as->pasid == PCI_NO_PASID);
> +
> +    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
> +        /*
> +         * No valid pasid entry in guest memory. e.g. pasid entry was modified
> +         * to be either all-zero or non-present. Either case means existing
> +         * pasid cache should be invalidated.
> +         */
> +        pc_entry->valid = false;
> +        return;
> +    }
> +
> +    /*
> +     * VTD_INV_DESC_PASIDC_G_DSI and VTD_INV_DESC_PASIDC_G_PASID_SI require
> +     * DID check. If DID doesn't match the value in cache or memory, then
> +     * it's not a pasid entry we want to invalidate.

I think the DID comparison applies only to the case in which pc_entry->valid
is true. If pc_entry->valid is false, there is no cached pc_entry yet. In that
case, if the pe in guest memory is valid, the pc_entry should be updated/set so
that the bind_pasid operation (added in a later patch) is conducted.

> +     */
> +    switch (pc_info->type) {
> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
> +    case VTD_INV_DESC_PASIDC_G_DSI:
> +        if (pc_entry->valid) {
> +            did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
> +            if (pc_info->did == did) {
> +                break;
> +            }
> +        }
> +        did = VTD_SM_PASID_ENTRY_DID(&pe);
> +        if (pc_info->did == did) {
> +            break;
> +        }
> +        return;
> +    }
> +
> +    pc_entry->pasid_entry = pe;
> +    pc_entry->valid = true;
> +}
> +
> +static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> +{
> +    if (!s->fsts || !s->root_scalable || !s->dmar_enabled) {
> +        return;
> +    }
> +
> +    vtd_iommu_lock(s);
> +    g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
> +                         pc_info);
> +    vtd_iommu_unlock(s);
> +}
> +
> +static bool vtd_process_pasid_desc(IntelIOMMUState *s,
> +                                   VTDInvDesc *inv_desc)
> +{
> +    uint16_t did;
> +    uint32_t pasid;
> +    VTDPASIDCacheInfo pc_info = {};
> +    uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0, VTD_INV_DESC_ALL_ONE,
> +                        VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE};
> +
> +    if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
> +                                     __func__, "pasid cache inv")) {
> +        return false;
> +    }
> +
> +    did = VTD_INV_DESC_PASIDC_DID(inv_desc);
> +    pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc);
> +    pc_info.type = VTD_INV_DESC_PASIDC_G(inv_desc);
> +
> +    switch (pc_info.type) {
> +    case VTD_INV_DESC_PASIDC_G_DSI:
> +        trace_vtd_inv_desc_pasid_cache_dsi(did);
> +        pc_info.did = did;
> +        break;
> +
> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
> +        /* PASID selective implies a DID selective */
> +        trace_vtd_inv_desc_pasid_cache_psi(did, pasid);
> +        /* Currently non-RID_PASID invalidation requests are ignored */

I have some doubt whether this is safe, given that the ATS path (for emulated
devices) is merged. The ATS path supports non-RID_PASID if the emulated device
has the PASID cap. Luckily, the ATS path does not have a pasid-level cache,
so skipping invalidation for non-RID_PASID is not harmful so far. Just a
note to other reviewers, although I didn't see a problem here.

> +        if (pasid != RID_PASID) {
> +            return true;
> +        }
> +        pc_info.did = did;
> +        pc_info.pasid = pasid;
> +        break;
> +

Regards,
Yi Liu
RE: [PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation
Posted by Duan, Zhenzhong 1 month ago

>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation
>
>On 2025/9/18 16:57, Zhenzhong Duan wrote:
>> This adds PASID cache sync for RID_PASID, non-RID_PASID isn't supported.
>>
>> Adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the
>pasid
>> entry and track PASID usage and future PASID tagged DMA address
>translation
>> support in vIOMMU.
>>
>> When guest triggers pasid cache invalidation, QEMU will capture it and
>> update or invalidate pasid cache.
>>
>> vIOMMU emulator could figure out the reason by fetching latest guest pasid
>> entry in memory and compare it with cached PASID entry if it's valid.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   hw/i386/intel_iommu_internal.h |  19 +++-
>>   include/hw/i386/intel_iommu.h  |   6 ++
>>   hw/i386/intel_iommu.c          | 157
>++++++++++++++++++++++++++++++---
>>   hw/i386/trace-events           |   3 +
>>   4 files changed, 173 insertions(+), 12 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>> index 9cdc8d5dbb..d400bcee21 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -316,6 +316,7 @@ typedef enum VTDFaultReason {
>>                                     * request while disabled */
>>       VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>
>> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>>       /* PASID directory entry access failure */
>>       VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>>       /* The Present(P) field of pasid directory entry is 0 */
>> @@ -493,6 +494,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL0
>0xfff000000000f1c0ULL
>>   #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>
>> +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
>> +#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4, 2)
>> +#define VTD_INV_DESC_PASIDC_G_DSI       0
>> +#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
>> +#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
>> +#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16,
>16)
>> +#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32,
>20)
>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0   0xfff000000000f1c0ULL
>> +
>>   /* Information about page-selective IOTLB invalidate */
>>   struct VTDIOTLBPageInvInfo {
>>       uint16_t domain_id;
>> @@ -552,6 +562,13 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1
>0xffffffffffe00000ULL
>>
>> +typedef struct VTDPASIDCacheInfo {
>> +    uint8_t type;
>> +    uint16_t did;
>> +    uint32_t pasid;
>> +    bool reset;
>> +} VTDPASIDCacheInfo;
>> +
>>   /* PASID Table Related Definitions */
>>   #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>>   #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
>> @@ -573,7 +590,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
>>
>>   #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted
>guest-address-width */
>> -#define VTD_SM_PASID_ENTRY_DID(val)    ((val) &
>VTD_DOMAIN_ID_MASK)
>> +#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0, 16)
>>
>>   #define VTD_SM_PASID_ENTRY_FSPM          3ULL
>>   #define VTD_SM_PASID_ENTRY_FSPTPTR       (~0xfffULL)
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index 3351892da0..ff01e5c82d 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>>       uint64_t val[8];
>>   };
>>
>> +typedef struct VTDPASIDCacheEntry {
>> +    struct VTDPASIDEntry pasid_entry;
>> +    bool valid;
>> +} VTDPASIDCacheEntry;
>> +
>>   struct VTDAddressSpace {
>>       PCIBus *bus;
>>       uint8_t devfn;
>> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>>       MemoryRegion iommu_ir_fault; /* Interrupt region for catching
>fault */
>>       IntelIOMMUState *iommu_state;
>>       VTDContextCacheEntry context_cache_entry;
>> +    VTDPASIDCacheEntry pasid_cache_entry;
>>       QLIST_ENTRY(VTDAddressSpace) next;
>>       /* Superset of notifier flags that this address space has */
>>       IOMMUNotifierFlag notifier_flags;
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index d37d47115a..24061f6dc6 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -1614,7 +1614,7 @@ static uint16_t
>vtd_get_domain_id(IntelIOMMUState *s,
>>
>>       if (s->root_scalable) {
>>           vtd_ce_get_pasid_entry(s, ce, &pe, pasid);
>> -        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
>> +        return VTD_SM_PASID_ENTRY_DID(&pe);
>>       }
>>
>>       return VTD_CONTEXT_ENTRY_DID(ce->hi);
>> @@ -3074,6 +3074,144 @@ static bool
>vtd_process_piotlb_desc(IntelIOMMUState *s,
>>       return true;
>>   }
>>
>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>> +                                            VTDPASIDEntry *pe)
>> +{
>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>> +    VTDContextEntry ce;
>> +    int ret;
>> +
>> +    if (!s->root_scalable) {
>> +        return -VTD_FR_RTADDR_INV_TTM;
>> +    }
>> +
>> +    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>vtd_as->devfn,
>> +                                   &ce);
>> +    if (ret) {
>> +        return ret;
>> +    }
>> +
>> +    return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid);
>> +}
>> +
>> +/*
>> + * For each IOMMUFD backed device, update or invalidate pasid cache
>based on
>> + * the value in memory.
>> + */
>> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>> +                                        gpointer user_data)
>> +{
>> +    VTDPASIDCacheInfo *pc_info = user_data;
>> +    VTDAddressSpace *vtd_as = value;
>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> +    VTDPASIDEntry pe;
>> +    uint16_t did;
>> +
>> +    /* Ignore emulated device or legacy VFIO backed device */
>> +    if (!vtd_find_hiod_iommufd(vtd_as)) {
>> +        return;
>> +    }
>> +
>> +    /* non-RID_PASID isn't supported yet */
>> +    assert(vtd_as->pasid == PCI_NO_PASID);
>> +
>> +    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>> +        /*
>> +         * No valid pasid entry in guest memory. e.g. pasid entry was
>modified
>> +         * to be either all-zero or non-present. Either case means
>existing
>> +         * pasid cache should be invalidated.
>> +         */
>> +        pc_entry->valid = false;
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * VTD_INV_DESC_PASIDC_G_DSI and
>VTD_INV_DESC_PASIDC_G_PASID_SI require
>> +     * DID check. If DID doesn't match the value in cache or memory,
>then
>> +     * it's not a pasid entry we want to invalidate.
>
>I think comparing DID applies to the case in which pc_entry->valid is
>true. If pc_entry->valid is false, this means no cached pc_entry yet. If
>pe in guest memory is valid, the pc_entry should be updated/set hence
>the bind_pasid operation (added in later patch) would be conducted.

We get here only when the pe in guest memory is valid; otherwise we would have already returned at the "if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {" check.

If there is no cached pe but there is a valid pe in guest memory, that means it is a new pe.
For a new entry, the guest constructs the pasid cache invalidation request with the DID
field filled with the DID from the pe in memory. We don't unconditionally cache a new pe
for all devices on a single pasid cache invalidation unless it's a global invalidation.

>
>> +     */
>> +    switch (pc_info->type) {
>> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
>> +    case VTD_INV_DESC_PASIDC_G_DSI:
>> +        if (pc_entry->valid) {
>> +            did = VTD_SM_PASID_ENTRY_DID(&pc_entry->pasid_entry);
>> +            if (pc_info->did == did) {
>> +                break;
>> +            }
>> +        }
>> +        did = VTD_SM_PASID_ENTRY_DID(&pe);
>> +        if (pc_info->did == did) {
>> +            break;
>> +        }
>> +        return;
>> +    }
>> +
>> +    pc_entry->pasid_entry = pe;
>> +    pc_entry->valid = true;
>> +}
>> +
>> +static void vtd_pasid_cache_sync(IntelIOMMUState *s,
>VTDPASIDCacheInfo *pc_info)
>> +{
>> +    if (!s->fsts || !s->root_scalable || !s->dmar_enabled) {
>> +        return;
>> +    }
>> +
>> +    vtd_iommu_lock(s);
>> +    g_hash_table_foreach(s->vtd_address_spaces,
>vtd_pasid_cache_sync_locked,
>> +                         pc_info);
>> +    vtd_iommu_unlock(s);
>> +}
>> +
>> +static bool vtd_process_pasid_desc(IntelIOMMUState *s,
>> +                                   VTDInvDesc *inv_desc)
>> +{
>> +    uint16_t did;
>> +    uint32_t pasid;
>> +    VTDPASIDCacheInfo pc_info = {};
>> +    uint64_t mask[4] = {VTD_INV_DESC_PASIDC_RSVD_VAL0,
>VTD_INV_DESC_ALL_ONE,
>> +                        VTD_INV_DESC_ALL_ONE,
>VTD_INV_DESC_ALL_ONE};
>> +
>> +    if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true,
>> +                                     __func__, "pasid cache inv"))
>{
>> +        return false;
>> +    }
>> +
>> +    did = VTD_INV_DESC_PASIDC_DID(inv_desc);
>> +    pasid = VTD_INV_DESC_PASIDC_PASID(inv_desc);
>> +    pc_info.type = VTD_INV_DESC_PASIDC_G(inv_desc);
>> +
>> +    switch (pc_info.type) {
>> +    case VTD_INV_DESC_PASIDC_G_DSI:
>> +        trace_vtd_inv_desc_pasid_cache_dsi(did);
>> +        pc_info.did = did;
>> +        break;
>> +
>> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
>> +        /* PASID selective implies a DID selective */
>> +        trace_vtd_inv_desc_pasid_cache_psi(did, pasid);
>> +        /* Currently non-RID_PASID invalidation requests are ignored */
>
>I'm a bit doubting if this is safe given the ATS path (for emulated
>device) is merged. ATS path supports non-RID_PASID if emulated device
>has PASID cap. The lucky thing is that the ATS path does not have
>pasid level cache. So skipping invalidation for non-RID_PASID is not
>harmful so far. Just a note to other reviewers although I didn't see a
>problem here.

Yes, currently no emulated device supports the PASID cap,
so I don't cache the pasid entry for emulated devices for now.

Thanks
Zhenzhong
Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation
Posted by Yi Liu 1 month ago
On 2025/10/13 15:37, Duan, Zhenzhong wrote:
> 
> 
>> -----Original Message-----
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Subject: Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation
>>
>> On 2025/9/18 16:57, Zhenzhong Duan wrote:
>>> This adds PASID cache sync for RID_PASID, non-RID_PASID isn't supported.
>>>
>>> Adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache the
>> pasid
>>> entry and track PASID usage and future PASID tagged DMA address
>> translation
>>> support in vIOMMU.
>>>
>>> When guest triggers pasid cache invalidation, QEMU will capture it and
>>> update or invalidate pasid cache.
>>>
>>> vIOMMU emulator could figure out the reason by fetching latest guest pasid
>>> entry in memory and compare it with cached PASID entry if it's valid.
>>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>    hw/i386/intel_iommu_internal.h |  19 +++-
>>>    include/hw/i386/intel_iommu.h  |   6 ++
>>>    hw/i386/intel_iommu.c          | 157
>> ++++++++++++++++++++++++++++++---
>>>    hw/i386/trace-events           |   3 +
>>>    4 files changed, 173 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/hw/i386/intel_iommu_internal.h
>> b/hw/i386/intel_iommu_internal.h
>>> index 9cdc8d5dbb..d400bcee21 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -316,6 +316,7 @@ typedef enum VTDFaultReason {
>>>                                      * request while disabled */
>>>        VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>>
>>> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>>>        /* PASID directory entry access failure */
>>>        VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>>>        /* The Present(P) field of pasid directory entry is 0 */
>>> @@ -493,6 +494,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>>    #define VTD_INV_DESC_PIOTLB_RSVD_VAL0
>> 0xfff000000000f1c0ULL
>>>    #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>>
>>> +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
>>> +#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4, 2)
>>> +#define VTD_INV_DESC_PASIDC_G_DSI       0
>>> +#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
>>> +#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
>>> +#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16,
>> 16)
>>> +#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32,
>> 20)
>>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0   0xfff000000000f1c0ULL
>>> +
>>>    /* Information about page-selective IOTLB invalidate */
>>>    struct VTDIOTLBPageInvInfo {
>>>        uint16_t domain_id;
>>> @@ -552,6 +562,13 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>> ~VTD_HAW_MASK(aw))
>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1
>> 0xffffffffffe00000ULL
>>>
>>> +typedef struct VTDPASIDCacheInfo {
>>> +    uint8_t type;
>>> +    uint16_t did;
>>> +    uint32_t pasid;
>>> +    bool reset;
>>> +} VTDPASIDCacheInfo;
>>> +
>>>    /* PASID Table Related Definitions */
>>>    #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>>>    #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
>>> @@ -573,7 +590,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>    #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
>>>
>>>    #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted
>> guest-address-width */
>>> -#define VTD_SM_PASID_ENTRY_DID(val)    ((val) &
>> VTD_DOMAIN_ID_MASK)
>>> +#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0, 16)
>>>
>>>    #define VTD_SM_PASID_ENTRY_FSPM          3ULL
>>>    #define VTD_SM_PASID_ENTRY_FSPTPTR       (~0xfffULL)
>>> diff --git a/include/hw/i386/intel_iommu.h
>> b/include/hw/i386/intel_iommu.h
>>> index 3351892da0..ff01e5c82d 100644
>>> --- a/include/hw/i386/intel_iommu.h
>>> +++ b/include/hw/i386/intel_iommu.h
>>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>>>        uint64_t val[8];
>>>    };
>>>
>>> +typedef struct VTDPASIDCacheEntry {
>>> +    struct VTDPASIDEntry pasid_entry;
>>> +    bool valid;
>>> +} VTDPASIDCacheEntry;
>>> +
>>>    struct VTDAddressSpace {
>>>        PCIBus *bus;
>>>        uint8_t devfn;
>>> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>>>        MemoryRegion iommu_ir_fault; /* Interrupt region for catching
>> fault */
>>>        IntelIOMMUState *iommu_state;
>>>        VTDContextCacheEntry context_cache_entry;
>>> +    VTDPASIDCacheEntry pasid_cache_entry;
>>>        QLIST_ENTRY(VTDAddressSpace) next;
>>>        /* Superset of notifier flags that this address space has */
>>>        IOMMUNotifierFlag notifier_flags;
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index d37d47115a..24061f6dc6 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -1614,7 +1614,7 @@ static uint16_t
>> vtd_get_domain_id(IntelIOMMUState *s,
>>>
>>>        if (s->root_scalable) {
>>>            vtd_ce_get_pasid_entry(s, ce, &pe, pasid);
>>> -        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
>>> +        return VTD_SM_PASID_ENTRY_DID(&pe);
>>>        }
>>>
>>>        return VTD_CONTEXT_ENTRY_DID(ce->hi);
>>> @@ -3074,6 +3074,144 @@ static bool
>> vtd_process_piotlb_desc(IntelIOMMUState *s,
>>>        return true;
>>>    }
>>>
>>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>>> +                                            VTDPASIDEntry *pe)
>>> +{
>>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>>> +    VTDContextEntry ce;
>>> +    int ret;
>>> +
>>> +    if (!s->root_scalable) {
>>> +        return -VTD_FR_RTADDR_INV_TTM;
>>> +    }
>>> +
>>> +    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>> vtd_as->devfn,
>>> +                                   &ce);
>>> +    if (ret) {
>>> +        return ret;
>>> +    }
>>> +
>>> +    return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid);
>>> +}
>>> +
>>> +/*
>>> + * For each IOMMUFD backed device, update or invalidate pasid cache
>> based on
>>> + * the value in memory.
>>> + */
>>> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>> +                                        gpointer user_data)
>>> +{
>>> +    VTDPASIDCacheInfo *pc_info = user_data;
>>> +    VTDAddressSpace *vtd_as = value;
>>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>>> +    VTDPASIDEntry pe;
>>> +    uint16_t did;
>>> +
>>> +    /* Ignore emulated device or legacy VFIO backed device */
>>> +    if (!vtd_find_hiod_iommufd(vtd_as)) {
>>> +        return;
>>> +    }
>>> +
>>> +    /* non-RID_PASID isn't supported yet */
>>> +    assert(vtd_as->pasid == PCI_NO_PASID);
>>> +
>>> +    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>>> +        /*
>>> +         * No valid pasid entry in guest memory. e.g. pasid entry was
>> modified
>>> +         * to be either all-zero or non-present. Either case means
>> existing
>>> +         * pasid cache should be invalidated.
>>> +         */
>>> +        pc_entry->valid = false;
>>> +        return;
>>> +    }
>>> +
>>> +    /*
>>> +     * VTD_INV_DESC_PASIDC_G_DSI and
>> VTD_INV_DESC_PASIDC_G_PASID_SI require
>>> +     * DID check. If DID doesn't match the value in cache or memory,
>> then
>>> +     * it's not a pasid entry we want to invalidate.
>>
>> I think comparing DID applies to the case in which pc_entry->valid is
>> true. If pc_entry->valid is false, this means no cached pc_entry yet. If
>> pe in guest memory is valid, the pc_entry should be updated/set hence
>> the bind_pasid operation (added in later patch) would be conducted.
> 
> We get here only when pe in guest memory is valid, or else we have returned in "if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {" check.
> 
> If no cached pe but valid pe in guest memory, that means a new pe.
> For new entry, guest constructs pasid cache invalidation request with DID
> field filled with DID from pe in memory. We don't unconditionally cache new pe
> for all devices for one pasid cache invalidation except it's global invalidation.

I see. Yes, the Intel IOMMU driver already uses the DID configured in the
pasid entry to flush the pasid cache per caching mode. But the spec doesn't
seem to state this anywhere. Anyway, I don't see any reason why a guest
iommu driver would want to use a DID different from the one in the pasid
entry when this is a newly set pasid entry. So it's fine with me now.

By the way, it would be nice to note how you support the global invalidation,
since it's no longer part of pc_info->type.

Regards,
Yi Liu
RE: [PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation
Posted by Duan, Zhenzhong 1 month ago

>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache invalidation
>
>On 2025/10/13 15:37, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Subject: Re: [PATCH v6 12/22] intel_iommu: Handle PASID cache
>invalidation
>>>
>>> On 2025/9/18 16:57, Zhenzhong Duan wrote:
>>>> This adds PASID cache sync for RID_PASID, non-RID_PASID isn't
>supported.
>>>>
>>>> Adds an new entry VTDPASIDCacheEntry in VTDAddressSpace to cache
>the
>>> pasid
>>>> entry and track PASID usage and future PASID tagged DMA address
>>> translation
>>>> support in vIOMMU.
>>>>
>>>> When guest triggers pasid cache invalidation, QEMU will capture it and
>>>> update or invalidate pasid cache.
>>>>
>>>> vIOMMU emulator could figure out the reason by fetching latest guest
>pasid
>>>> entry in memory and compare it with cached PASID entry if it's valid.
>>>>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>    hw/i386/intel_iommu_internal.h |  19 +++-
>>>>    include/hw/i386/intel_iommu.h  |   6 ++
>>>>    hw/i386/intel_iommu.c          | 157
>>> ++++++++++++++++++++++++++++++---
>>>>    hw/i386/trace-events           |   3 +
>>>>    4 files changed, 173 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/hw/i386/intel_iommu_internal.h
>>> b/hw/i386/intel_iommu_internal.h
>>>> index 9cdc8d5dbb..d400bcee21 100644
>>>> --- a/hw/i386/intel_iommu_internal.h
>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>> @@ -316,6 +316,7 @@ typedef enum VTDFaultReason {
>>>>                                      * request while disabled */
>>>>        VTD_FR_IR_SID_ERR = 0x26,   /* Invalid Source-ID */
>>>>
>>>> +    VTD_FR_RTADDR_INV_TTM = 0x31,  /* Invalid TTM in RTADDR */
>>>>        /* PASID directory entry access failure */
>>>>        VTD_FR_PASID_DIR_ACCESS_ERR = 0x50,
>>>>        /* The Present(P) field of pasid directory entry is 0 */
>>>> @@ -493,6 +494,15 @@ typedef union VTDInvDesc VTDInvDesc;
>>>>    #define VTD_INV_DESC_PIOTLB_RSVD_VAL0
>>> 0xfff000000000f1c0ULL
>>>>    #define VTD_INV_DESC_PIOTLB_RSVD_VAL1     0xf80ULL
>>>>
>>>> +/* PASID-cache Invalidate Descriptor (pc_inv_dsc) fields */
>>>> +#define VTD_INV_DESC_PASIDC_G(x)        extract64((x)->val[0], 4,
>2)
>>>> +#define VTD_INV_DESC_PASIDC_G_DSI       0
>>>> +#define VTD_INV_DESC_PASIDC_G_PASID_SI  1
>>>> +#define VTD_INV_DESC_PASIDC_G_GLOBAL    3
>>>> +#define VTD_INV_DESC_PASIDC_DID(x)      extract64((x)->val[0], 16,
>>> 16)
>>>> +#define VTD_INV_DESC_PASIDC_PASID(x)    extract64((x)->val[0], 32,
>>> 20)
>>>> +#define VTD_INV_DESC_PASIDC_RSVD_VAL0
>0xfff000000000f1c0ULL
>>>> +
>>>>    /* Information about page-selective IOTLB invalidate */
>>>>    struct VTDIOTLBPageInvInfo {
>>>>        uint16_t domain_id;
>>>> @@ -552,6 +562,13 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>>> ~VTD_HAW_MASK(aw))
>>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1
>>> 0xffffffffffe00000ULL
>>>>
>>>> +typedef struct VTDPASIDCacheInfo {
>>>> +    uint8_t type;
>>>> +    uint16_t did;
>>>> +    uint32_t pasid;
>>>> +    bool reset;
>>>> +} VTDPASIDCacheInfo;
>>>> +
>>>>    /* PASID Table Related Definitions */
>>>>    #define VTD_PASID_DIR_BASE_ADDR_MASK  (~0xfffULL)
>>>>    #define VTD_PASID_TABLE_BASE_ADDR_MASK (~0xfffULL)
>>>> @@ -573,7 +590,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>>    #define VTD_SM_PASID_ENTRY_PT          (4ULL << 6)
>>>>
>>>>    #define VTD_SM_PASID_ENTRY_AW          7ULL /* Adjusted
>>> guest-address-width */
>>>> -#define VTD_SM_PASID_ENTRY_DID(val)    ((val) &
>>> VTD_DOMAIN_ID_MASK)
>>>> +#define VTD_SM_PASID_ENTRY_DID(x)      extract64((x)->val[1], 0,
>16)
>>>>
>>>>    #define VTD_SM_PASID_ENTRY_FSPM          3ULL
>>>>    #define VTD_SM_PASID_ENTRY_FSPTPTR       (~0xfffULL)
>>>> diff --git a/include/hw/i386/intel_iommu.h
>>> b/include/hw/i386/intel_iommu.h
>>>> index 3351892da0..ff01e5c82d 100644
>>>> --- a/include/hw/i386/intel_iommu.h
>>>> +++ b/include/hw/i386/intel_iommu.h
>>>> @@ -95,6 +95,11 @@ struct VTDPASIDEntry {
>>>>        uint64_t val[8];
>>>>    };
>>>>
>>>> +typedef struct VTDPASIDCacheEntry {
>>>> +    struct VTDPASIDEntry pasid_entry;
>>>> +    bool valid;
>>>> +} VTDPASIDCacheEntry;
>>>> +
>>>>    struct VTDAddressSpace {
>>>>        PCIBus *bus;
>>>>        uint8_t devfn;
>>>> @@ -107,6 +112,7 @@ struct VTDAddressSpace {
>>>>        MemoryRegion iommu_ir_fault; /* Interrupt region for catching
>>> fault */
>>>>        IntelIOMMUState *iommu_state;
>>>>        VTDContextCacheEntry context_cache_entry;
>>>> +    VTDPASIDCacheEntry pasid_cache_entry;
>>>>        QLIST_ENTRY(VTDAddressSpace) next;
>>>>        /* Superset of notifier flags that this address space has */
>>>>        IOMMUNotifierFlag notifier_flags;
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index d37d47115a..24061f6dc6 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -1614,7 +1614,7 @@ static uint16_t
>>> vtd_get_domain_id(IntelIOMMUState *s,
>>>>
>>>>        if (s->root_scalable) {
>>>>            vtd_ce_get_pasid_entry(s, ce, &pe, pasid);
>>>> -        return VTD_SM_PASID_ENTRY_DID(pe.val[1]);
>>>> +        return VTD_SM_PASID_ENTRY_DID(&pe);
>>>>        }
>>>>
>>>>        return VTD_CONTEXT_ENTRY_DID(ce->hi);
>>>> @@ -3074,6 +3074,144 @@ static bool
>>> vtd_process_piotlb_desc(IntelIOMMUState *s,
>>>>        return true;
>>>>    }
>>>>
>>>> +static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>>>> +                                            VTDPASIDEntry
>*pe)
>>>> +{
>>>> +    IntelIOMMUState *s = vtd_as->iommu_state;
>>>> +    VTDContextEntry ce;
>>>> +    int ret;
>>>> +
>>>> +    if (!s->root_scalable) {
>>>> +        return -VTD_FR_RTADDR_INV_TTM;
>>>> +    }
>>>> +
>>>> +    ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus),
>>> vtd_as->devfn,
>>>> +                                   &ce);
>>>> +    if (ret) {
>>>> +        return ret;
>>>> +    }
>>>> +
>>>> +    return vtd_ce_get_pasid_entry(s, &ce, pe, vtd_as->pasid);
>>>> +}
>>>> +
>>>> +/*
>>>> + * For each IOMMUFD backed device, update or invalidate pasid cache
>>> based on
>>>> + * the value in memory.
>>>> + */
>>>> +static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>>>> +                                        gpointer user_data)
>>>> +{
>>>> +    VTDPASIDCacheInfo *pc_info = user_data;
>>>> +    VTDAddressSpace *vtd_as = value;
>>>> +    VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>>>> +    VTDPASIDEntry pe;
>>>> +    uint16_t did;
>>>> +
>>>> +    /* Ignore emulated device or legacy VFIO backed device */
>>>> +    if (!vtd_find_hiod_iommufd(vtd_as)) {
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /* non-RID_PASID isn't supported yet */
>>>> +    assert(vtd_as->pasid == PCI_NO_PASID);
>>>> +
>>>> +    if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>>>> +        /*
>>>> +         * No valid pasid entry in guest memory. e.g. pasid entry was
>>> modified
>>>> +         * to be either all-zero or non-present. Either case means
>>> existing
>>>> +         * pasid cache should be invalidated.
>>>> +         */
>>>> +        pc_entry->valid = false;
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    /*
>>>> +     * VTD_INV_DESC_PASIDC_G_DSI and
>>> VTD_INV_DESC_PASIDC_G_PASID_SI require
>>>> +     * DID check. If DID doesn't match the value in cache or memory,
>>> then
>>>> +     * it's not a pasid entry we want to invalidate.
>>>
>>> I think comparing DID applies to the case in which pc_entry->valid is
>>> true. If pc_entry->valid is false, this means no cached pc_entry yet. If
>>> pe in guest memory is valid, the pc_entry should be updated/set hence
>>> the bind_pasid operation (added in later patch) would be conducted.
>>
>> We get here only when pe in guest memory is valid, or else we have
>returned in "if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {" check.
>>
>> If no cached pe but valid pe in guest memory, that means a new pe.
>> For new entry, guest constructs pasid cache invalidation request with DID
>> field filled with DID from pe in memory. We don't unconditionally cache
>new pe
>> for all devices for one pasid cache invalidation except it's global
>invalidation.
>
>I see. yes, intel iommu driver has already used the did configed in the
>pasid entry to flush pasid cache per caching mode. But there seems no
>words stated this in spec. Anyway, I don't see any reason why a guest
>iommu driver wants to use a did unequal to the one in pasid entry when
>this is a newly set pasid entry. So it's fine to me now.
>
>btw. it would be nice to note how you support the global invalidation
>since it's no more part of pc_info->type.

It is still part of pc_info->type; I have used VTD_INV_DESC_PASIDC_G_GLOBAL
in "intel_iommu: Replay all pasid bindings when either SRTP or TE bit is changed" and
"intel_iommu: Replay pasid bindings after context cache invalidation".

For "intel_iommu: Replay pasid bindings after context cache invalidation",
I have to bring it back in v7, as I found that old Linux kernels don't follow the
VT-d spec, so that workaround is needed to support old guests.

Thanks
Zhenzhong