[PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request

Zhenzhong Duan posted 13 patches 1 month ago
Maintainers: Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, Peter Maydell <peter.maydell@linaro.org>, "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, "Clément Mathieu--Drif" <clement.mathieu--drif@eviden.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Alex Williamson <alex@shazbot.org>, "Cédric Le Goater" <clg@redhat.com>
There is a newer version of this series
[PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
Posted by Zhenzhong Duan 1 month ago
Structure VTDAddressSpace includes some elements suitable for emulated
device and passthrough device without PASID, e.g., address space,
different memory regions, etc, it is also protected by vtd iommu lock,
all these are useless and become a burden for passthrough device with
PASID.

When there are lots of PASIDs used in one device, the AS and MRs are
all registered to memory core and impact the whole system performance.

So instead of using VTDAddressSpace to cache pasid entry for each pasid
of a passthrough device, we define a light weight structure
VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
will use this struct as a parameter to conduct binding/unbinding to
nested hwpt and to record the current binded nested hwpt. It's also
designed to support PASID_0.

When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
(pasid cache invalidation) request, walk through each pasid in each
passthrough device for valid pasid entries, create a new
VTDACCELPASIDCacheEntry if not existing yet.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_accel.h    |  13 +++
 hw/i386/intel_iommu_internal.h |   8 ++
 hw/i386/intel_iommu.c          |   3 +
 hw/i386/intel_iommu_accel.c    | 170 +++++++++++++++++++++++++++++++++
 4 files changed, 194 insertions(+)

diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
index e5f0b077b4..a77fd06fe0 100644
--- a/hw/i386/intel_iommu_accel.h
+++ b/hw/i386/intel_iommu_accel.h
@@ -12,6 +12,13 @@
 #define HW_I386_INTEL_IOMMU_ACCEL_H
 #include CONFIG_DEVICES
 
+typedef struct VTDACCELPASIDCacheEntry {
+    VTDHostIOMMUDevice *vtd_hiod;
+    VTDPASIDEntry pe;
+    uint32_t pasid;
+    QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
+} VTDACCELPASIDCacheEntry;
+
 #ifdef CONFIG_VTD_ACCEL
 bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
                           Error **errp);
@@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
 void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
                                       uint32_t pasid, hwaddr addr,
                                       uint64_t npages, bool ih);
+void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
 void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
 #else
 static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
@@ -49,6 +57,11 @@ static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
 {
 }
 
+static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
+                                              VTDPASIDCacheInfo *pc_info)
+{
+}
+
 static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
 {
 }
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index c7e107fe87..ede4db6d2d 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
 #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
 
 #define PASID_0                             0
+#define VTD_SM_CONTEXT_ENTRY_PDTS(x)        extract64((x)->val[0], 9, 3)
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
 #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
 #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
@@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
 #define VTD_PASID_DIR_BITS_MASK       (0x3fffULL)
 #define VTD_PASID_DIR_INDEX(pasid)    (((pasid) >> 6) & VTD_PASID_DIR_BITS_MASK)
 #define VTD_PASID_DIR_FPD             (1ULL << 1) /* Fault Processing Disable */
+#define VTD_PASID_TABLE_ENTRY_NUM     (1ULL << 6)
 #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
 #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) & VTD_PASID_TABLE_BITS_MASK)
 #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing Disable */
@@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
     PCIBus *bus;
     uint8_t devfn;
     HostIOMMUDevice *hiod;
+    QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
 } VTDHostIOMMUDevice;
 
 /*
@@ -768,6 +771,11 @@ static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
     return memcmp(p1, p2, sizeof(*p1));
 }
 
+static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
+{
+    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
+}
+
 int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
                                   VTDPASIDDirEntry *pdire);
 int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 744b5967b2..984adc639a 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
     g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
                          pc_info);
     vtd_iommu_unlock(s);
+
+    vtd_pasid_cache_sync_accel(s, pc_info);
 }
 
 static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
@@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
     vtd_hiod->devfn = (uint8_t)devfn;
     vtd_hiod->iommu_state = s;
     vtd_hiod->hiod = hiod;
+    QLIST_INIT(&vtd_hiod->pasid_cache_list);
 
     if (!vtd_check_hiod(s, vtd_hiod, errp)) {
         g_free(vtd_hiod);
diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
index c2757f3bcd..0acf3ae77f 100644
--- a/hw/i386/intel_iommu_accel.c
+++ b/hw/i386/intel_iommu_accel.c
@@ -257,6 +257,176 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
                          vtd_flush_host_piotlb_locked, &piotlb_info);
 }
 
+static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
+                            VTDPASIDEntry *pe)
+{
+    VTDACCELPASIDCacheEntry *vtd_pce;
+
+    QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
+        if (vtd_pce->pasid == pasid) {
+            if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
+                vtd_pce->pe = *pe;
+            }
+            return;
+        }
+    }
+
+    vtd_pce = g_malloc0(sizeof(VTDACCELPASIDCacheEntry));
+    vtd_pce->vtd_hiod = vtd_hiod;
+    vtd_pce->pasid = pasid;
+    vtd_pce->pe = *pe;
+    QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
+}
+
+/*
+ * This function walks over PASID range within [start, end) in a single
+ * PASID table for entries matching @info type/did, then create
+ * VTDACCELPASIDCacheEntry if not exist yet.
+ */
+static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
+                                        dma_addr_t pt_base,
+                                        int start,
+                                        int end,
+                                        VTDPASIDCacheInfo *info)
+{
+    IntelIOMMUState *s = vtd_hiod->iommu_state;
+    VTDPASIDEntry pe;
+    int pasid;
+
+    for (pasid = start; pasid < end; pasid++) {
+        if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
+            !vtd_pe_present(&pe)) {
+            continue;
+        }
+
+        if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
+             info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
+            (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
+            /*
+             * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
+             * requires domain id check. If domain id check fail,
+             * go to next pasid.
+             */
+            continue;
+        }
+
+        vtd_find_add_pc(vtd_hiod, pasid, &pe);
+    }
+}
+
+/*
+ * In VT-d scalable mode translation, PASID dir + PASID table is used.
+ * This function aims at looping over a range of PASIDs in the given
+ * two level table to identify the pasid config in guest.
+ */
+static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
+                                    dma_addr_t pdt_base,
+                                    int start, int end,
+                                    VTDPASIDCacheInfo *info)
+{
+    VTDPASIDDirEntry pdire;
+    int pasid = start;
+    int pasid_next;
+    dma_addr_t pt_base;
+
+    while (pasid < end) {
+        pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
+                     ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
+        pasid_next = pasid_next < end ? pasid_next : end;
+
+        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
+            && vtd_pdire_present(&pdire)) {
+            pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
+            vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
+                                        info);
+        }
+        pasid = pasid_next;
+    }
+}
+
+static void vtd_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
+                                          int start, int end,
+                                          VTDPASIDCacheInfo *pc_info)
+{
+    IntelIOMMUState *s = vtd_hiod->iommu_state;
+    VTDContextEntry ce;
+    int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
+
+    if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
+                                  vtd_hiod->devfn, &ce)) {
+        VTDPASIDCacheInfo walk_info = *pc_info;
+        uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
+                                VTD_PASID_TABLE_ENTRY_NUM;
+
+        end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
+
+        vtd_sm_pasid_table_walk(vtd_hiod, VTD_CE_GET_PASID_DIR_TABLE(&ce),
+                                start, end, &walk_info);
+    }
+}
+
+/*
+ * This function replays the guest pasid bindings by walking the two level
+ * guest PASID table. For each valid pasid entry, it creates an entry
+ * VTDACCELPASIDCacheEntry dynamically if not exist yet. This entry holds
+ * info specific to a pasid
+ */
+void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
+{
+    int start = PASID_0, end = 1 << s->pasid;
+    VTDHostIOMMUDevice *vtd_hiod;
+    GHashTableIter as_it;
+
+    if (!s->fsts) {
+        return;
+    }
+
+    /*
+     * VTDPASIDCacheInfo honors PCI pasid but VTDACCELPASIDCacheEntry honors
+     * iommu pasid
+     */
+    if (pc_info->pasid == PCI_NO_PASID) {
+        pc_info->pasid = PASID_0;
+    }
+
+    switch (pc_info->type) {
+    case VTD_INV_DESC_PASIDC_G_PASID_SI:
+        start = pc_info->pasid;
+        end = pc_info->pasid + 1;
+        /* fall through */
+    case VTD_INV_DESC_PASIDC_G_DSI:
+        /*
+         * loop all assigned devices, do domain id check in
+         * vtd_sm_pasid_table_walk_one() after get pasid entry.
+         */
+        break;
+    case VTD_INV_DESC_PASIDC_G_GLOBAL:
+        /* loop all assigned devices */
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    /*
+     * In this replay, one only needs to care about the devices which are
+     * backed by host IOMMU. Those devices have a corresponding vtd_hiod
+     * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
+     * is not necessary to replay the bindings since their cache should be
+     * created in the future DMA address translation.
+     *
+     * VTD translation callback never accesses vtd_hiod and its corresponding
+     * cached pasid entry, so no iommu lock needed here.
+     */
+    g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
+    while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
+        if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
+                                 TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
+            continue;
+        }
+        vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
+    }
+}
+
 static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
                                           void *caps, uint32_t size)
 {
-- 
2.47.3
Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
Posted by Yi Liu 3 weeks, 1 day ago
On 3/6/26 11:44, Zhenzhong Duan wrote:
> Structure VTDAddressSpace includes some elements suitable for emulated
> device and passthrough device without PASID, e.g., address space,
> different memory regions, etc, it is also protected by vtd iommu lock,
> all these are useless and become a burden for passthrough device with
> PASID.
> 
> When there are lots of PASIDs used in one device, the AS and MRs are
> all registered to memory core and impact the whole system performance.
> 
> So instead of using VTDAddressSpace to cache pasid entry for each pasid
> of a passthrough device, we define a light weight structure
> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
> will use this struct as a parameter to conduct binding/unbinding to
> nested hwpt and to record the current binded nested hwpt. It's also

s/binded/bound/

> designed to support PASID_0.
> 
> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
> (pasid cache invalidation) request, walk through each pasid in each
> passthrough device for valid pasid entries, create a new
> VTDACCELPASIDCacheEntry if not existing yet.

I think some tweak is preferred w.r.t. this and the next patch.

In this patch you only need to handle the PASID entry addition. Hence
you assume no existing VTDACCELPASIDCacheEntry yet.

> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu_accel.h    |  13 +++
>   hw/i386/intel_iommu_internal.h |   8 ++
>   hw/i386/intel_iommu.c          |   3 +
>   hw/i386/intel_iommu_accel.c    | 170 +++++++++++++++++++++++++++++++++
>   4 files changed, 194 insertions(+)
> 
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
> index e5f0b077b4..a77fd06fe0 100644
> --- a/hw/i386/intel_iommu_accel.h
> +++ b/hw/i386/intel_iommu_accel.h
> @@ -12,6 +12,13 @@
>   #define HW_I386_INTEL_IOMMU_ACCEL_H
>   #include CONFIG_DEVICES
>   
> +typedef struct VTDACCELPASIDCacheEntry {
> +    VTDHostIOMMUDevice *vtd_hiod;
> +    VTDPASIDEntry pe;
> +    uint32_t pasid;
> +    QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
> +} VTDACCELPASIDCacheEntry;

btw. s/VTDACCELPASIDCacheEntry/VTDAccelPASIDCacheEntry/ looks better. :)
> +
>   #ifdef CONFIG_VTD_ACCEL
>   bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
>                             Error **errp);
> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>   void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>                                         uint32_t pasid, hwaddr addr,
>                                         uint64_t npages, bool ih);
> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
>   void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>   #else
>   static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
> @@ -49,6 +57,11 @@ static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>   {
>   }
>   
> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
> +                                              VTDPASIDCacheInfo *pc_info)
> +{
> +}
> +
>   static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
>   {
>   }
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index c7e107fe87..ede4db6d2d 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>   #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
>   
>   #define PASID_0                             0
> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x)        extract64((x)->val[0], 9, 3)
>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>   #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
>   #define VTD_PASID_DIR_BITS_MASK       (0x3fffULL)
>   #define VTD_PASID_DIR_INDEX(pasid)    (((pasid) >> 6) & VTD_PASID_DIR_BITS_MASK)
>   #define VTD_PASID_DIR_FPD             (1ULL << 1) /* Fault Processing Disable */
> +#define VTD_PASID_TABLE_ENTRY_NUM     (1ULL << 6)
>   #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>   #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) & VTD_PASID_TABLE_BITS_MASK)
>   #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing Disable */
> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
>       PCIBus *bus;
>       uint8_t devfn;
>       HostIOMMUDevice *hiod;
> +    QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
>   } VTDHostIOMMUDevice;
>   
>   /*
> @@ -768,6 +771,11 @@ static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>       return memcmp(p1, p2, sizeof(*p1));
>   }
>   
> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
> +{
> +    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
> +}
> +
>   int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
>                                     VTDPASIDDirEntry *pdire);
>   int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 744b5967b2..984adc639a 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
>       g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
>                            pc_info);
>       vtd_iommu_unlock(s);
> +
> +    vtd_pasid_cache_sync_accel(s, pc_info);
>   }
>   
>   static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>       vtd_hiod->devfn = (uint8_t)devfn;
>       vtd_hiod->iommu_state = s;
>       vtd_hiod->hiod = hiod;
> +    QLIST_INIT(&vtd_hiod->pasid_cache_list);
>   
>       if (!vtd_check_hiod(s, vtd_hiod, errp)) {
>           g_free(vtd_hiod);
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index c2757f3bcd..0acf3ae77f 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -257,6 +257,176 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>                            vtd_flush_host_piotlb_locked, &piotlb_info);
>   }
>   
> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
> +                            VTDPASIDEntry *pe)
> +{

then you can name this as vtd_accel_fill_pc(). And I think you would 
have vtd_accel_update_pc() and vtd_accel_delete_pc() in the next patch.

> +    VTDACCELPASIDCacheEntry *vtd_pce;
> +
> +    QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
> +        if (vtd_pce->pasid == pasid) {
> +            if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
> +                vtd_pce->pe = *pe;
> +            }
> +            return;
> +        }
> +    }

hence this loop can be avoided.

> +    vtd_pce = g_malloc0(sizeof(VTDACCELPASIDCacheEntry));
> +    vtd_pce->vtd_hiod = vtd_hiod;
> +    vtd_pce->pasid = pasid;
> +    vtd_pce->pe = *pe;
> +    QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
> +}
> +
> +/*
> + * This function walks over PASID range within [start, end) in a single
> + * PASID table for entries matching @info type/did, then create
> + * VTDACCELPASIDCacheEntry if not exist yet.
> + */
> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
> +                                        dma_addr_t pt_base,
> +                                        int start,
> +                                        int end,
> +                                        VTDPASIDCacheInfo *info)
> +{
> +    IntelIOMMUState *s = vtd_hiod->iommu_state;
> +    VTDPASIDEntry pe;
> +    int pasid;
> +
> +    for (pasid = start; pasid < end; pasid++) {
> +        if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
> +            !vtd_pe_present(&pe)) {
> +            continue;
> +        }
> +
> +        if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
> +             info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
> +            (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
> +            /*
> +             * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
> +             * requires domain id check. If domain id check fail,
> +             * go to next pasid.
> +             */
> +            continue;
> +        }
> +
> +        vtd_find_add_pc(vtd_hiod, pasid, &pe);
> +    }
> +}
> +
> +/*
> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
> + * This function aims at looping over a range of PASIDs in the given
> + * two level table to identify the pasid config in guest.
> + */
> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
> +                                    dma_addr_t pdt_base,
> +                                    int start, int end,
> +                                    VTDPASIDCacheInfo *info)
> +{
> +    VTDPASIDDirEntry pdire;
> +    int pasid = start;
> +    int pasid_next;
> +    dma_addr_t pt_base;
> +
> +    while (pasid < end) {
> +        pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
> +                     ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
> +        pasid_next = pasid_next < end ? pasid_next : end;
> +
> +        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
> +            && vtd_pdire_present(&pdire)) {
> +            pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
> +            vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
> +                                        info);
> +        }
> +        pasid = pasid_next;
> +    }
> +}
> +
> +static void vtd_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
> +                                          int start, int end,
> +                                          VTDPASIDCacheInfo *pc_info)
> +{
> +    IntelIOMMUState *s = vtd_hiod->iommu_state;
> +    VTDContextEntry ce;
> +    int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
> +
> +    if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
> +                                  vtd_hiod->devfn, &ce)) {
> +        VTDPASIDCacheInfo walk_info = *pc_info;
> +        uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
> +                                VTD_PASID_TABLE_ENTRY_NUM;
> +
> +        end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
> +
> +        vtd_sm_pasid_table_walk(vtd_hiod, VTD_CE_GET_PASID_DIR_TABLE(&ce),
> +                                start, end, &walk_info);
> +    }
> +}
> +
> +/*
> + * This function replays the guest pasid bindings by walking the two level
> + * guest PASID table. For each valid pasid entry, it creates an entry
> + * VTDACCELPASIDCacheEntry dynamically if not exist yet. This entry holds
> + * info specific to a pasid
> + */
> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> +{
> +    int start = PASID_0, end = 1 << s->pasid;
> +    VTDHostIOMMUDevice *vtd_hiod;
> +    GHashTableIter as_it;

s/as_it/hiod_it/

> +
> +    if (!s->fsts) {
> +        return;
> +    }
> +
> +    /*
> +     * VTDPASIDCacheInfo honors PCI pasid but VTDACCELPASIDCacheEntry honors
> +     * iommu pasid
> +     */
> +    if (pc_info->pasid == PCI_NO_PASID) {
> +        pc_info->pasid = PASID_0;
> +    }
> +
> +    switch (pc_info->type) {
> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
> +        start = pc_info->pasid;
> +        end = pc_info->pasid + 1;
> +        /* fall through */
> +    case VTD_INV_DESC_PASIDC_G_DSI:
> +        /*
> +         * loop all assigned devices, do domain id check in
> +         * vtd_sm_pasid_table_walk_one() after get pasid entry.
> +         */
> +        break;
> +    case VTD_INV_DESC_PASIDC_G_GLOBAL:
> +        /* loop all assigned devices */
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    /*
> +     * In this replay, one only needs to care about the devices which are
> +     * backed by host IOMMU. Those devices have a corresponding vtd_hiod
> +     * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
> +     * is not necessary to replay the bindings since their cache should be
> +     * created in the future DMA address translation.
> +     *
> +     * VTD translation callback never accesses vtd_hiod and its corresponding
> +     * cached pasid entry, so no iommu lock needed here.
> +     */
> +    g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
> +    while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
> +        if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
> +                                 TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> +            continue;
> +        }
> +        vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
> +    }
> +}
> +
>   static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
>                                             void *caps, uint32_t size)
>   {
RE: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
Posted by Duan, Zhenzhong 2 weeks, 5 days ago

>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>pc_inv_dsc request
>
>On 3/6/26 11:44, Zhenzhong Duan wrote:
>> Structure VTDAddressSpace includes some elements suitable for emulated
>> device and passthrough device without PASID, e.g., address space,
>> different memory regions, etc, it is also protected by vtd iommu lock,
>> all these are useless and become a burden for passthrough device with
>> PASID.
>>
>> When there are lots of PASIDs used in one device, the AS and MRs are
>> all registered to memory core and impact the whole system performance.
>>
>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>> of a passthrough device, we define a light weight structure
>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>> will use this struct as a parameter to conduct binding/unbinding to
>> nested hwpt and to record the current binded nested hwpt. It's also
>
>s/binded/bound/

OK.

>
>> designed to support PASID_0.
>>
>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>> (pasid cache invalidation) request, walk through each pasid in each
>> passthrough device for valid pasid entries, create a new
>> VTDACCELPASIDCacheEntry if not existing yet.
>
>I think some tweak is preferred w.r.t. this and the next patch.
>
>In this patch you only need to handle the PASID entry addition. Hence
>you assume no existing VTDACCELPASIDCacheEntry yet.

[...]

>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   hw/i386/intel_iommu_accel.h    |  13 +++
>>   hw/i386/intel_iommu_internal.h |   8 ++
>>   hw/i386/intel_iommu.c          |   3 +
>>   hw/i386/intel_iommu_accel.c    | 170 +++++++++++++++++++++++++++++++++
>>   4 files changed, 194 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>> index e5f0b077b4..a77fd06fe0 100644
>> --- a/hw/i386/intel_iommu_accel.h
>> +++ b/hw/i386/intel_iommu_accel.h
>> @@ -12,6 +12,13 @@
>>   #define HW_I386_INTEL_IOMMU_ACCEL_H
>>   #include CONFIG_DEVICES
>>
>> +typedef struct VTDACCELPASIDCacheEntry {
>> +    VTDHostIOMMUDevice *vtd_hiod;
>> +    VTDPASIDEntry pe;
>> +    uint32_t pasid;
>> +    QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
>> +} VTDACCELPASIDCacheEntry;
>
>btw. s/VTDACCELPASIDCacheEntry/VTDAccelPASIDCacheEntry/ looks better. :)

Sure.

>> +
>>   #ifdef CONFIG_VTD_ACCEL
>>   bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>*vtd_hiod,
>>                             Error **errp);
>> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace
>*vtd_as, Error **errp);
>>   void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t
>domain_id,
>>                                         uint32_t pasid, hwaddr addr,
>>                                         uint64_t npages, bool ih);
>> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo
>*pc_info);
>>   void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>>   #else
>>   static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>> @@ -49,6 +57,11 @@ static inline void
>vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>>   {
>>   }
>>
>> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
>> +                                              VTDPASIDCacheInfo *pc_info)
>> +{
>> +}
>> +
>>   static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
>>   {
>>   }
>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>> index c7e107fe87..ede4db6d2d 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>   #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
>>
>>   #define PASID_0                             0
>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x)        extract64((x)->val[0], 9, 3)
>>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>>   #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
>> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
>>   #define VTD_PASID_DIR_BITS_MASK       (0x3fffULL)
>>   #define VTD_PASID_DIR_INDEX(pasid)    (((pasid) >> 6) &
>VTD_PASID_DIR_BITS_MASK)
>>   #define VTD_PASID_DIR_FPD             (1ULL << 1) /* Fault Processing Disable */
>> +#define VTD_PASID_TABLE_ENTRY_NUM     (1ULL << 6)
>>   #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>>   #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) &
>VTD_PASID_TABLE_BITS_MASK)
>>   #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing Disable
>*/
>> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
>>       PCIBus *bus;
>>       uint8_t devfn;
>>       HostIOMMUDevice *hiod;
>> +    QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
>>   } VTDHostIOMMUDevice;
>>
>>   /*
>> @@ -768,6 +771,11 @@ static inline int
>vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>>       return memcmp(p1, p2, sizeof(*p1));
>>   }
>>
>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
>> +{
>> +    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
>> +}
>> +
>>   int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
>>                                     VTDPASIDDirEntry *pdire);
>>   int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 744b5967b2..984adc639a 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState
>*s, VTDPASIDCacheInfo *pc_info)
>>       g_hash_table_foreach(s->vtd_address_spaces,
>vtd_pasid_cache_sync_locked,
>>                            pc_info);
>>       vtd_iommu_unlock(s);
>> +
>> +    vtd_pasid_cache_sync_accel(s, pc_info);
>>   }
>>
>>   static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
>> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus,
>void *opaque, int devfn,
>>       vtd_hiod->devfn = (uint8_t)devfn;
>>       vtd_hiod->iommu_state = s;
>>       vtd_hiod->hiod = hiod;
>> +    QLIST_INIT(&vtd_hiod->pasid_cache_list);
>>
>>       if (!vtd_check_hiod(s, vtd_hiod, errp)) {
>>           g_free(vtd_hiod);
>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>> index c2757f3bcd..0acf3ae77f 100644
>> --- a/hw/i386/intel_iommu_accel.c
>> +++ b/hw/i386/intel_iommu_accel.c
>> @@ -257,6 +257,176 @@ void
>vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>>                            vtd_flush_host_piotlb_locked, &piotlb_info);
>>   }
>>
>> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
>> +                            VTDPASIDEntry *pe)
>> +{
>
>then you can name this as vtd_accel_fill_pc(). And I think you would
>have vtd_accel_update_pc() and vtd_accel_delete_pc() in the next patch.

Yes, in the current implementation this patch handles pasid entry addition and update,
and the next patch handles removal.
What's the benefit if we move the pasid entry update into the next patch?

>
>> +    VTDACCELPASIDCacheEntry *vtd_pce;
>> +
>> +    QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
>> +        if (vtd_pce->pasid == pasid) {
>> +            if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
>> +                vtd_pce->pe = *pe;
>> +            }
>> +            return;
>> +        }
>> +    }
>
>hence this loop can be avoided.

Hm, I think the guest may send redundant pc_inv_dsc, so we need this loop
to bypass existing pasid entries to avoid adding duplicate pasid entries.
Let me know if I misunderstand what you mean.

>
>> +    vtd_pce = g_malloc0(sizeof(VTDACCELPASIDCacheEntry));
>> +    vtd_pce->vtd_hiod = vtd_hiod;
>> +    vtd_pce->pasid = pasid;
>> +    vtd_pce->pe = *pe;
>> +    QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
>> +}
>> +
>> +/*
>> + * This function walks over PASID range within [start, end) in a single
>> + * PASID table for entries matching @info type/did, then create
>> + * VTDACCELPASIDCacheEntry if not exist yet.
>> + */
>> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
>> +                                        dma_addr_t pt_base,
>> +                                        int start,
>> +                                        int end,
>> +                                        VTDPASIDCacheInfo *info)
>> +{
>> +    IntelIOMMUState *s = vtd_hiod->iommu_state;
>> +    VTDPASIDEntry pe;
>> +    int pasid;
>> +
>> +    for (pasid = start; pasid < end; pasid++) {
>> +        if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
>> +            !vtd_pe_present(&pe)) {
>> +            continue;
>> +        }
>> +
>> +        if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
>> +             info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
>> +            (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
>> +            /*
>> +             * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
>> +             * requires domain id check. If domain id check fail,
>> +             * go to next pasid.
>> +             */
>> +            continue;
>> +        }
>> +
>> +        vtd_find_add_pc(vtd_hiod, pasid, &pe);
>> +    }
>> +}
>> +
>> +/*
>> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
>> + * This function aims at looping over a range of PASIDs in the given
>> + * two level table to identify the pasid config in guest.
>> + */
>> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
>> +                                    dma_addr_t pdt_base,
>> +                                    int start, int end,
>> +                                    VTDPASIDCacheInfo *info)
>> +{
>> +    VTDPASIDDirEntry pdire;
>> +    int pasid = start;
>> +    int pasid_next;
>> +    dma_addr_t pt_base;
>> +
>> +    while (pasid < end) {
>> +        pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
>> +                     ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
>> +        pasid_next = pasid_next < end ? pasid_next : end;
>> +
>> +        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
>> +            && vtd_pdire_present(&pdire)) {
>> +            pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
>> +            vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
>> +                                        info);
>> +        }
>> +        pasid = pasid_next;
>> +    }
>> +}
>> +
>> +static void vtd_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
>> +                                          int start, int end,
>> +                                          VTDPASIDCacheInfo *pc_info)
>> +{
>> +    IntelIOMMUState *s = vtd_hiod->iommu_state;
>> +    VTDContextEntry ce;
>> +    int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
>> +
>> +    if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
>> +                                  vtd_hiod->devfn, &ce)) {
>> +        VTDPASIDCacheInfo walk_info = *pc_info;
>> +        uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
>> +                                VTD_PASID_TABLE_ENTRY_NUM;
>> +
>> +        end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
>> +
>> +        vtd_sm_pasid_table_walk(vtd_hiod,
>VTD_CE_GET_PASID_DIR_TABLE(&ce),
>> +                                start, end, &walk_info);
>> +    }
>> +}
>> +
>> +/*
>> + * This function replays the guest pasid bindings by walking the two level
>> + * guest PASID table. For each valid pasid entry, it creates an entry
>> + * VTDACCELPASIDCacheEntry dynamically if not exist yet. This entry holds
>> + * info specific to a pasid
>> + */
>> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo
>*pc_info)
>> +{
>> +    int start = PASID_0, end = 1 << s->pasid;
>> +    VTDHostIOMMUDevice *vtd_hiod;
>> +    GHashTableIter as_it;
>
>s/as_it/hiod_it/

Sure.

Thanks
Zhenzhong

>
>> +
>> +    if (!s->fsts) {
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * VTDPASIDCacheInfo honors PCI pasid but VTDACCELPASIDCacheEntry
>honors
>> +     * iommu pasid
>> +     */
>> +    if (pc_info->pasid == PCI_NO_PASID) {
>> +        pc_info->pasid = PASID_0;
>> +    }
>> +
>> +    switch (pc_info->type) {
>> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
>> +        start = pc_info->pasid;
>> +        end = pc_info->pasid + 1;
>> +        /* fall through */
>> +    case VTD_INV_DESC_PASIDC_G_DSI:
>> +        /*
>> +         * loop all assigned devices, do domain id check in
>> +         * vtd_sm_pasid_table_walk_one() after get pasid entry.
>> +         */
>> +        break;
>> +    case VTD_INV_DESC_PASIDC_G_GLOBAL:
>> +        /* loop all assigned devices */
>> +        break;
>> +    default:
>> +        g_assert_not_reached();
>> +    }
>> +
>> +    /*
>> +     * In this replay, one only needs to care about the devices which are
>> +     * backed by host IOMMU. Those devices have a corresponding vtd_hiod
>> +     * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
>> +     * is not necessary to replay the bindings since their cache should be
>> +     * created in the future DMA address translation.
>> +     *
>> +     * VTD translation callback never accesses vtd_hiod and its corresponding
>> +     * cached pasid entry, so no iommu lock needed here.
>> +     */
>> +    g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
>> +    while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
>> +        if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
>> +                                 TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
>> +            continue;
>> +        }
>> +        vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
>> +    }
>> +}
>> +
>>   static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
>>                                             void *caps, uint32_t size)
>>   {

Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
Posted by Yi Liu 2 weeks, 5 days ago
On 3/23/26 13:50, Duan, Zhenzhong wrote:
> 
> 
>> -----Original Message-----
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>> pc_inv_dsc request
>>
>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>> Structure VTDAddressSpace includes some elements suitable for emulated
>>> device and passthrough device without PASID, e.g., address space,
>>> different memory regions, etc, it is also protected by vtd iommu lock,
>>> all these are useless and become a burden for passthrough device with
>>> PASID.
>>>
>>> When there are lots of PASIDs used in one device, the AS and MRs are
>>> all registered to memory core and impact the whole system performance.
>>>
>>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>>> of a passthrough device, we define a light weight structure
>>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>>> will use this struct as a parameter to conduct binding/unbinding to
>>> nested hwpt and to record the current binded nested hwpt. It's also
>>
>> s/binded/bound/
> 
> OK.
> 
>>
>>> designed to support PASID_0.
>>>
>>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>>> (pasid cache invalidation) request, walk through each pasid in each
>>> passthrough device for valid pasid entries, create a new
>>> VTDACCELPASIDCacheEntry if not existing yet.
>>
>> I think some tweak is preferred w.r.t. this and the next patch.
>>
>> In this patch you only need to handle the PASID entry addition. Hence
>> you assume no existing VTDACCELPASIDCacheEntry yet.
> 
> [...]
> 
>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>    hw/i386/intel_iommu_accel.h    |  13 +++
>>>    hw/i386/intel_iommu_internal.h |   8 ++
>>>    hw/i386/intel_iommu.c          |   3 +
>>>    hw/i386/intel_iommu_accel.c    | 170 +++++++++++++++++++++++++++++++++
>>>    4 files changed, 194 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>>> index e5f0b077b4..a77fd06fe0 100644
>>> --- a/hw/i386/intel_iommu_accel.h
>>> +++ b/hw/i386/intel_iommu_accel.h
>>> @@ -12,6 +12,13 @@
>>>    #define HW_I386_INTEL_IOMMU_ACCEL_H
>>>    #include CONFIG_DEVICES
>>>
>>> +typedef struct VTDACCELPASIDCacheEntry {
>>> +    VTDHostIOMMUDevice *vtd_hiod;
>>> +    VTDPASIDEntry pe;
>>> +    uint32_t pasid;
>>> +    QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
>>> +} VTDACCELPASIDCacheEntry;
>>
>> btw. s/VTDACCELPASIDCacheEntry/VTDAccelPASIDCacheEntry/ looks better. :)
> 
> Sure.
> 
>>> +
>>>    #ifdef CONFIG_VTD_ACCEL
>>>    bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>> *vtd_hiod,
>>>                              Error **errp);
>>> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace
>> *vtd_as, Error **errp);
>>>    void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t
>> domain_id,
>>>                                          uint32_t pasid, hwaddr addr,
>>>                                          uint64_t npages, bool ih);
>>> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo
>> *pc_info);
>>>    void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>>>    #else
>>>    static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>>> @@ -49,6 +57,11 @@ static inline void
>> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>>>    {
>>>    }
>>>
>>> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
>>> +                                              VTDPASIDCacheInfo *pc_info)
>>> +{
>>> +}
>>> +
>>>    static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
>>>    {
>>>    }
>>> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
>>> index c7e107fe87..ede4db6d2d 100644
>>> --- a/hw/i386/intel_iommu_internal.h
>>> +++ b/hw/i386/intel_iommu_internal.h
>>> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>    #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
>>>
>>>    #define PASID_0                             0
>>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x)        extract64((x)->val[0], 9, 3)
>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>> ~VTD_HAW_MASK(aw))
>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>>>    #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
>>> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
>>>    #define VTD_PASID_DIR_BITS_MASK       (0x3fffULL)
>>>    #define VTD_PASID_DIR_INDEX(pasid)    (((pasid) >> 6) &
>> VTD_PASID_DIR_BITS_MASK)
>>>    #define VTD_PASID_DIR_FPD             (1ULL << 1) /* Fault Processing Disable */
>>> +#define VTD_PASID_TABLE_ENTRY_NUM     (1ULL << 6)
>>>    #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>>>    #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) &
>> VTD_PASID_TABLE_BITS_MASK)
>>>    #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing Disable
>> */
>>> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
>>>        PCIBus *bus;
>>>        uint8_t devfn;
>>>        HostIOMMUDevice *hiod;
>>> +    QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
>>>    } VTDHostIOMMUDevice;
>>>
>>>    /*
>>> @@ -768,6 +771,11 @@ static inline int
>> vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>>>        return memcmp(p1, p2, sizeof(*p1));
>>>    }
>>>
>>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
>>> +{
>>> +    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
>>> +}
>>> +
>>>    int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
>>>                                      VTDPASIDDirEntry *pdire);
>>>    int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index 744b5967b2..984adc639a 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState
>> *s, VTDPASIDCacheInfo *pc_info)
>>>        g_hash_table_foreach(s->vtd_address_spaces,
>> vtd_pasid_cache_sync_locked,
>>>                             pc_info);
>>>        vtd_iommu_unlock(s);
>>> +
>>> +    vtd_pasid_cache_sync_accel(s, pc_info);
>>>    }
>>>
>>>    static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
>>> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus,
>> void *opaque, int devfn,
>>>        vtd_hiod->devfn = (uint8_t)devfn;
>>>        vtd_hiod->iommu_state = s;
>>>        vtd_hiod->hiod = hiod;
>>> +    QLIST_INIT(&vtd_hiod->pasid_cache_list);
>>>
>>>        if (!vtd_check_hiod(s, vtd_hiod, errp)) {
>>>            g_free(vtd_hiod);
>>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>>> index c2757f3bcd..0acf3ae77f 100644
>>> --- a/hw/i386/intel_iommu_accel.c
>>> +++ b/hw/i386/intel_iommu_accel.c
>>> @@ -257,6 +257,176 @@ void
>> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>>>                             vtd_flush_host_piotlb_locked, &piotlb_info);
>>>    }
>>>
>>> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
>>> +                            VTDPASIDEntry *pe)
>>> +{
>>
>> then you can name this as vtd_accel_fill_pc(). And I think you would
>> have vtd_accel_update_pc() and vtd_accel_delete_pc() in the next patch.
> 
> Yes, in current implementation this patch handles pasid entry addition and update,
> next patch handles removal.
> What's the benefit if we move pasid entry update into next patch?

If there is no redundant flush, then for a newly created pasid entry you need
not loop over the pasid cache entry list. But, it's possible to have such a guest. :)

>>
>>> +    VTDACCELPASIDCacheEntry *vtd_pce;
>>> +
>>> +    QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
>>> +        if (vtd_pce->pasid == pasid) {
>>> +            if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
>>> +                vtd_pce->pe = *pe;
>>> +            }
>>> +            return;
>>> +        }
>>> +    }
>>
>> hence this loop can be avoided.
> 
> Hm, I think guest may send redundant pv_inv_dsc, so we need this loop
> to bypass existing pasid entry to avoid adding duplicate pasid entry.
> Let me know if I misunderstand what you mean.

I do have an idea to handle it, but it relies on some kind of flag
returned by the removal/update processing path to tell that there is no
need to re-create the pasid cache entry in the
vtd_replay_pasid_bind_for_dev() path. I don't think we need to make
things so complicated, so let's follow the current splitting. But I
think you can still name this helper vtd_accel_fill_pc(); "find"
sometimes implies returning something back.
RE: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
Posted by Duan, Zhenzhong 2 weeks, 5 days ago

>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>pc_inv_dsc request
>
>On 3/23/26 13:50, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>>> pc_inv_dsc request
>>>
>>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>>> Structure VTDAddressSpace includes some elements suitable for emulated
>>>> device and passthrough device without PASID, e.g., address space,
>>>> different memory regions, etc, it is also protected by vtd iommu lock,
>>>> all these are useless and become a burden for passthrough device with
>>>> PASID.
>>>>
>>>> When there are lots of PASIDs used in one device, the AS and MRs are
>>>> all registered to memory core and impact the whole system performance.
>>>>
>>>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>>>> of a passthrough device, we define a light weight structure
>>>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>>>> will use this struct as a parameter to conduct binding/unbinding to
>>>> nested hwpt and to record the current binded nested hwpt. It's also
>>>
>>> s/binded/bound/
>>
>> OK.
>>
>>>
>>>> designed to support PASID_0.
>>>>
>>>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>>>> (pasid cache invalidation) request, walk through each pasid in each
>>>> passthrough device for valid pasid entries, create a new
>>>> VTDACCELPASIDCacheEntry if not existing yet.
>>>
>>> I think some tweak is preferred w.r.t. this and the next patch.
>>>
>>> In this patch you only need to handle the PASID entry addition. Hence
>>> you assume no existing VTDACCELPASIDCacheEntry yet.
>>
>> [...]
>>
>>>
>>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>>> ---
>>>>    hw/i386/intel_iommu_accel.h    |  13 +++
>>>>    hw/i386/intel_iommu_internal.h |   8 ++
>>>>    hw/i386/intel_iommu.c          |   3 +
>>>>    hw/i386/intel_iommu_accel.c    | 170
>+++++++++++++++++++++++++++++++++
>>>>    4 files changed, 194 insertions(+)
>>>>
>>>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>>>> index e5f0b077b4..a77fd06fe0 100644
>>>> --- a/hw/i386/intel_iommu_accel.h
>>>> +++ b/hw/i386/intel_iommu_accel.h
>>>> @@ -12,6 +12,13 @@
>>>>    #define HW_I386_INTEL_IOMMU_ACCEL_H
>>>>    #include CONFIG_DEVICES
>>>>
>>>> +typedef struct VTDACCELPASIDCacheEntry {
>>>> +    VTDHostIOMMUDevice *vtd_hiod;
>>>> +    VTDPASIDEntry pe;
>>>> +    uint32_t pasid;
>>>> +    QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
>>>> +} VTDACCELPASIDCacheEntry;
>>>
>>> btw. s/VTDACCELPASIDCacheEntry/VTDAccelPASIDCacheEntry/ looks better. :)
>>
>> Sure.
>>
>>>> +
>>>>    #ifdef CONFIG_VTD_ACCEL
>>>>    bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice
>>> *vtd_hiod,
>>>>                              Error **errp);
>>>> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace
>>> *vtd_as, Error **errp);
>>>>    void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t
>>> domain_id,
>>>>                                          uint32_t pasid, hwaddr addr,
>>>>                                          uint64_t npages, bool ih);
>>>> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo
>>> *pc_info);
>>>>    void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>>>>    #else
>>>>    static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
>>>> @@ -49,6 +57,11 @@ static inline void
>>> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>>>>    {
>>>>    }
>>>>
>>>> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
>>>> +                                              VTDPASIDCacheInfo *pc_info)
>>>> +{
>>>> +}
>>>> +
>>>>    static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
>>>>    {
>>>>    }
>>>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>>>> index c7e107fe87..ede4db6d2d 100644
>>>> --- a/hw/i386/intel_iommu_internal.h
>>>> +++ b/hw/i386/intel_iommu_internal.h
>>>> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>>>>    #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
>>>>
>>>>    #define PASID_0                             0
>>>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x)        extract64((x)->val[0], 9, 3)
>>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL |
>>> ~VTD_HAW_MASK(aw))
>>>>    #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>>>>    #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
>>>> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
>>>>    #define VTD_PASID_DIR_BITS_MASK       (0x3fffULL)
>>>>    #define VTD_PASID_DIR_INDEX(pasid)    (((pasid) >> 6) &
>>> VTD_PASID_DIR_BITS_MASK)
>>>>    #define VTD_PASID_DIR_FPD             (1ULL << 1) /* Fault Processing Disable
>*/
>>>> +#define VTD_PASID_TABLE_ENTRY_NUM     (1ULL << 6)
>>>>    #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>>>>    #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) &
>>> VTD_PASID_TABLE_BITS_MASK)
>>>>    #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing
>Disable
>>> */
>>>> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
>>>>        PCIBus *bus;
>>>>        uint8_t devfn;
>>>>        HostIOMMUDevice *hiod;
>>>> +    QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
>>>>    } VTDHostIOMMUDevice;
>>>>
>>>>    /*
>>>> @@ -768,6 +771,11 @@ static inline int
>>> vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>>>>        return memcmp(p1, p2, sizeof(*p1));
>>>>    }
>>>>
>>>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
>>>> +{
>>>> +    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
>>>> +}
>>>> +
>>>>    int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t
>pasid,
>>>>                                      VTDPASIDDirEntry *pdire);
>>>>    int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>>> index 744b5967b2..984adc639a 100644
>>>> --- a/hw/i386/intel_iommu.c
>>>> +++ b/hw/i386/intel_iommu.c
>>>> @@ -3202,6 +3202,8 @@ static void
>vtd_pasid_cache_sync(IntelIOMMUState
>>> *s, VTDPASIDCacheInfo *pc_info)
>>>>        g_hash_table_foreach(s->vtd_address_spaces,
>>> vtd_pasid_cache_sync_locked,
>>>>                             pc_info);
>>>>        vtd_iommu_unlock(s);
>>>> +
>>>> +    vtd_pasid_cache_sync_accel(s, pc_info);
>>>>    }
>>>>
>>>>    static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
>>>> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus
>*bus,
>>> void *opaque, int devfn,
>>>>        vtd_hiod->devfn = (uint8_t)devfn;
>>>>        vtd_hiod->iommu_state = s;
>>>>        vtd_hiod->hiod = hiod;
>>>> +    QLIST_INIT(&vtd_hiod->pasid_cache_list);
>>>>
>>>>        if (!vtd_check_hiod(s, vtd_hiod, errp)) {
>>>>            g_free(vtd_hiod);
>>>> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
>>>> index c2757f3bcd..0acf3ae77f 100644
>>>> --- a/hw/i386/intel_iommu_accel.c
>>>> +++ b/hw/i386/intel_iommu_accel.c
>>>> @@ -257,6 +257,176 @@ void
>>> vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>>>>                             vtd_flush_host_piotlb_locked, &piotlb_info);
>>>>    }
>>>>
>>>> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t
>pasid,
>>>> +                            VTDPASIDEntry *pe)
>>>> +{
>>>
>>> then you can name this as vtd_accel_fill_pc(). And I think you would
>>> have vtd_accel_update_pc() and vtd_accel_delete_pc() in the next patch.
>>
>> Yes, in current implementation this patch handles pasid entry addition and
>update,
>> next patch handles removal.
>> What's the benefit if we move pasid entry update into next patch?
>
>If no redundant flush, for a newly created pasid entry, you need not to
>loop the pasid cache entry list. But, it's possible to have such guest. :)
>
>>>
>>>> +    VTDACCELPASIDCacheEntry *vtd_pce;
>>>> +
>>>> +    QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
>>>> +        if (vtd_pce->pasid == pasid) {
>>>> +            if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
>>>> +                vtd_pce->pe = *pe;
>>>> +            }
>>>> +            return;
>>>> +        }
>>>> +    }
>>>
>>> hence this loop can be avoided.
>>
>> Hm, I think guest may send redundant pv_inv_dsc, so we need this loop
>> to bypass existing pasid entry to avoid adding duplicate pasid entry.
>> Let me know if I misunderstand what you mean.
>
>Although I have an idea to handle it. But it relies on some kind of flag
>returned by the removal/update processing path to tell no need to re-
>create pasid cache entry in the vtd_replay_pasid_bind_for_dev() path. I
>don't think we need to make things so complicated. So let's follow the
>current splitting. But I think you still can name this helper as
>vtd_accel_fill_pc(). "find" sometimes means return something back.

Got it, will do.

Thanks
Zhenzhong
Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
Posted by Yi Liu 3 weeks, 3 days ago
On 3/6/26 11:44, Zhenzhong Duan wrote:
> Structure VTDAddressSpace includes some elements suitable for emulated
> device and passthrough device without PASID, e.g., address space,
> different memory regions, etc, it is also protected by vtd iommu lock,
> all these are useless and become a burden for passthrough device with
> PASID.
> 
> When there are lots of PASIDs used in one device, the AS and MRs are
> all registered to memory core and impact the whole system performance.
> 
> So instead of using VTDAddressSpace to cache pasid entry for each pasid
> of a passthrough device, we define a light weight structure
> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
> will use this struct as a parameter to conduct binding/unbinding to
> nested hwpt and to record the current binded nested hwpt. It's also
> designed to support PASID_0.

PASID_0 of a passthrough device still needs to register MRs in case the
guest does not operate in scalable mode. So for PASID_0, you will have
both VTDPASIDCacheEntry and VTDACCELPASIDCacheEntry. Is that right?

> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
> (pasid cache invalidation) request, walk through each pasid in each
> passthrough device for valid pasid entries, create a new
> VTDACCELPASIDCacheEntry if not existing yet.

I think this idea is ok, but we need to be clear about the
boundary between the usages of the two pasid cache structures.

> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>   hw/i386/intel_iommu_accel.h    |  13 +++
>   hw/i386/intel_iommu_internal.h |   8 ++
>   hw/i386/intel_iommu.c          |   3 +
>   hw/i386/intel_iommu_accel.c    | 170 +++++++++++++++++++++++++++++++++
>   4 files changed, 194 insertions(+)
> 
> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
> index e5f0b077b4..a77fd06fe0 100644
> --- a/hw/i386/intel_iommu_accel.h
> +++ b/hw/i386/intel_iommu_accel.h
> @@ -12,6 +12,13 @@
>   #define HW_I386_INTEL_IOMMU_ACCEL_H
>   #include CONFIG_DEVICES
>   
> +typedef struct VTDACCELPASIDCacheEntry {
> +    VTDHostIOMMUDevice *vtd_hiod;
> +    VTDPASIDEntry pe;

let's use pasid_entry here as we already used it in the
VTDPASIDCacheEntry. Sometimes, it may help when searching for related
code.

> +    uint32_t pasid;
> +    QLIST_ENTRY(VTDACCELPASIDCacheEntry) next;
> +} VTDACCELPASIDCacheEntry;
> +
>   #ifdef CONFIG_VTD_ACCEL
>   bool vtd_check_hiod_accel(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hiod,
>                             Error **errp);
> @@ -20,6 +27,7 @@ bool vtd_propagate_guest_pasid(VTDAddressSpace *vtd_as, Error **errp);
>   void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>                                         uint32_t pasid, hwaddr addr,
>                                         uint64_t npages, bool ih);
> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info);
>   void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops);
>   #else
>   static inline bool vtd_check_hiod_accel(IntelIOMMUState *s,
> @@ -49,6 +57,11 @@ static inline void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s,
>   {
>   }
>   
> +static inline void vtd_pasid_cache_sync_accel(IntelIOMMUState *s,
> +                                              VTDPASIDCacheInfo *pc_info)
> +{
> +}
> +
>   static inline void vtd_iommu_ops_update_accel(PCIIOMMUOps *ops)
>   {
>   }
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index c7e107fe87..ede4db6d2d 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -616,6 +616,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>   #define VTD_CTX_ENTRY_SCALABLE_SIZE   32
>   
>   #define PASID_0                             0
> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x)        extract64((x)->val[0], 9, 3)
>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw)  (0x1e0ULL | ~VTD_HAW_MASK(aw))
>   #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1      0xffffffffffe00000ULL
>   #define VTD_SM_CONTEXT_ENTRY_PRE            0x10ULL
> @@ -646,6 +647,7 @@ typedef struct VTDPIOTLBInvInfo {
>   #define VTD_PASID_DIR_BITS_MASK       (0x3fffULL)
>   #define VTD_PASID_DIR_INDEX(pasid)    (((pasid) >> 6) & VTD_PASID_DIR_BITS_MASK)
>   #define VTD_PASID_DIR_FPD             (1ULL << 1) /* Fault Processing Disable */
> +#define VTD_PASID_TABLE_ENTRY_NUM     (1ULL << 6)
>   #define VTD_PASID_TABLE_BITS_MASK     (0x3fULL)
>   #define VTD_PASID_TABLE_INDEX(pasid)  ((pasid) & VTD_PASID_TABLE_BITS_MASK)
>   #define VTD_PASID_ENTRY_FPD           (1ULL << 1) /* Fault Processing Disable */
> @@ -711,6 +713,7 @@ typedef struct VTDHostIOMMUDevice {
>       PCIBus *bus;
>       uint8_t devfn;
>       HostIOMMUDevice *hiod;
> +    QLIST_HEAD(, VTDACCELPASIDCacheEntry) pasid_cache_list;
>   } VTDHostIOMMUDevice;
>   
>   /*
> @@ -768,6 +771,11 @@ static inline int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>       return memcmp(p1, p2, sizeof(*p1));
>   }
>   
> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
> +{
> +    return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
> +}
> +
>   int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, uint32_t pasid,
>                                     VTDPASIDDirEntry *pdire);
>   int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint32_t pasid,
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 744b5967b2..984adc639a 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3202,6 +3202,8 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
>       g_hash_table_foreach(s->vtd_address_spaces, vtd_pasid_cache_sync_locked,
>                            pc_info);
>       vtd_iommu_unlock(s);
> +
> +    vtd_pasid_cache_sync_accel(s, pc_info);
>   }
>   
>   static void vtd_replay_pasid_bindings_all(IntelIOMMUState *s)
> @@ -4760,6 +4762,7 @@ static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>       vtd_hiod->devfn = (uint8_t)devfn;
>       vtd_hiod->iommu_state = s;
>       vtd_hiod->hiod = hiod;
> +    QLIST_INIT(&vtd_hiod->pasid_cache_list);
>   
>       if (!vtd_check_hiod(s, vtd_hiod, errp)) {
>           g_free(vtd_hiod);
> diff --git a/hw/i386/intel_iommu_accel.c b/hw/i386/intel_iommu_accel.c
> index c2757f3bcd..0acf3ae77f 100644
> --- a/hw/i386/intel_iommu_accel.c
> +++ b/hw/i386/intel_iommu_accel.c
> @@ -257,6 +257,176 @@ void vtd_flush_host_piotlb_all_locked(IntelIOMMUState *s, uint16_t domain_id,
>                            vtd_flush_host_piotlb_locked, &piotlb_info);
>   }
>   
> +static void vtd_find_add_pc(VTDHostIOMMUDevice *vtd_hiod, uint32_t pasid,
> +                            VTDPASIDEntry *pe)
> +{
> +    VTDACCELPASIDCacheEntry *vtd_pce;
> +
> +    QLIST_FOREACH(vtd_pce, &vtd_hiod->pasid_cache_list, next) {
> +        if (vtd_pce->pasid == pasid) {
> +            if (vtd_pasid_entry_compare(pe, &vtd_pce->pe)) {
> +                vtd_pce->pe = *pe;
> +            }
> +            return;
> +        }
> +    }
> +
> +    vtd_pce = g_malloc0(sizeof(VTDACCELPASIDCacheEntry));
> +    vtd_pce->vtd_hiod = vtd_hiod;
> +    vtd_pce->pasid = pasid;
> +    vtd_pce->pe = *pe;
> +    QLIST_INSERT_HEAD(&vtd_hiod->pasid_cache_list, vtd_pce, next);
> +}
> +
> +/*
> + * This function walks over PASID range within [start, end) in a single
> + * PASID table for entries matching @info type/did, then create
> + * VTDACCELPASIDCacheEntry if not exist yet.
> + */
> +static void vtd_sm_pasid_table_walk_one(VTDHostIOMMUDevice *vtd_hiod,
> +                                        dma_addr_t pt_base,
> +                                        int start,
> +                                        int end,
> +                                        VTDPASIDCacheInfo *info)
> +{
> +    IntelIOMMUState *s = vtd_hiod->iommu_state;
> +    VTDPASIDEntry pe;
> +    int pasid;
> +
> +    for (pasid = start; pasid < end; pasid++) {
> +        if (vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe) ||
> +            !vtd_pe_present(&pe)) {
> +            continue;
> +        }
> +
> +        if ((info->type == VTD_INV_DESC_PASIDC_G_DSI ||
> +             info->type == VTD_INV_DESC_PASIDC_G_PASID_SI) &&
> +            (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
> +            /*
> +             * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
> +             * requires domain id check. If domain id check fail,
> +             * go to next pasid.
> +             */
> +            continue;
> +        }
> +
> +        vtd_find_add_pc(vtd_hiod, pasid, &pe);
> +    }
> +}
> +
> +/*
> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
> + * This function aims at looping over a range of PASIDs in the given
> + * two level table to identify the pasid config in guest.
> + */
> +static void vtd_sm_pasid_table_walk(VTDHostIOMMUDevice *vtd_hiod,
> +                                    dma_addr_t pdt_base,
> +                                    int start, int end,
> +                                    VTDPASIDCacheInfo *info)
> +{
> +    VTDPASIDDirEntry pdire;
> +    int pasid = start;
> +    int pasid_next;
> +    dma_addr_t pt_base;
> +
> +    while (pasid < end) {
> +        pasid_next = (pasid + VTD_PASID_TABLE_ENTRY_NUM) &
> +                     ~(VTD_PASID_TABLE_ENTRY_NUM - 1);
> +        pasid_next = pasid_next < end ? pasid_next : end;
> +
> +        if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
> +            && vtd_pdire_present(&pdire)) {
> +            pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
> +            vtd_sm_pasid_table_walk_one(vtd_hiod, pt_base, pasid, pasid_next,
> +                                        info);
> +        }
> +        pasid = pasid_next;
> +    }
> +}
> +
> +static void vtd_replay_pasid_bind_for_dev(VTDHostIOMMUDevice *vtd_hiod,
> +                                          int start, int end,
> +                                          VTDPASIDCacheInfo *pc_info)
> +{
> +    IntelIOMMUState *s = vtd_hiod->iommu_state;
> +    VTDContextEntry ce;
> +    int dev_max_pasid = 1 << vtd_hiod->hiod->caps.max_pasid_log2;
> +
> +    if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_hiod->bus),
> +                                  vtd_hiod->devfn, &ce)) {
> +        VTDPASIDCacheInfo walk_info = *pc_info;
> +        uint32_t ce_max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
> +                                VTD_PASID_TABLE_ENTRY_NUM;
> +
> +        end = MIN(end, MIN(dev_max_pasid, ce_max_pasid));
> +
> +        vtd_sm_pasid_table_walk(vtd_hiod, VTD_CE_GET_PASID_DIR_TABLE(&ce),
> +                                start, end, &walk_info);
> +    }
> +}
> +
> +/*
> + * This function replays the guest pasid bindings by walking the two level
> + * guest PASID table. For each valid pasid entry, it creates an entry
> + * VTDACCELPASIDCacheEntry dynamically if not exist yet. This entry holds
> + * info specific to a pasid
> + */
> +void vtd_pasid_cache_sync_accel(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> +{
> +    int start = PASID_0, end = 1 << s->pasid;
> +    VTDHostIOMMUDevice *vtd_hiod;
> +    GHashTableIter as_it;
> +
> +    if (!s->fsts) {
> +        return;
> +    }
> +
> +    /*
> +     * VTDPASIDCacheInfo honors PCI pasid but VTDACCELPASIDCacheEntry honors
> +     * iommu pasid
> +     */
> +    if (pc_info->pasid == PCI_NO_PASID) {
> +        pc_info->pasid = PASID_0;
> +    }
> +
> +    switch (pc_info->type) {
> +    case VTD_INV_DESC_PASIDC_G_PASID_SI:
> +        start = pc_info->pasid;
> +        end = pc_info->pasid + 1;
> +        /* fall through */
> +    case VTD_INV_DESC_PASIDC_G_DSI:
> +        /*
> +         * loop all assigned devices, do domain id check in
> +         * vtd_sm_pasid_table_walk_one() after get pasid entry.
> +         */
> +        break;
> +    case VTD_INV_DESC_PASIDC_G_GLOBAL:
> +        /* loop all assigned devices */
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    /*
> +     * In this replay, one only needs to care about the devices which are
> +     * backed by host IOMMU. Those devices have a corresponding vtd_hiod
> +     * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
> +     * is not necessary to replay the bindings since their cache should be
> +     * created in the future DMA address translation.
> +     *
> +     * VTD translation callback never accesses vtd_hiod and its corresponding
> +     * cached pasid entry, so no iommu lock needed here.
> +     */
> +    g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
> +    while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
> +        if (!object_dynamic_cast(OBJECT(vtd_hiod->hiod),
> +                                 TYPE_HOST_IOMMU_DEVICE_IOMMUFD)) {
> +            continue;
> +        }
> +        vtd_replay_pasid_bind_for_dev(vtd_hiod, start, end, pc_info);
> +    }
> +}
> +
>   static uint64_t vtd_get_host_iommu_quirks(uint32_t type,
>                                             void *caps, uint32_t size)
>   {
RE: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
Posted by Duan, Zhenzhong 3 weeks, 2 days ago

>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>pc_inv_dsc request
>
>On 3/6/26 11:44, Zhenzhong Duan wrote:
>> Structure VTDAddressSpace includes some elements suitable for emulated
>> device and passthrough device without PASID, e.g., address space,
>> different memory regions, etc, it is also protected by vtd iommu lock,
>> all these are useless and become a burden for passthrough device with
>> PASID.
>>
>> When there are lots of PASIDs used in one device, the AS and MRs are
>> all registered to memory core and impact the whole system performance.
>>
>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>> of a passthrough device, we define a light weight structure
>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>> will use this struct as a parameter to conduct binding/unbinding to
>> nested hwpt and to record the current binded nested hwpt. It's also
>> designed to support PASID_0.
>
>PASID_0 of passthrough device still needs to register MRs in case guest
>does not operate in scalable mode. So for PASID_0, you will have both
>VTDPASIDCacheEntry and VTDACCELPASIDCacheEntry. Is it?

Exactly!

>
>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>> (pasid cache invalidation) request, walk through each pasid in each
>> passthrough device for valid pasid entries, create a new
>> VTDACCELPASIDCacheEntry if not existing yet.
>
>I think this idea is ok, but we need to be clear about the
>boundary between the usages of the two pasid cache structures.

Yes, the principle here is that VTDACCELPASIDCacheEntry is only used in hw/i386/intel_iommu_accel.c, while VTDPASIDCacheEntry is only used in hw/i386/intel_iommu.c

>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>   hw/i386/intel_iommu_accel.h    |  13 +++
>>   hw/i386/intel_iommu_internal.h |   8 ++
>>   hw/i386/intel_iommu.c          |   3 +
>>   hw/i386/intel_iommu_accel.c    | 170 +++++++++++++++++++++++++++++++++
>>   4 files changed, 194 insertions(+)
>>
>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>> index e5f0b077b4..a77fd06fe0 100644
>> --- a/hw/i386/intel_iommu_accel.h
>> +++ b/hw/i386/intel_iommu_accel.h
>> @@ -12,6 +12,13 @@
>>   #define HW_I386_INTEL_IOMMU_ACCEL_H
>>   #include CONFIG_DEVICES
>>
>> +typedef struct VTDACCELPASIDCacheEntry {
>> +    VTDHostIOMMUDevice *vtd_hiod;
>> +    VTDPASIDEntry pe;
>
>let's use pasid_entry here as we already used it in the
>VTDPASIDCacheEntry. Sometimes, it may help when searching for related
>code.

Good suggestion, will do.

Thanks
Zhenzhong

Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
Posted by Yi Liu 3 weeks, 1 day ago

On 3/19/26 16:26, Duan, Zhenzhong wrote:
> 
> 
>> -----Original Message-----
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>> pc_inv_dsc request
>>
>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>> Structure VTDAddressSpace includes some elements suitable for emulated
>>> device and passthrough device without PASID, e.g., address space,
>>> different memory regions, etc, it is also protected by vtd iommu lock,
>>> all these are useless and become a burden for passthrough device with
>>> PASID.
>>>
>>> When there are lots of PASIDs used in one device, the AS and MRs are
>>> all registered to memory core and impact the whole system performance.
>>>
>>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>>> of a passthrough device, we define a light weight structure
>>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>>> will use this struct as a parameter to conduct binding/unbinding to
>>> nested hwpt and to record the current binded nested hwpt. It's also
>>> designed to support PASID_0.
>>
>> PASID_0 of passthrough device still needs to register MRs in case guest
>> does not operate in scalable mode. So for PASID_0, you will have both
>> VTDPASIDCacheEntry and VTDACCELPASIDCacheEntry. Is it?
> 
> Exactly!

better to note it somewhere (commit message, code comment). It would be
a good hint for future maintenance. e.g. it is easy to forget this fact. :)

>>
>>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>>> (pasid cache invalidation) request, walk through each pasid in each
>>> passthrough device for valid pasid entries, create a new
>>> VTDACCELPASIDCacheEntry if not existing yet.
>>
>> I think this idea is ok, but we need to be clear about the
>> boundary between the usages of the two pasid cache structures.
> 
> Yes, the principle here is VTDACCELPASIDCacheEntry is only used in intel_iommu_accel.c, VTDPASIDCacheEntry is only used in hw/i386/intel_iommu.c

just mark it in commit message as well.

>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>    hw/i386/intel_iommu_accel.h    |  13 +++
>>>    hw/i386/intel_iommu_internal.h |   8 ++
>>>    hw/i386/intel_iommu.c          |   3 +
>>>    hw/i386/intel_iommu_accel.c    | 170 +++++++++++++++++++++++++++++++++
>>>    4 files changed, 194 insertions(+)
>>>
>>> diff --git a/hw/i386/intel_iommu_accel.h b/hw/i386/intel_iommu_accel.h
>>> index e5f0b077b4..a77fd06fe0 100644
>>> --- a/hw/i386/intel_iommu_accel.h
>>> +++ b/hw/i386/intel_iommu_accel.h
>>> @@ -12,6 +12,13 @@
>>>    #define HW_I386_INTEL_IOMMU_ACCEL_H
>>>    #include CONFIG_DEVICES
>>>
>>> +typedef struct VTDACCELPASIDCacheEntry {
>>> +    VTDHostIOMMUDevice *vtd_hiod;
>>> +    VTDPASIDEntry pe;
>>
>> let's use pasid_entry here as we already used it in the
>> VTDPASIDCacheEntry. Sometimes, it may help when searching for related
>> code.
> 
> Good suggestion, will do.
> 
> Thanks
> Zhenzhong
>
RE: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for pc_inv_dsc request
Posted by Duan, Zhenzhong 2 weeks, 5 days ago

>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>pc_inv_dsc request
>
>
>
>On 3/19/26 16:26, Duan, Zhenzhong wrote:
>>
>>
>>> -----Original Message-----
>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>> Subject: Re: [PATCH v1 07/13] intel_iommu: Handle PASID entry addition for
>>> pc_inv_dsc request
>>>
>>> On 3/6/26 11:44, Zhenzhong Duan wrote:
>>>> Structure VTDAddressSpace includes some elements suitable for emulated
>>>> device and passthrough device without PASID, e.g., address space,
>>>> different memory regions, etc, it is also protected by vtd iommu lock,
>>>> all these are useless and become a burden for passthrough device with
>>>> PASID.
>>>>
>>>> When there are lots of PASIDs used in one device, the AS and MRs are
>>>> all registered to memory core and impact the whole system performance.
>>>>
>>>> So instead of using VTDAddressSpace to cache pasid entry for each pasid
>>>> of a passthrough device, we define a light weight structure
>>>> VTDACCELPASIDCacheEntry with only necessary elements for each pasid. We
>>>> will use this struct as a parameter to conduct binding/unbinding to
>>>> nested hwpt and to record the current binded nested hwpt. It's also
>>>> designed to support PASID_0.
>>>
>>> PASID_0 of passthrough device still need to register MRs in case guest
>>> does not operate in scalable mode. So for PASID_0, you will have both
>>> VTDAPASIDCacheEntry and VTDACCELPASIDCacheEntry. Is it?
>>
>> Exactly!
>
>better to note it somewhere (commit message, code comment). It would be
>a good hint for future maintenance. e.g. it is easy to forget this fact. :)

Will do.

>
>>>
>>>> When guest creates new PASID entries, QEMU will capture the pc_inv_dsc
>>>> (pasid cache invalidation) request, walk through each pasid in each
>>>> passthrough device for valid pasid entries, create a new
>>>> VTDACCELPASIDCacheEntry if not existing yet.
>>>
>>> I think this idea is ok, but we need to be clear about the
>>> boundary between the usages of the two pasid cache structures.
>>
>> Yes, the principle here is VTDACCELPASIDCacheEntry is only used in
>intel_iommu_accel.c, VTDPASIDCacheEntry is only used in hw/i386/intel_iommu.c
>
>just mark it in commit message as well.

Sure.

Thanks
Zhenzhong