When guest creates new PASID entries, QEMU will capture the guest pasid
selective pasid cache invalidation, walk through each passthrough device
and each pasid, when a match is found, identify an existing vtd_as or
create a new one and update its corresponding cached pasid entry.
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu_internal.h | 2 +
hw/i386/intel_iommu.c | 176 ++++++++++++++++++++++++++++++++-
2 files changed, 175 insertions(+), 3 deletions(-)
diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index b9b76dd996..fb2a919e87 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -559,6 +559,7 @@ typedef struct VTDRootEntry VTDRootEntry;
#define VTD_CTX_ENTRY_LEGACY_SIZE 16
#define VTD_CTX_ENTRY_SCALABLE_SIZE 32
+#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
#define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
#define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
@@ -589,6 +590,7 @@ typedef struct VTDPASIDCacheInfo {
#define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
#define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & VTD_PASID_TABLE_BITS_MASK)
#define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
+#define VTD_PASID_TBL_ENTRY_NUM (1ULL << 6)
/* PASID Granular Translation Type Mask */
#define VTD_PASID_ENTRY_P 1ULL
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index a2ee6d684e..7d2c9feae7 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -826,6 +826,11 @@ static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
}
}
+static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
+{
+ return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
+}
+
static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
{
return pdire->val & 1;
@@ -1647,9 +1652,9 @@ static gboolean vtd_find_as_by_sid_and_iommu_pasid(gpointer key, gpointer value,
}
/* Translate iommu pasid to vtd_as */
-static inline
-VTDAddressSpace *vtd_as_from_iommu_pasid_locked(IntelIOMMUState *s,
- uint16_t sid, uint32_t pasid)
+static VTDAddressSpace *vtd_as_from_iommu_pasid_locked(IntelIOMMUState *s,
+ uint16_t sid,
+ uint32_t pasid)
{
struct vtd_as_raw_key key = {
.sid = sid,
@@ -3220,10 +3225,172 @@ remove:
return true;
}
+/*
+ * This function walks over PASID range within [start, end) in a single
+ * PASID table for entries matching @info type/did, then retrieve/create
+ * vtd_as and fill associated pasid entry cache.
+ */
+static void vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
+ dma_addr_t pt_base,
+ int start,
+ int end,
+ VTDPASIDCacheInfo *info)
+{
+ VTDPASIDEntry pe;
+ int pasid = start;
+
+ while (pasid < end) {
+ if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
+ && vtd_pe_present(&pe)) {
+ int bus_n = pci_bus_num(info->bus), devfn = info->devfn;
+ uint16_t sid = PCI_BUILD_BDF(bus_n, devfn);
+ VTDPASIDCacheEntry *pc_entry;
+ VTDAddressSpace *vtd_as;
+
+ vtd_iommu_lock(s);
+ /*
+ * When indexed by rid2pasid, vtd_as should have been created,
+ * e.g., by PCI subsystem. For other iommu pasid, we need to
+ * create vtd_as dynamically. Other iommu pasid is same value
+ * as PCI's pasid, so it's used as input of vtd_find_add_as().
+ */
+ vtd_as = vtd_as_from_iommu_pasid_locked(s, sid, pasid);
+ vtd_iommu_unlock(s);
+ if (!vtd_as) {
+ vtd_as = vtd_find_add_as(s, info->bus, devfn, pasid);
+ }
+
+ if ((info->type == VTD_PASID_CACHE_DOMSI ||
+ info->type == VTD_PASID_CACHE_PASIDSI) &&
+ (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
+ /*
+ * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
+ * requires domain id check. If domain id check fail,
+ * go to next pasid.
+ */
+ pasid++;
+ continue;
+ }
+
+ pc_entry = &vtd_as->pasid_cache_entry;
+ /*
+ * pasid cache update and clear are handled in
+ * vtd_flush_pasid_locked(), only care new pasid entry here.
+ */
+ if (!pc_entry->valid) {
+ pc_entry->pasid_entry = pe;
+ pc_entry->valid = true;
+ }
+ }
+ pasid++;
+ }
+}
+
+/*
+ * In VT-d scalable mode translation, PASID dir + PASID table is used.
+ * This function aims at looping over a range of PASIDs in the given
+ * two level table to identify the pasid config in guest.
+ */
+static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
+ dma_addr_t pdt_base,
+ int start, int end,
+ VTDPASIDCacheInfo *info)
+{
+ VTDPASIDDirEntry pdire;
+ int pasid = start;
+ int pasid_next;
+ dma_addr_t pt_base;
+
+ while (pasid < end) {
+ pasid_next =
+ (pasid + VTD_PASID_TBL_ENTRY_NUM) & ~(VTD_PASID_TBL_ENTRY_NUM - 1);
+ pasid_next = pasid_next < end ? pasid_next : end;
+
+ if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
+ && vtd_pdire_present(&pdire)) {
+ pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
+ vtd_sm_pasid_table_walk_one(s, pt_base, pasid, pasid_next, info);
+ }
+ pasid = pasid_next;
+ }
+}
+
+static void vtd_replay_pasid_bind_for_dev(IntelIOMMUState *s,
+ int start, int end,
+ VTDPASIDCacheInfo *info)
+{
+ VTDContextEntry ce;
+
+ if (!vtd_dev_to_context_entry(s, pci_bus_num(info->bus), info->devfn,
+ &ce)) {
+ uint32_t max_pasid;
+
+ max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) * VTD_PASID_TBL_ENTRY_NUM;
+ if (end > max_pasid) {
+ end = max_pasid;
+ }
+ vtd_sm_pasid_table_walk(s,
+ VTD_CE_GET_PASID_DIR_TABLE(&ce),
+ start,
+ end,
+ info);
+ }
+}
+
+/*
+ * This function replays the guest pasid bindings by walking the two level
+ * guest PASID table. For each valid pasid entry, it finds or creates a
+ * vtd_as and caches pasid entry in vtd_as.
+ */
+static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
+ VTDPASIDCacheInfo *pc_info)
+{
+ /*
+ * Currently only Requests-without-PASID is supported, as vIOMMU doesn't
+ * support RPS(RID-PASID Support), pasid scope is fixed to [0, 1).
+ */
+ int start = 0, end = 1;
+ VTDHostIOMMUDevice *vtd_hiod;
+ VTDPASIDCacheInfo walk_info;
+ GHashTableIter as_it;
+
+ switch (pc_info->type) {
+ case VTD_PASID_CACHE_PASIDSI:
+ start = pc_info->pasid;
+ end = pc_info->pasid + 1;
+ /* fall through */
+ case VTD_PASID_CACHE_DOMSI:
+ case VTD_PASID_CACHE_GLOBAL_INV:
+ /* loop all assigned devices */
+ break;
+ default:
+ error_setg(&error_fatal, "invalid pc_info->type for replay");
+ }
+
+ /*
+ * In this replay, one only needs to care about the devices which are
+ * backed by host IOMMU. Those devices have a corresponding vtd_hiod
+ * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
+ * is not necessary to replay the bindings since their cache could be
+ * re-created in the future DMA address translation.
+ *
+ * VTD translation callback never accesses vtd_hiod and its corresponding
+ * cached pasid entry, so no iommu lock needed here.
+ */
+ walk_info = *pc_info;
+ g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
+ while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
+ walk_info.bus = vtd_hiod->bus;
+ walk_info.devfn = vtd_hiod->devfn;
+ vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
+ }
+}
+
/*
* For a PASID cache invalidation, this function handles below scenarios:
* a) a present cached pasid entry needs to be removed
* b) a present cached pasid entry needs to be updated
+ * c) a present cached pasid entry needs to be created
*/
static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
{
@@ -3239,6 +3406,9 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
g_hash_table_foreach_remove(s->vtd_address_spaces, vtd_flush_pasid_locked,
pc_info);
vtd_iommu_unlock(s);
+
+ /* c): loop all passthrough device for new pasid entries */
+ vtd_replay_guest_pasid_bindings(s, pc_info);
}
static bool vtd_process_pasid_desc(IntelIOMMUState *s,
--
2.47.1
On 2025/8/22 14:40, Zhenzhong Duan wrote:
> When guest creates new PASID entries, QEMU will capture the guest pasid
> selective pasid cache invalidation, walk through each passthrough device
> and each pasid, when a match is found, identify an existing vtd_as or
> create a new one and update its corresponding cached pasid entry.
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 2 +
> hw/i386/intel_iommu.c | 176 ++++++++++++++++++++++++++++++++-
> 2 files changed, 175 insertions(+), 3 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index b9b76dd996..fb2a919e87 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -559,6 +559,7 @@ typedef struct VTDRootEntry VTDRootEntry;
> #define VTD_CTX_ENTRY_LEGACY_SIZE 16
> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>
> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
> #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
> @@ -589,6 +590,7 @@ typedef struct VTDPASIDCacheInfo {
> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & VTD_PASID_TABLE_BITS_MASK)
> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
> +#define VTD_PASID_TBL_ENTRY_NUM (1ULL << 6)
>
> /* PASID Granular Translation Type Mask */
> #define VTD_PASID_ENTRY_P 1ULL
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index a2ee6d684e..7d2c9feae7 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -826,6 +826,11 @@ static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
> }
> }
>
> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
> +{
> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
> +}
> +
> static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
> {
> return pdire->val & 1;
> @@ -1647,9 +1652,9 @@ static gboolean vtd_find_as_by_sid_and_iommu_pasid(gpointer key, gpointer value,
> }
>
> /* Translate iommu pasid to vtd_as */
> -static inline
> -VTDAddressSpace *vtd_as_from_iommu_pasid_locked(IntelIOMMUState *s,
> - uint16_t sid, uint32_t pasid)
> +static VTDAddressSpace *vtd_as_from_iommu_pasid_locked(IntelIOMMUState *s,
> + uint16_t sid,
> + uint32_t pasid)
> {
> struct vtd_as_raw_key key = {
> .sid = sid,
this hunk can be merged with the patch that introduces
vtd_as_from_iommu_pasid_locked.
> @@ -3220,10 +3225,172 @@ remove:
> return true;
> }
>
> +/*
> + * This function walks over PASID range within [start, end) in a single
> + * PASID table for entries matching @info type/did, then retrieve/create
> + * vtd_as and fill associated pasid entry cache.
> + */
> +static void vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
> + dma_addr_t pt_base,
> + int start,
> + int end,
> + VTDPASIDCacheInfo *info)
> +{
> + VTDPASIDEntry pe;
> + int pasid = start;
> +
> + while (pasid < end) {
> + if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
> + && vtd_pe_present(&pe)) {
> + int bus_n = pci_bus_num(info->bus), devfn = info->devfn;
> + uint16_t sid = PCI_BUILD_BDF(bus_n, devfn);
> + VTDPASIDCacheEntry *pc_entry;
> + VTDAddressSpace *vtd_as;
> +
> + vtd_iommu_lock(s);
> + /*
> + * When indexed by rid2pasid, vtd_as should have been created,
> + * e.g., by PCI subsystem. For other iommu pasid, we need to
> + * create vtd_as dynamically. Other iommu pasid is same value
> + * as PCI's pasid, so it's used as input of vtd_find_add_as().
> + */
> + vtd_as = vtd_as_from_iommu_pasid_locked(s, sid, pasid);
> + vtd_iommu_unlock(s);
> + if (!vtd_as) {
> + vtd_as = vtd_find_add_as(s, info->bus, devfn, pasid);
> + }
> +
> + if ((info->type == VTD_PASID_CACHE_DOMSI ||
> + info->type == VTD_PASID_CACHE_PASIDSI) &&
> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
> + /*
> + * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
> + * requires domain id check. If domain id check fail,
> + * go to next pasid.
> + */
> + pasid++;
> + continue;
> + }
> +
> + pc_entry = &vtd_as->pasid_cache_entry;
> + /*
> + * pasid cache update and clear are handled in
> + * vtd_flush_pasid_locked(), only care new pasid entry here.
> + */
> + if (!pc_entry->valid) {
> + pc_entry->pasid_entry = pe;
> + pc_entry->valid = true;
> + }
> + }
> + pasid++;
> + }
> +}
> +
> +/*
> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
remove translation.
> + * This function aims at looping over a range of PASIDs in the given
> + * two level table to identify the pasid config in guest.
> + */
> +static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
> + dma_addr_t pdt_base,
> + int start, int end,
> + VTDPASIDCacheInfo *info)
> +{
> + VTDPASIDDirEntry pdire;
> + int pasid = start;
> + int pasid_next;
> + dma_addr_t pt_base;
> +
> + while (pasid < end) {
> + pasid_next =
> + (pasid + VTD_PASID_TBL_ENTRY_NUM) & ~(VTD_PASID_TBL_ENTRY_NUM - 1);
> + pasid_next = pasid_next < end ? pasid_next : end;
> +
> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
> + && vtd_pdire_present(&pdire)) {
> + pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
> + vtd_sm_pasid_table_walk_one(s, pt_base, pasid, pasid_next, info);
> + }
> + pasid = pasid_next;
> + }
> +}
> +
> +static void vtd_replay_pasid_bind_for_dev(IntelIOMMUState *s,
> + int start, int end,
> + VTDPASIDCacheInfo *info)
> +{
> + VTDContextEntry ce;
> +
> + if (!vtd_dev_to_context_entry(s, pci_bus_num(info->bus), info->devfn,
> + &ce)) {
> + uint32_t max_pasid;
> +
> + max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) * VTD_PASID_TBL_ENTRY_NUM;
> + if (end > max_pasid) {
> + end = max_pasid;
> + }
> + vtd_sm_pasid_table_walk(s,
> + VTD_CE_GET_PASID_DIR_TABLE(&ce),
> + start,
> + end,
> + info);
> + }
> +}
> +
> +/*
> + * This function replays the guest pasid bindings by walking the two level
> + * guest PASID table. For each valid pasid entry, it finds or creates a
> + * vtd_as and caches pasid entry in vtd_as.
> + */
> +static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
> + VTDPASIDCacheInfo *pc_info)
> +{
> + /*
> + * Currently only Requests-without-PASID is supported, as vIOMMU doesn't
> + * support RPS(RID-PASID Support), pasid scope is fixed to [0, 1).
RPS is not the reason for only requests-without-PASID. RPS is 0 means
rid_pasid is fixed to be 0. While if RPS is 1, rid_pasid can be other
values. This is different with requests-with-PASID. If only aims for
supporting requests-without-PASID when RPS is 0, we may want to re-org
the code here. No need to pretend to loop a range of PASIDs just use
pasid#0 as we know rid_pasid is 0.
To me, this series should be able to support non-rid_pasid PASIDs.
> + */
> + int start = 0, end = 1;
> + VTDHostIOMMUDevice *vtd_hiod;
> + VTDPASIDCacheInfo walk_info;
> + GHashTableIter as_it;
> +
> + switch (pc_info->type) {
> + case VTD_PASID_CACHE_PASIDSI:
> + start = pc_info->pasid;
> + end = pc_info->pasid + 1;
> + /* fall through */
> + case VTD_PASID_CACHE_DOMSI:
> + case VTD_PASID_CACHE_GLOBAL_INV:
> + /* loop all assigned devices */
> + break;
> + default:
> + error_setg(&error_fatal, "invalid pc_info->type for replay");
> + }
> +
> + /*
> + * In this replay, one only needs to care about the devices which are
> + * backed by host IOMMU. Those devices have a corresponding vtd_hiod
> + * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
> + * is not necessary to replay the bindings since their cache could be
> + * re-created in the future DMA address translation.
> + *
> + * VTD translation callback never accesses vtd_hiod and its corresponding
> + * cached pasid entry, so no iommu lock needed here.
> + */
> + walk_info = *pc_info;
> + g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
> + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
> + walk_info.bus = vtd_hiod->bus;
> + walk_info.devfn = vtd_hiod->devfn;
> + vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
> + }
> +}
> +
> /*
> * For a PASID cache invalidation, this function handles below scenarios:
> * a) a present cached pasid entry needs to be removed
> * b) a present cached pasid entry needs to be updated
> + * c) a present cached pasid entry needs to be created
> */
> static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> {
> @@ -3239,6 +3406,9 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> g_hash_table_foreach_remove(s->vtd_address_spaces, vtd_flush_pasid_locked,
> pc_info);
> vtd_iommu_unlock(s);
> +
> + /* c): loop all passthrough device for new pasid entries */
> + vtd_replay_guest_pasid_bindings(s, pc_info);
> }
>
> static bool vtd_process_pasid_desc(IntelIOMMUState *s,
Hi Zhenzhong,
On 8/22/25 8:40 AM, Zhenzhong Duan wrote:
> When guest creates new PASID entries, QEMU will capture the guest pasid
> selective pasid cache invalidation, walk through each passthrough device
> and each pasid, when a match is found, identify an existing vtd_as or
> create a new one and update its corresponding cached pasid entry.
You need to emphasize that the support is currently limited to
Requests-without-PASID
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu_internal.h | 2 +
> hw/i386/intel_iommu.c | 176 ++++++++++++++++++++++++++++++++-
> 2 files changed, 175 insertions(+), 3 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index b9b76dd996..fb2a919e87 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -559,6 +559,7 @@ typedef struct VTDRootEntry VTDRootEntry;
> #define VTD_CTX_ENTRY_LEGACY_SIZE 16
> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>
> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0], 9, 3)
> #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL | ~VTD_HAW_MASK(aw))
> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1 0xffffffffffe00000ULL
> @@ -589,6 +590,7 @@ typedef struct VTDPASIDCacheInfo {
> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) & VTD_PASID_TABLE_BITS_MASK)
> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault Processing Disable */
> +#define VTD_PASID_TBL_ENTRY_NUM (1ULL << 6)
>
> /* PASID Granular Translation Type Mask */
> #define VTD_PASID_ENTRY_P 1ULL
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index a2ee6d684e..7d2c9feae7 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -826,6 +826,11 @@ static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
> }
> }
>
> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry *ce)
> +{
> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
> +}
> +
> static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
> {
> return pdire->val & 1;
> @@ -1647,9 +1652,9 @@ static gboolean vtd_find_as_by_sid_and_iommu_pasid(gpointer key, gpointer value,
> }
>
> /* Translate iommu pasid to vtd_as */
> -static inline
> -VTDAddressSpace *vtd_as_from_iommu_pasid_locked(IntelIOMMUState *s,
> - uint16_t sid, uint32_t pasid)
> +static VTDAddressSpace *vtd_as_from_iommu_pasid_locked(IntelIOMMUState *s,
> + uint16_t sid,
> + uint32_t pasid)
> {
> struct vtd_as_raw_key key = {
> .sid = sid,
> @@ -3220,10 +3225,172 @@ remove:
> return true;
> }
>
> +/*
> + * This function walks over PASID range within [start, end) in a single
> + * PASID table for entries matching @info type/did, then retrieve/create
> + * vtd_as and fill associated pasid entry cache.
> + */
> +static void vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
> + dma_addr_t pt_base,
> + int start,
> + int end,
> + VTDPASIDCacheInfo *info)
> +{
> + VTDPASIDEntry pe;
> + int pasid = start;
> +
> + while (pasid < end) {
> + if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
> + && vtd_pe_present(&pe)) {
> + int bus_n = pci_bus_num(info->bus), devfn = info->devfn;
> + uint16_t sid = PCI_BUILD_BDF(bus_n, devfn);
> + VTDPASIDCacheEntry *pc_entry;
> + VTDAddressSpace *vtd_as;
> +
> + vtd_iommu_lock(s);
> + /*
> + * When indexed by rid2pasid, vtd_as should have been created,
> + * e.g., by PCI subsystem. For other iommu pasid, we need to
> + * create vtd_as dynamically. Other iommu pasid is same value
since you don't support somthing else than rid2pasid, I would drop that
and simplify the code. See below.
> + * as PCI's pasid, so it's used as input of vtd_find_add_as().
> + */
> + vtd_as = vtd_as_from_iommu_pasid_locked(s, sid, pasid);
> + vtd_iommu_unlock(s);
> + if (!vtd_as) {
> + vtd_as = vtd_find_add_as(s, info->bus, devfn, pasid);
you could check the vtd_as already exists here per the rid2pasid support
limitation
> + }
> +
> + if ((info->type == VTD_PASID_CACHE_DOMSI ||
> + info->type == VTD_PASID_CACHE_PASIDSI) &&
> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
> + /*
> + * VTD_PASID_CACHE_DOMSI and VTD_PASID_CACHE_PASIDSI
> + * requires domain id check. If domain id check fail,
fails
> + * go to next pasid.
> + */
> + pasid++;
> + continue;
> + }
> +
> + pc_entry = &vtd_as->pasid_cache_entry;
> + /*
> + * pasid cache update and clear are handled in
> + * vtd_flush_pasid_locked(), only care new pasid entry here.
> + */
> + if (!pc_entry->valid) {
> + pc_entry->pasid_entry = pe;
> + pc_entry->valid = true;
> + }
> + }
> + pasid++;
> + }
> +}
> +
> +/*
> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
> + * This function aims at looping over a range of PASIDs in the given
> + * two level table to identify the pasid config in guest.
> + */
> +static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
> + dma_addr_t pdt_base,
> + int start, int end,
> + VTDPASIDCacheInfo *info)
> +{
> + VTDPASIDDirEntry pdire;
> + int pasid = start;
> + int pasid_next;
> + dma_addr_t pt_base;
> +
> + while (pasid < end) {
> + pasid_next =
> + (pasid + VTD_PASID_TBL_ENTRY_NUM) & ~(VTD_PASID_TBL_ENTRY_NUM - 1);
> + pasid_next = pasid_next < end ? pasid_next : end;
> +
> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
> + && vtd_pdire_present(&pdire)) {
> + pt_base = pdire.val & VTD_PASID_TABLE_BASE_ADDR_MASK;
> + vtd_sm_pasid_table_walk_one(s, pt_base, pasid, pasid_next, info);
> + }
> + pasid = pasid_next;
> + }
> +}
> +
> +static void vtd_replay_pasid_bind_for_dev(IntelIOMMUState *s,
> + int start, int end,
> + VTDPASIDCacheInfo *info)
> +{
> + VTDContextEntry ce;
> +
> + if (!vtd_dev_to_context_entry(s, pci_bus_num(info->bus), info->devfn,
> + &ce)) {
> + uint32_t max_pasid;
> +
> + max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) * VTD_PASID_TBL_ENTRY_NUM;
> + if (end > max_pasid) {
> + end = max_pasid;
> + }
> + vtd_sm_pasid_table_walk(s,
> + VTD_CE_GET_PASID_DIR_TABLE(&ce),
> + start,
> + end,
> + info);
> + }
> +}
> +
> +/*
> + * This function replays the guest pasid bindings by walking the two level
> + * guest PASID table. For each valid pasid entry, it finds or creates a
> + * vtd_as and caches pasid entry in vtd_as.
> + */
> +static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
> + VTDPASIDCacheInfo *pc_info)
> +{
> + /*
> + * Currently only Requests-without-PASID is supported, as vIOMMU doesn't
> + * support RPS(RID-PASID Support), pasid scope is fixed to [0, 1).
> + */
> + int start = 0, end = 1;
> + VTDHostIOMMUDevice *vtd_hiod;
> + VTDPASIDCacheInfo walk_info;
> + GHashTableIter as_it;
> +
> + switch (pc_info->type) {
> + case VTD_PASID_CACHE_PASIDSI:
> + start = pc_info->pasid;
> + end = pc_info->pasid + 1;
if you never replay a range, you could simplify the code for now because
some code paths are not properly tested
> + /* fall through */
> + case VTD_PASID_CACHE_DOMSI:
Why can't we have other invalidation types along with request without
PASID? It is not obvious to me at least why it couldn't be used by the
guest. Would deserve a comment in the commit desc I think.
> + case VTD_PASID_CACHE_GLOBAL_INV:
> + /* loop all assigned devices */
> + break;
> + default:
> + error_setg(&error_fatal, "invalid pc_info->type for replay");
> + }
> +
> + /*
> + * In this replay, one only needs to care about the devices which are
> + * backed by host IOMMU. Those devices have a corresponding vtd_hiod
> + * in s->vtd_host_iommu_dev. For devices not backed by host IOMMU, it
> + * is not necessary to replay the bindings since their cache could be
> + * re-created in the future DMA address translation.
> + *
> + * VTD translation callback never accesses vtd_hiod and its corresponding
> + * cached pasid entry, so no iommu lock needed here.
> + */
> + walk_info = *pc_info;
> + g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
> + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
> + walk_info.bus = vtd_hiod->bus;
> + walk_info.devfn = vtd_hiod->devfn;
> + vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
> + }
> +}
> +
> /*
> * For a PASID cache invalidation, this function handles below scenarios:
> * a) a present cached pasid entry needs to be removed
> * b) a present cached pasid entry needs to be updated
> + * c) a present cached pasid entry needs to be created
> */
> static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> {
> @@ -3239,6 +3406,9 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
> g_hash_table_foreach_remove(s->vtd_address_spaces, vtd_flush_pasid_locked,
> pc_info);
> vtd_iommu_unlock(s);
> +
> + /* c): loop all passthrough device for new pasid entries */
> + vtd_replay_guest_pasid_bindings(s, pc_info);
> }
>
> static bool vtd_process_pasid_desc(IntelIOMMUState *s,
Thanks
Eric
Hi Eric,
>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH v5 12/21] intel_iommu: Handle PASID entry addition
>
>Hi Zhenzhong,
>
>On 8/22/25 8:40 AM, Zhenzhong Duan wrote:
>> When guest creates new PASID entries, QEMU will capture the guest pasid
>> selective pasid cache invalidation, walk through each passthrough device
>> and each pasid, when a match is found, identify an existing vtd_as or
>> create a new one and update its corresponding cached pasid entry.
>
>You need to emphasize that the support is currently limited to
>Requests-without-PASID
OK.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu_internal.h | 2 +
>> hw/i386/intel_iommu.c | 176
>++++++++++++++++++++++++++++++++-
>> 2 files changed, 175 insertions(+), 3 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>> index b9b76dd996..fb2a919e87 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -559,6 +559,7 @@ typedef struct VTDRootEntry VTDRootEntry;
>> #define VTD_CTX_ENTRY_LEGACY_SIZE 16
>> #define VTD_CTX_ENTRY_SCALABLE_SIZE 32
>>
>> +#define VTD_SM_CONTEXT_ENTRY_PDTS(x) extract64((x)->val[0],
>9, 3)
>> #define VTD_SM_CONTEXT_ENTRY_RID2PASID_MASK 0xfffff
>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL0(aw) (0x1e0ULL |
>~VTD_HAW_MASK(aw))
>> #define VTD_SM_CONTEXT_ENTRY_RSVD_VAL1
>0xffffffffffe00000ULL
>> @@ -589,6 +590,7 @@ typedef struct VTDPASIDCacheInfo {
>> #define VTD_PASID_TABLE_BITS_MASK (0x3fULL)
>> #define VTD_PASID_TABLE_INDEX(pasid) ((pasid) &
>VTD_PASID_TABLE_BITS_MASK)
>> #define VTD_PASID_ENTRY_FPD (1ULL << 1) /* Fault
>Processing Disable */
>> +#define VTD_PASID_TBL_ENTRY_NUM (1ULL << 6)
>>
>> /* PASID Granular Translation Type Mask */
>> #define VTD_PASID_ENTRY_P 1ULL
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index a2ee6d684e..7d2c9feae7 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -826,6 +826,11 @@ static inline bool
>vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe)
>> }
>> }
>>
>> +static inline uint32_t vtd_sm_ce_get_pdt_entry_num(VTDContextEntry
>*ce)
>> +{
>> + return 1U << (VTD_SM_CONTEXT_ENTRY_PDTS(ce) + 7);
>> +}
>> +
>> static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire)
>> {
>> return pdire->val & 1;
>> @@ -1647,9 +1652,9 @@ static gboolean
>vtd_find_as_by_sid_and_iommu_pasid(gpointer key, gpointer value,
>> }
>>
>> /* Translate iommu pasid to vtd_as */
>> -static inline
>> -VTDAddressSpace *vtd_as_from_iommu_pasid_locked(IntelIOMMUState
>*s,
>> - uint16_t sid,
>uint32_t pasid)
>> +static VTDAddressSpace
>*vtd_as_from_iommu_pasid_locked(IntelIOMMUState *s,
>> +
>uint16_t sid,
>> +
>uint32_t pasid)
>> {
>> struct vtd_as_raw_key key = {
>> .sid = sid,
>> @@ -3220,10 +3225,172 @@ remove:
>> return true;
>> }
>>
>> +/*
>> + * This function walks over PASID range within [start, end) in a single
>> + * PASID table for entries matching @info type/did, then retrieve/create
>> + * vtd_as and fill associated pasid entry cache.
>> + */
>> +static void vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
>> + dma_addr_t pt_base,
>> + int start,
>> + int end,
>> + VTDPASIDCacheInfo
>*info)
>> +{
>> + VTDPASIDEntry pe;
>> + int pasid = start;
>> +
>> + while (pasid < end) {
>> + if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
>> + && vtd_pe_present(&pe)) {
>> + int bus_n = pci_bus_num(info->bus), devfn = info->devfn;
>> + uint16_t sid = PCI_BUILD_BDF(bus_n, devfn);
>> + VTDPASIDCacheEntry *pc_entry;
>> + VTDAddressSpace *vtd_as;
>> +
>> + vtd_iommu_lock(s);
>> + /*
>> + * When indexed by rid2pasid, vtd_as should have been
>created,
>> + * e.g., by PCI subsystem. For other iommu pasid, we need
>to
>> + * create vtd_as dynamically. Other iommu pasid is same
>value
>since you don't support somthing else than rid2pasid, I would drop that
>and simplify the code. See below.
>> + * as PCI's pasid, so it's used as input of vtd_find_add_as().
>> + */
>> + vtd_as = vtd_as_from_iommu_pasid_locked(s, sid, pasid);
>> + vtd_iommu_unlock(s);
>> + if (!vtd_as) {
>> + vtd_as = vtd_find_add_as(s, info->bus, devfn, pasid);
>you could check the vtd_as already exists here per the rid2pasid support
>limitation
In this series, I do include some basic codes for non-rid2pasid because they share some common code with rid2pasid and we already have emulated rid2pasid support in vIOMMU for a long time, it's not bad to accumulate some supporting code for non-rid2pasid for passthrough device. But I can do the factor out if you insist to have only rid_pasid code.
>> + }
>> +
>> + if ((info->type == VTD_PASID_CACHE_DOMSI ||
>> + info->type == VTD_PASID_CACHE_PASIDSI) &&
>> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
>> + /*
>> + * VTD_PASID_CACHE_DOMSI and
>VTD_PASID_CACHE_PASIDSI
>> + * requires domain id check. If domain id check fail,
>fails
>> + * go to next pasid.
>> + */
>> + pasid++;
>> + continue;
>> + }
>> +
>> + pc_entry = &vtd_as->pasid_cache_entry;
>> + /*
>> + * pasid cache update and clear are handled in
>> + * vtd_flush_pasid_locked(), only care new pasid entry
>here.
>> + */
>> + if (!pc_entry->valid) {
>> + pc_entry->pasid_entry = pe;
>> + pc_entry->valid = true;
>> + }
>> + }
>> + pasid++;
>> + }
>> +}
>> +
>> +/*
>> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
>> + * This function aims at looping over a range of PASIDs in the given
>> + * two level table to identify the pasid config in guest.
>> + */
>> +static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
>> + dma_addr_t pdt_base,
>> + int start, int end,
>> + VTDPASIDCacheInfo *info)
>> +{
>> + VTDPASIDDirEntry pdire;
>> + int pasid = start;
>> + int pasid_next;
>> + dma_addr_t pt_base;
>> +
>> + while (pasid < end) {
>> + pasid_next =
>> + (pasid + VTD_PASID_TBL_ENTRY_NUM) &
>~(VTD_PASID_TBL_ENTRY_NUM - 1);
>> + pasid_next = pasid_next < end ? pasid_next : end;
>> +
>> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
>> + && vtd_pdire_present(&pdire)) {
>> + pt_base = pdire.val &
>VTD_PASID_TABLE_BASE_ADDR_MASK;
>> + vtd_sm_pasid_table_walk_one(s, pt_base, pasid,
>pasid_next, info);
>> + }
>> + pasid = pasid_next;
>> + }
>> +}
>> +
>> +static void vtd_replay_pasid_bind_for_dev(IntelIOMMUState *s,
>> + int start, int end,
>> + VTDPASIDCacheInfo
>*info)
>> +{
>> + VTDContextEntry ce;
>> +
>> + if (!vtd_dev_to_context_entry(s, pci_bus_num(info->bus),
>info->devfn,
>> + &ce)) {
>> + uint32_t max_pasid;
>> +
>> + max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
>VTD_PASID_TBL_ENTRY_NUM;
>> + if (end > max_pasid) {
>> + end = max_pasid;
>> + }
>> + vtd_sm_pasid_table_walk(s,
>> +
>VTD_CE_GET_PASID_DIR_TABLE(&ce),
>> + start,
>> + end,
>> + info);
>> + }
>> +}
>> +
>> +/*
>> + * This function replays the guest pasid bindings by walking the two level
>> + * guest PASID table. For each valid pasid entry, it finds or creates a
>> + * vtd_as and caches pasid entry in vtd_as.
>> + */
>> +static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
>> + VTDPASIDCacheInfo
>*pc_info)
>> +{
>> + /*
>> + * Currently only Requests-without-PASID is supported, as vIOMMU
>doesn't
>> + * support RPS(RID-PASID Support), pasid scope is fixed to [0, 1).
>> + */
>> + int start = 0, end = 1;
>> + VTDHostIOMMUDevice *vtd_hiod;
>> + VTDPASIDCacheInfo walk_info;
>> + GHashTableIter as_it;
>> +
>> + switch (pc_info->type) {
>> + case VTD_PASID_CACHE_PASIDSI:
>> + start = pc_info->pasid;
>> + end = pc_info->pasid + 1;
>if you never replay a range, you could simplify the code for now because
>some code paths are not properly tested
OK. Instead of assignment of start and end variable, maybe just an assert(!pc_info->pasid).
>> + /* fall through */
>> + case VTD_PASID_CACHE_DOMSI:
>Why can't we have other invalidation types along with request without
>PASID? It is not obvious to me at least why it couldn't be used by the
>guest. Would deserve a comment in the commit desc I think.
Other invalidation types are indeed used, just in pasid scope [0, 1), because [start, end) are already initialized to [0, 1), nothing more here, so just break.
Thanks
Zhenzhong
>> + case VTD_PASID_CACHE_GLOBAL_INV:
>> + /* loop all assigned devices */
>> + break;
>> + default:
>> + error_setg(&error_fatal, "invalid pc_info->type for replay");
>> + }
>> +
>> + /*
>> + * In this replay, one only needs to care about the devices which are
>> + * backed by host IOMMU. Those devices have a corresponding
>vtd_hiod
>> + * in s->vtd_host_iommu_dev. For devices not backed by host
>IOMMU, it
>> + * is not necessary to replay the bindings since their cache could be
>> + * re-created in the future DMA address translation.
>> + *
>> + * VTD translation callback never accesses vtd_hiod and its
>corresponding
>> + * cached pasid entry, so no iommu lock needed here.
>> + */
>> + walk_info = *pc_info;
>> + g_hash_table_iter_init(&as_it, s->vtd_host_iommu_dev);
>> + while (g_hash_table_iter_next(&as_it, NULL, (void **)&vtd_hiod)) {
>> + walk_info.bus = vtd_hiod->bus;
>> + walk_info.devfn = vtd_hiod->devfn;
>> + vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
>> + }
>> +}
>> +
>> /*
>> * For a PASID cache invalidation, this function handles below scenarios:
>> * a) a present cached pasid entry needs to be removed
>> * b) a present cached pasid entry needs to be updated
>> + * c) a present cached pasid entry needs to be created
>> */
>> static void vtd_pasid_cache_sync(IntelIOMMUState *s,
>VTDPASIDCacheInfo *pc_info)
>> {
>> @@ -3239,6 +3406,9 @@ static void
>vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
>> g_hash_table_foreach_remove(s->vtd_address_spaces,
>vtd_flush_pasid_locked,
>> pc_info);
>> vtd_iommu_unlock(s);
>> +
>> + /* c): loop all passthrough device for new pasid entries */
>> + vtd_replay_guest_pasid_bindings(s, pc_info);
>> }
>>
>> static bool vtd_process_pasid_desc(IntelIOMMUState *s,
>Thanks
>
>Eric
On 2025/9/1 17:03, Duan, Zhenzhong wrote:
>>> }
>>>
>>> +/*
>>> + * This function walks over PASID range within [start, end) in a single
>>> + * PASID table for entries matching @info type/did, then retrieve/create
>>> + * vtd_as and fill associated pasid entry cache.
>>> + */
>>> +static void vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
>>> + dma_addr_t pt_base,
>>> + int start,
>>> + int end,
>>> + VTDPASIDCacheInfo
>> *info)
>>> +{
>>> + VTDPASIDEntry pe;
>>> + int pasid = start;
>>> +
>>> + while (pasid < end) {
>>> + if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
>>> + && vtd_pe_present(&pe)) {
>>> + int bus_n = pci_bus_num(info->bus), devfn = info->devfn;
>>> + uint16_t sid = PCI_BUILD_BDF(bus_n, devfn);
>>> + VTDPASIDCacheEntry *pc_entry;
>>> + VTDAddressSpace *vtd_as;
>>> +
>>> + vtd_iommu_lock(s);
>>> + /*
>>> + * When indexed by rid2pasid, vtd_as should have been
>> created,
>>> + * e.g., by PCI subsystem. For other iommu pasid, we need
>> to
>>> + * create vtd_as dynamically. Other iommu pasid is same
>> value
>> since you don't support somthing else than rid2pasid, I would drop that
>> and simplify the code. See below.
>>> + * as PCI's pasid, so it's used as input of vtd_find_add_as().
>>> + */
>>> + vtd_as = vtd_as_from_iommu_pasid_locked(s, sid, pasid);
>>> + vtd_iommu_unlock(s);
>>> + if (!vtd_as) {
>>> + vtd_as = vtd_find_add_as(s, info->bus, devfn, pasid);
>> you could check the vtd_as already exists here per the rid2pasid support
>> limitation
>
> In this series, I do include some basic codes for non-rid2pasid because they share some common code with rid2pasid and we already have emulated rid2pasid support in vIOMMU for a long time, it's not bad to accumulate some supporting code for non-rid2pasid for passthrough device. But I can do the factor out if you insist to have only rid_pasid code.
I think it's a reasonable ask. :)
>
>>> + }
>>> +
>>> + if ((info->type == VTD_PASID_CACHE_DOMSI ||
>>> + info->type == VTD_PASID_CACHE_PASIDSI) &&
>>> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
>>> + /*
>>> + * VTD_PASID_CACHE_DOMSI and
>> VTD_PASID_CACHE_PASIDSI
>>> + * requires domain id check. If domain id check fail,
>> fails
>>> + * go to next pasid.
>>> + */
>>> + pasid++;
>>> + continue;
>>> + }
>>> +
>>> + pc_entry = &vtd_as->pasid_cache_entry;
>>> + /*
>>> + * pasid cache update and clear are handled in
>>> + * vtd_flush_pasid_locked(), only care new pasid entry
>> here.
>>> + */
>>> + if (!pc_entry->valid) {
>>> + pc_entry->pasid_entry = pe;
>>> + pc_entry->valid = true;
>>> + }
>>> + }
>>> + pasid++;
>>> + }
>>> +}
>>> +
>>> +/*
>>> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
>>> + * This function aims at looping over a range of PASIDs in the given
>>> + * two level table to identify the pasid config in guest.
>>> + */
>>> +static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
>>> + dma_addr_t pdt_base,
>>> + int start, int end,
>>> + VTDPASIDCacheInfo *info)
>>> +{
>>> + VTDPASIDDirEntry pdire;
>>> + int pasid = start;
>>> + int pasid_next;
>>> + dma_addr_t pt_base;
>>> +
>>> + while (pasid < end) {
>>> + pasid_next =
>>> + (pasid + VTD_PASID_TBL_ENTRY_NUM) &
>> ~(VTD_PASID_TBL_ENTRY_NUM - 1);
>>> + pasid_next = pasid_next < end ? pasid_next : end;
>>> +
>>> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
>>> + && vtd_pdire_present(&pdire)) {
>>> + pt_base = pdire.val &
>> VTD_PASID_TABLE_BASE_ADDR_MASK;
>>> + vtd_sm_pasid_table_walk_one(s, pt_base, pasid,
>> pasid_next, info);
>>> + }
>>> + pasid = pasid_next;
>>> + }
>>> +}
>>> +
>>> +static void vtd_replay_pasid_bind_for_dev(IntelIOMMUState *s,
>>> + int start, int end,
>>> + VTDPASIDCacheInfo
>> *info)
>>> +{
>>> + VTDContextEntry ce;
>>> +
>>> + if (!vtd_dev_to_context_entry(s, pci_bus_num(info->bus),
>> info->devfn,
>>> + &ce)) {
>>> + uint32_t max_pasid;
>>> +
>>> + max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
>> VTD_PASID_TBL_ENTRY_NUM;
>>> + if (end > max_pasid) {
>>> + end = max_pasid;
>>> + }
>>> + vtd_sm_pasid_table_walk(s,
>>> +
>> VTD_CE_GET_PASID_DIR_TABLE(&ce),
>>> + start,
>>> + end,
>>> + info);
>>> + }
>>> +}
>>> +
>>> +/*
>>> + * This function replays the guest pasid bindings by walking the two level
>>> + * guest PASID table. For each valid pasid entry, it finds or creates a
>>> + * vtd_as and caches pasid entry in vtd_as.
>>> + */
>>> +static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
>>> + VTDPASIDCacheInfo
>> *pc_info)
>>> +{
>>> + /*
>>> + * Currently only Requests-without-PASID is supported, as vIOMMU
>> doesn't
>>> + * support RPS(RID-PASID Support), pasid scope is fixed to [0, 1).
>>> + */
>>> + int start = 0, end = 1;
>>> + VTDHostIOMMUDevice *vtd_hiod;
>>> + VTDPASIDCacheInfo walk_info;
>>> + GHashTableIter as_it;
>>> +
>>> + switch (pc_info->type) {
>>> + case VTD_PASID_CACHE_PASIDSI:
>>> + start = pc_info->pasid;
>>> + end = pc_info->pasid + 1;
>> if you never replay a range, you could simplify the code for now because
>> some code paths are not properly tested
>
> OK. Instead of assignment of start and end variable, maybe just an assert(!pc_info->pasid).
I think there are two reasons for this range replay.
1) as a preparation for patch 16 of this series.
2) support domain selective or global pasid cache invalidation
>
>>> + /* fall through */
>>> + case VTD_PASID_CACHE_DOMSI:
>> Why can't we have other invalidation types along with request without
>> PASID? It is not obvious to me at least why it couldn't be used by the
>> guest. Would deserve a comment in the commit desc I think.
>
> Other invalidation types are indeed used, just in pasid scope [0, 1), because [start, end) are already initialized to [0, 1), nothing more here, so just break.
hmmm. The fixed scope makes the range replay a fake one. It's better
holding the range replay logic for now and add it when there is
non-rid_pasid support for passthrough devices.
Regards,
Yi Liu
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v5 12/21] intel_iommu: Handle PASID entry addition
>
>On 2025/9/1 17:03, Duan, Zhenzhong wrote:
>
>>>> }
>>>>
>>>> +/*
>>>> + * This function walks over PASID range within [start, end) in a single
>>>> + * PASID table for entries matching @info type/did, then retrieve/create
>>>> + * vtd_as and fill associated pasid entry cache.
>>>> + */
>>>> +static void vtd_sm_pasid_table_walk_one(IntelIOMMUState *s,
>>>> + dma_addr_t pt_base,
>>>> + int start,
>>>> + int end,
>>>> + VTDPASIDCacheInfo
>>> *info)
>>>> +{
>>>> + VTDPASIDEntry pe;
>>>> + int pasid = start;
>>>> +
>>>> + while (pasid < end) {
>>>> + if (!vtd_get_pe_in_pasid_leaf_table(s, pasid, pt_base, &pe)
>>>> + && vtd_pe_present(&pe)) {
>>>> + int bus_n = pci_bus_num(info->bus), devfn = info->devfn;
>>>> + uint16_t sid = PCI_BUILD_BDF(bus_n, devfn);
>>>> + VTDPASIDCacheEntry *pc_entry;
>>>> + VTDAddressSpace *vtd_as;
>>>> +
>>>> + vtd_iommu_lock(s);
>>>> + /*
>>>> + * When indexed by rid2pasid, vtd_as should have been
>>> created,
>>>> + * e.g., by PCI subsystem. For other iommu pasid, we
>need
>>> to
>>>> + * create vtd_as dynamically. Other iommu pasid is same
>>> value
>>> since you don't support somthing else than rid2pasid, I would drop that
>>> and simplify the code. See below.
>>>> + * as PCI's pasid, so it's used as input of
>vtd_find_add_as().
>>>> + */
>>>> + vtd_as = vtd_as_from_iommu_pasid_locked(s, sid, pasid);
>>>> + vtd_iommu_unlock(s);
>>>> + if (!vtd_as) {
>>>> + vtd_as = vtd_find_add_as(s, info->bus, devfn, pasid);
>>> you could check the vtd_as already exists here per the rid2pasid support
>>> limitation
>>
>> In this series, I do include some basic codes for non-rid2pasid because they
>share some common code with rid2pasid and we already have emulated
>rid2pasid support in vIOMMU for a long time, it's not bad to accumulate some
>supporting code for non-rid2pasid for passthrough device. But I can do the
>factor out if you insist to have only rid_pasid code.
>
>I think it's a reasonable ask. :)
OK, will do.
>
>>
>>>> + }
>>>> +
>>>> + if ((info->type == VTD_PASID_CACHE_DOMSI ||
>>>> + info->type == VTD_PASID_CACHE_PASIDSI) &&
>>>> + (info->did != VTD_SM_PASID_ENTRY_DID(&pe))) {
>>>> + /*
>>>> + * VTD_PASID_CACHE_DOMSI and
>>> VTD_PASID_CACHE_PASIDSI
>>>> + * requires domain id check. If domain id check fail,
>>> fails
>>>> + * go to next pasid.
>>>> + */
>>>> + pasid++;
>>>> + continue;
>>>> + }
>>>> +
>>>> + pc_entry = &vtd_as->pasid_cache_entry;
>>>> + /*
>>>> + * pasid cache update and clear are handled in
>>>> + * vtd_flush_pasid_locked(), only care new pasid entry
>>> here.
>>>> + */
>>>> + if (!pc_entry->valid) {
>>>> + pc_entry->pasid_entry = pe;
>>>> + pc_entry->valid = true;
>>>> + }
>>>> + }
>>>> + pasid++;
>>>> + }
>>>> +}
>>>> +
>>>> +/*
>>>> + * In VT-d scalable mode translation, PASID dir + PASID table is used.
>>>> + * This function aims at looping over a range of PASIDs in the given
>>>> + * two level table to identify the pasid config in guest.
>>>> + */
>>>> +static void vtd_sm_pasid_table_walk(IntelIOMMUState *s,
>>>> + dma_addr_t pdt_base,
>>>> + int start, int end,
>>>> + VTDPASIDCacheInfo *info)
>>>> +{
>>>> + VTDPASIDDirEntry pdire;
>>>> + int pasid = start;
>>>> + int pasid_next;
>>>> + dma_addr_t pt_base;
>>>> +
>>>> + while (pasid < end) {
>>>> + pasid_next =
>>>> + (pasid + VTD_PASID_TBL_ENTRY_NUM) &
>>> ~(VTD_PASID_TBL_ENTRY_NUM - 1);
>>>> + pasid_next = pasid_next < end ? pasid_next : end;
>>>> +
>>>> + if (!vtd_get_pdire_from_pdir_table(pdt_base, pasid, &pdire)
>>>> + && vtd_pdire_present(&pdire)) {
>>>> + pt_base = pdire.val &
>>> VTD_PASID_TABLE_BASE_ADDR_MASK;
>>>> + vtd_sm_pasid_table_walk_one(s, pt_base, pasid,
>>> pasid_next, info);
>>>> + }
>>>> + pasid = pasid_next;
>>>> + }
>>>> +}
>>>> +
>>>> +static void vtd_replay_pasid_bind_for_dev(IntelIOMMUState *s,
>>>> + int start, int end,
>>>> + VTDPASIDCacheInfo
>>> *info)
>>>> +{
>>>> + VTDContextEntry ce;
>>>> +
>>>> + if (!vtd_dev_to_context_entry(s, pci_bus_num(info->bus),
>>> info->devfn,
>>>> + &ce)) {
>>>> + uint32_t max_pasid;
>>>> +
>>>> + max_pasid = vtd_sm_ce_get_pdt_entry_num(&ce) *
>>> VTD_PASID_TBL_ENTRY_NUM;
>>>> + if (end > max_pasid) {
>>>> + end = max_pasid;
>>>> + }
>>>> + vtd_sm_pasid_table_walk(s,
>>>> +
>>> VTD_CE_GET_PASID_DIR_TABLE(&ce),
>>>> + start,
>>>> + end,
>>>> + info);
>>>> + }
>>>> +}
>>>> +
>>>> +/*
>>>> + * This function replays the guest pasid bindings by walking the two level
>>>> + * guest PASID table. For each valid pasid entry, it finds or creates a
>>>> + * vtd_as and caches pasid entry in vtd_as.
>>>> + */
>>>> +static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
>>>> +
>VTDPASIDCacheInfo
>>> *pc_info)
>>>> +{
>>>> + /*
>>>> + * Currently only Requests-without-PASID is supported, as
>vIOMMU
>>> doesn't
>>>> + * support RPS(RID-PASID Support), pasid scope is fixed to [0, 1).
>>>> + */
>>>> + int start = 0, end = 1;
>>>> + VTDHostIOMMUDevice *vtd_hiod;
>>>> + VTDPASIDCacheInfo walk_info;
>>>> + GHashTableIter as_it;
>>>> +
>>>> + switch (pc_info->type) {
>>>> + case VTD_PASID_CACHE_PASIDSI:
>>>> + start = pc_info->pasid;
>>>> + end = pc_info->pasid + 1;
>>> if you never replay a range, you could simplify the code for now because
>>> some code paths are not properly tested
>>
>> OK. Instead of assignment of start and end variable, maybe just an
>assert(!pc_info->pasid).
>
>I think there are two reasons for this range replay.
>
>1) as a preparation for patch 16 of this series.
>2) support domain selective or global pasid cache invalidation
Exactly.
>
>>
>>>> + /* fall through */
>>>> + case VTD_PASID_CACHE_DOMSI:
>>> Why can't we have other invalidation types along with request without
>>> PASID? It is not obvious to me at least why it couldn't be used by the
>>> guest. Would deserve a comment in the commit desc I think.
>>
>> Other invalidation types are indeed used, just in pasid scope [0, 1), because
>[start, end) are already initialized to [0, 1), nothing more here, so just break.
>
>hmmm. The fixed scope makes the range replay a fake one. It's better
>holding the range replay logic for now and add it when there is
>non-rid_pasid support for passthrough devices.
Yes, will delete the non-rid_pasid code.
Thanks
Zhenzhong
© 2016 - 2025 Red Hat, Inc.