The kernel allows the user to switch a device's IOMMU domain, e.g.,
between the DMA and identity domains. When this happens in IOMMU
scalable mode, a PASID cache invalidation request is sent; the vIOMMU
ignores this request, which leaves the device bound to the wrong
address space, and DMA then fails.

This issue exists in scalable mode with both first-stage and
second-stage translation, and with both emulated and passthrough
devices.

Taking a network device as an example, the following sequence
triggers the issue:

1. start a guest with iommu=pt
2. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/unbind
3. echo DMA > /sys/kernel/iommu_groups/6/type
4. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/bind
5. Ping test

Fix it by switching the address space in the invalidation handler.
Fixes: 4a4f219e8a10 ("intel_iommu: add scalable-mode option to make scalable mode work")
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
hw/i386/intel_iommu.c | 29 +++++++++++++++++++++++++++--
1 file changed, 27 insertions(+), 2 deletions(-)
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 07bc0a749c..c402643b56 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3087,6 +3087,11 @@ static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
return vtd_ce_get_rid2pasid_entry(s, &ce, pe, vtd_as->pasid);
}
+static int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
+{
+ return memcmp(p1, p2, sizeof(*p1));
+}
+
/* Update or invalidate pasid cache based on the pasid entry in guest memory. */
static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
gpointer user_data)
@@ -3095,15 +3100,28 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
VTDAddressSpace *vtd_as = value;
VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
VTDPASIDEntry pe;
+ IOMMUNotifier *n;
uint16_t did;
if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
+ if (!pc_entry->valid) {
+ return;
+ }
/*
* No valid pasid entry in guest memory. e.g. pasid entry was modified
* to be either all-zero or non-present. Either case means existing
* pasid cache should be invalidated.
*/
pc_entry->valid = false;
+
+ /*
+ * When a pasid entry isn't valid any more, we should unmap all
+ * mappings in shadow pages instantly to ensure DMA security.
+ */
+ IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
+ vtd_address_space_unmap(vtd_as, n);
+ }
+ vtd_switch_address_space(vtd_as);
return;
}
@@ -3129,8 +3147,15 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
}
}
- pc_entry->pasid_entry = pe;
- pc_entry->valid = true;
+ if (!pc_entry->valid) {
+ pc_entry->pasid_entry = pe;
+ pc_entry->valid = true;
+ } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
+ return;
+ }
+
+ vtd_switch_address_space(vtd_as);
+ vtd_address_space_sync(vtd_as);
}
static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
--
2.47.1
On 2025/10/17 17:36, Zhenzhong Duan wrote:
> The kernel allows the user to switch a device's IOMMU domain, e.g.,
> between the DMA and identity domains. When this happens in IOMMU
> scalable mode, a PASID cache invalidation request is sent; the vIOMMU
> ignores this request, which leaves the device bound to the wrong
> address space, and DMA then fails.
>
> This issue exists in scalable mode with both first-stage and
> second-stage translation, and with both emulated and passthrough
> devices.
>
> Taking a network device as an example, the following sequence
> triggers the issue:
>
> 1. start a guest with iommu=pt
> 2. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/unbind
> 3. echo DMA > /sys/kernel/iommu_groups/6/type
> 4. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/bind
> 5. Ping test
>
> Fix it by switching the address space in the invalidation handler.
>
> Fixes: 4a4f219e8a10 ("intel_iommu: add scalable-mode option to make scalable mode work")
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
> hw/i386/intel_iommu.c | 29 +++++++++++++++++++++++++++--
> 1 file changed, 27 insertions(+), 2 deletions(-)
>
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 07bc0a749c..c402643b56 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3087,6 +3087,11 @@ static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
> return vtd_ce_get_rid2pasid_entry(s, &ce, pe, vtd_as->pasid);
> }
>
> +static int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
> +{
> + return memcmp(p1, p2, sizeof(*p1));
> +}
> +
> /* Update or invalidate pasid cache based on the pasid entry in guest memory. */
> static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
> gpointer user_data)
> @@ -3095,15 +3100,28 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
> VTDAddressSpace *vtd_as = value;
> VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
> VTDPASIDEntry pe;
> + IOMMUNotifier *n;
> uint16_t did;
>
> if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
> + if (!pc_entry->valid) {
> + return;
> + }
> /*
> * No valid pasid entry in guest memory. e.g. pasid entry was modified
> * to be either all-zero or non-present. Either case means existing
> * pasid cache should be invalidated.
> */
> pc_entry->valid = false;
> +
> + /*
> + * When a pasid entry isn't valid any more, we should unmap all
> + * mappings in shadow pages instantly to ensure DMA security.
> + */
> + IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
> + vtd_address_space_unmap(vtd_as, n);
> + }
I just realized that if the MR is the nodmar MR, the notifier is not
registered at all, is it? So the above loop and the
vtd_switch_address_space() call below do not duplicate each other.
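
(For reference, my understanding is that vtd_switch_address_space()
only flips which alias of the device's address space is enabled and
does not itself emit UNMAP notifications. A simplified sketch of the
idea, not the actual function, omitting BQL locking and the
interrupt-remapping subregion:

/* Simplified sketch of vtd_switch_address_space(), for reference only */
static bool vtd_switch_address_space_sketch(VTDAddressSpace *as)
{
    bool use_iommu = as->iommu_state->dmar_enabled && !vtd_as_pt_enabled(as);

    /* Turn one alias off before turning the other on */
    if (use_iommu) {
        memory_region_set_enabled(&as->nodmar, false);
        memory_region_set_enabled(MEMORY_REGION(&as->iommu), true);
    } else {
        memory_region_set_enabled(MEMORY_REGION(&as->iommu), false);
        memory_region_set_enabled(&as->nodmar, true);
    }
    return use_iommu;
}

So the explicit unmap loop above is still needed to flush any existing
shadow mappings.)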
> + vtd_switch_address_space(vtd_as);
> return;
> }
>
> @@ -3129,8 +3147,15 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
> }
> }
>
> - pc_entry->pasid_entry = pe;
> - pc_entry->valid = true;
> + if (!pc_entry->valid) {
> + pc_entry->pasid_entry = pe;
> + pc_entry->valid = true;
> + } else if (!vtd_pasid_entry_compare(&pe, &pc_entry->pasid_entry)) {
> + return;
> + }
> +
> + vtd_switch_address_space(vtd_as);
> + vtd_address_space_sync(vtd_as);
> }
>
> static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Regards,
Yi Liu
>-----Original Message-----
>From: Liu, Yi L <yi.l.liu@intel.com>
>Subject: Re: [PATCH v3 3/3] intel_iommu: Fix DMA failure when guest switches IOMMU domain
>
>On 2025/10/17 17:36, Zhenzhong Duan wrote:
>> The kernel allows the user to switch a device's IOMMU domain, e.g.,
>> between the DMA and identity domains. When this happens in IOMMU
>> scalable mode, a PASID cache invalidation request is sent; the vIOMMU
>> ignores this request, which leaves the device bound to the wrong
>> address space, and DMA then fails.
>>
>> This issue exists in scalable mode with both first-stage and
>> second-stage translation, and with both emulated and passthrough
>> devices.
>>
>> Taking a network device as an example, the following sequence
>> triggers the issue:
>>
>> 1. start a guest with iommu=pt
>> 2. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/unbind
>> 3. echo DMA > /sys/kernel/iommu_groups/6/type
>> 4. echo 0000:01:00.0 > /sys/bus/pci/drivers/virtio-pci/bind
>> 5. Ping test
>>
>> Fix it by switching the address space in the invalidation handler.
>>
>> Fixes: 4a4f219e8a10 ("intel_iommu: add scalable-mode option to make scalable mode work")
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>> hw/i386/intel_iommu.c | 29 +++++++++++++++++++++++++++--
>> 1 file changed, 27 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 07bc0a749c..c402643b56 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -3087,6 +3087,11 @@ static inline int vtd_dev_get_pe_from_pasid(VTDAddressSpace *vtd_as,
>> return vtd_ce_get_rid2pasid_entry(s, &ce, pe, vtd_as->pasid);
>> }
>>
>> +static int vtd_pasid_entry_compare(VTDPASIDEntry *p1, VTDPASIDEntry *p2)
>> +{
>> + return memcmp(p1, p2, sizeof(*p1));
>> +}
>> +
>> /* Update or invalidate pasid cache based on the pasid entry in guest memory. */
>> static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>> gpointer user_data)
>> @@ -3095,15 +3100,28 @@ static void vtd_pasid_cache_sync_locked(gpointer key, gpointer value,
>> VTDAddressSpace *vtd_as = value;
>> VTDPASIDCacheEntry *pc_entry = &vtd_as->pasid_cache_entry;
>> VTDPASIDEntry pe;
>> + IOMMUNotifier *n;
>> uint16_t did;
>>
>> if (vtd_dev_get_pe_from_pasid(vtd_as, &pe)) {
>> + if (!pc_entry->valid) {
>> + return;
>> + }
>> /*
>> * No valid pasid entry in guest memory. e.g. pasid entry was modified
>> * to be either all-zero or non-present. Either case means existing
>> * pasid cache should be invalidated.
>> */
>> pc_entry->valid = false;
>> +
>> + /*
>> + * When a pasid entry isn't valid any more, we should unmap all
>> + * mappings in shadow pages instantly to ensure DMA security.
>> + */
>> + IOMMU_NOTIFIER_FOREACH(n, &vtd_as->iommu) {
>> + vtd_address_space_unmap(vtd_as, n);
>> + }
>
>I just realized that if the MR is the nodmar MR, the notifier is not
>registered at all, is it? So the above loop and the
>vtd_switch_address_space() call below do not duplicate each other.
Yes, the IOMMU notifier mechanism only applies to IOMMU MRs, so nothing
is ever registered on the nodmar MR.
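
For illustration, registration looks roughly like the sketch below.
It is a hypothetical consumer (example_register() and
example_unmap_notify() are made-up names; only the memory-API calls
are real interfaces). Because registration targets the IOMMU MR
specifically, a device sitting on the nodmar alias has an empty
notifier list and the IOMMU_NOTIFIER_FOREACH() loop in the patch
visits nothing:

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "exec/memory.h"

/* Hypothetical consumer callback, for illustration only */
static void example_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *entry)
{
    /* Tear down this consumer's shadow mapping for the unmapped range */
}

static void example_register(IOMMUMemoryRegion *iommu_mr)
{
    static IOMMUNotifier notifier;

    /* Ask for UNMAP events across the whole address range */
    IOMMU_NOTIFIER_INIT(&notifier, example_unmap_notify,
                        IOMMU_NOTIFIER_UNMAP, 0, HWADDR_MAX, 0);
    /*
     * Registration is tied to an IOMMU MR; the nodmar alias is a plain
     * MR and never has a notifier attached.
     */
    memory_region_register_iommu_notifier(MEMORY_REGION(iommu_mr),
                                          &notifier, &error_abort);
}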
Thanks
Zhenzhong