[PATCH v2 15/19] intel_iommu: Replay pasid binds after context cache invalidation

Zhenzhong Duan posted 19 patches 4 months, 4 weeks ago
Maintainers: "Michael S. Tsirkin" <mst@redhat.com>, Jason Wang <jasowang@redhat.com>, Yi Liu <yi.l.liu@intel.com>, "Clément Mathieu--Drif" <clement.mathieu--drif@eviden.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>
There is a newer version of this series
[PATCH v2 15/19] intel_iommu: Replay pasid binds after context cache invalidation
Posted by Zhenzhong Duan 4 months, 4 weeks ago
From: Yi Liu <yi.l.liu@intel.com>

This replays guest pasid attachments after a context cache invalidation.
This is done as a safety measure: strictly speaking, the programmer should
issue a pasid cache invalidation with proper granularity after issuing a
context cache invalidation, but a guest might omit it.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 hw/i386/intel_iommu_internal.h |  1 +
 hw/i386/intel_iommu.c          | 51 ++++++++++++++++++++++++++++++++--
 hw/i386/trace-events           |  1 +
 3 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
index 92a533db54..b3e4aa23f1 100644
--- a/hw/i386/intel_iommu_internal.h
+++ b/hw/i386/intel_iommu_internal.h
@@ -575,6 +575,7 @@ typedef enum VTDPCInvType {
     VTD_PASID_CACHE_FORCE_RESET = 0,
     /* pasid cache invalidation rely on guest PASID entry */
     VTD_PASID_CACHE_GLOBAL_INV, /* pasid cache global invalidation */
+    VTD_PASID_CACHE_DEVSI,      /* pasid cache device selective invalidation */
     VTD_PASID_CACHE_DOMSI,      /* pasid cache domain selective invalidation */
     VTD_PASID_CACHE_PASIDSI,    /* pasid cache pasid selective invalidation */
 } VTDPCInvType;
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 1c94a0033c..621b07aa02 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -92,6 +92,10 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s);
 static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
 
 static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s);
+static void vtd_pasid_cache_sync(IntelIOMMUState *s,
+                                 VTDPASIDCacheInfo *pc_info);
+static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
+                                  PCIBus *bus, uint16_t devfn);
 
 static void vtd_panic_require_caching_mode(void)
 {
@@ -2437,6 +2441,8 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
 
 static void vtd_context_global_invalidate(IntelIOMMUState *s)
 {
+    VTDPASIDCacheInfo pc_info = { .error_happened = false, };
+
     trace_vtd_inv_desc_cc_global();
     /* Protects context cache */
     vtd_iommu_lock(s);
@@ -2454,6 +2460,9 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
      * VT-d emulation codes.
      */
     vtd_iommu_replay_all(s);
+
+    pc_info.type = VTD_PASID_CACHE_GLOBAL_INV;
+    vtd_pasid_cache_sync(s, &pc_info);
 }
 
 #ifdef CONFIG_IOMMUFD
@@ -2696,6 +2705,21 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
              * happened.
              */
             vtd_address_space_sync(vtd_as);
+            /*
+             * Per spec, a context flush should also be followed by a PASID
+             * cache and iotlb flush. Regarding a device selective
+             * context cache invalidation:
+             * if (emulated_device)
+             *    invalidate pasid cache and pasid-based iotlb
+             * else if (assigned_device)
+             *    check if the device has been bound to any pasid
+             *    invoke pasid_unbind for each bound pasid
+             * Here, we have vtd_pasid_cache_devsi() to invalidate pasid
+             * caches, while for piotlb in QEMU, we don't have it yet, so
+             * no handling. For assigned device, host iommu driver would
+             * flush piotlb when a pasid unbind is passed down to it.
+             */
+             vtd_pasid_cache_devsi(s, vtd_as->bus, devfn);
         }
     }
 }
@@ -3447,6 +3471,11 @@ static gboolean vtd_flush_pasid(gpointer key, gpointer value,
         /* Fall through */
     case VTD_PASID_CACHE_GLOBAL_INV:
         break;
+    case VTD_PASID_CACHE_DEVSI:
+        if (pc_info->bus != vtd_as->bus || pc_info->devfn != vtd_as->devfn) {
+            return false;
+        }
+        break;
     default:
         error_report("invalid pc_info->type");
         abort();
@@ -3640,6 +3669,11 @@ static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
     case VTD_PASID_CACHE_GLOBAL_INV:
         /* loop all assigned devices */
         break;
+    case VTD_PASID_CACHE_DEVSI:
+        walk_info.bus = pc_info->bus;
+        walk_info.devfn = pc_info->devfn;
+        vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
+        return;
     case VTD_PASID_CACHE_FORCE_RESET:
         /* For force reset, no need to go further replay */
         return;
@@ -3675,8 +3709,7 @@ static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
  * It includes updating the pasid cache in vIOMMU and updating the
  * pasid bindings per guest's latest pasid entry presence.
  */
-static void vtd_pasid_cache_sync(IntelIOMMUState *s,
-                                 VTDPASIDCacheInfo *pc_info)
+static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
 {
     if (!s->flts || !s->root_scalable || !s->dmar_enabled) {
         return;
@@ -3737,6 +3770,20 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s,
     vtd_replay_guest_pasid_bindings(s, pc_info);
 }
 
+static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
+                                  PCIBus *bus, uint16_t devfn)
+{
+    VTDPASIDCacheInfo pc_info = { .error_happened = false, };
+
+    trace_vtd_pasid_cache_devsi(devfn);
+
+    pc_info.type = VTD_PASID_CACHE_DEVSI;
+    pc_info.bus = bus;
+    pc_info.devfn = devfn;
+
+    vtd_pasid_cache_sync(s, &pc_info);
+}
+
 static bool vtd_process_pasid_desc(IntelIOMMUState *s,
                                    VTDInvDesc *inv_desc)
 {
diff --git a/hw/i386/trace-events b/hw/i386/trace-events
index 1c31b9a873..830b11f68b 100644
--- a/hw/i386/trace-events
+++ b/hw/i386/trace-events
@@ -28,6 +28,7 @@ vtd_pasid_cache_reset(void) ""
 vtd_pasid_cache_gsi(void) ""
 vtd_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation domain 0x%"PRIx16
 vtd_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
+vtd_pasid_cache_devsi(uint16_t devfn) "Dev selective PC invalidation dev: 0x%"PRIx16
 vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
 vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
 vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
-- 
2.34.1
Re: [PATCH v2 15/19] intel_iommu: Replay pasid binds after context cache invalidation
Posted by Eric Auger 4 months, 3 weeks ago

On 6/20/25 9:18 AM, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l.liu@intel.com>
>
> This replays guest pasid attachments after context cache invalidation.
> This is a behavior to ensure safety. Actually, programmer should issue
> pasid cache invalidation with proper granularity after issuing a context
> cache invalidation.
so is it mandated to do the invalidation twice?
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  hw/i386/intel_iommu_internal.h |  1 +
>  hw/i386/intel_iommu.c          | 51 ++++++++++++++++++++++++++++++++--
>  hw/i386/trace-events           |  1 +
>  3 files changed, 51 insertions(+), 2 deletions(-)
>
> diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h
> index 92a533db54..b3e4aa23f1 100644
> --- a/hw/i386/intel_iommu_internal.h
> +++ b/hw/i386/intel_iommu_internal.h
> @@ -575,6 +575,7 @@ typedef enum VTDPCInvType {
>      VTD_PASID_CACHE_FORCE_RESET = 0,
>      /* pasid cache invalidation rely on guest PASID entry */
>      VTD_PASID_CACHE_GLOBAL_INV, /* pasid cache global invalidation */
> +    VTD_PASID_CACHE_DEVSI,      /* pasid cache device selective invalidation */
>      VTD_PASID_CACHE_DOMSI,      /* pasid cache domain selective invalidation */
>      VTD_PASID_CACHE_PASIDSI,    /* pasid cache pasid selective invalidation */
>  } VTDPCInvType;
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 1c94a0033c..621b07aa02 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -92,6 +92,10 @@ static void vtd_address_space_refresh_all(IntelIOMMUState *s);
>  static void vtd_address_space_unmap(VTDAddressSpace *as, IOMMUNotifier *n);
>  
>  static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s);
> +static void vtd_pasid_cache_sync(IntelIOMMUState *s,
> +                                 VTDPASIDCacheInfo *pc_info);
> +static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
> +                                  PCIBus *bus, uint16_t devfn);
>  
>  static void vtd_panic_require_caching_mode(void)
>  {
> @@ -2437,6 +2441,8 @@ static void vtd_iommu_replay_all(IntelIOMMUState *s)
>  
>  static void vtd_context_global_invalidate(IntelIOMMUState *s)
>  {
> +    VTDPASIDCacheInfo pc_info = { .error_happened = false, };
> +
>      trace_vtd_inv_desc_cc_global();
>      /* Protects context cache */
>      vtd_iommu_lock(s);
> @@ -2454,6 +2460,9 @@ static void vtd_context_global_invalidate(IntelIOMMUState *s)
>       * VT-d emulation codes.
>       */
>      vtd_iommu_replay_all(s);
> +
> +    pc_info.type = VTD_PASID_CACHE_GLOBAL_INV;
> +    vtd_pasid_cache_sync(s, &pc_info);
>  }
>  
>  #ifdef CONFIG_IOMMUFD
> @@ -2696,6 +2705,21 @@ static void vtd_context_device_invalidate(IntelIOMMUState *s,
>               * happened.
>               */
>              vtd_address_space_sync(vtd_as);
> +            /*
> +             * Per spec, context flush should also followed with PASID
be followed
> +             * cache and iotlb flush. Regards to a device selective
regarding to?
> +             * context cache invalidation:
> +             * if (emaulted_device)
emulated
> +             *    invalidate pasid cache and pasid-based iotlb
> +             * else if (assigned_device)
> +             *    check if the device has been bound to any pasid
> +             *    invoke pasid_unbind regards to each bound pasid
> +             * Here, we have vtd_pasid_cache_devsi() to invalidate pasid
> +             * caches, while for piotlb in QEMU, we don't have it yet, so
> +             * no handling. For assigned device, host iommu driver would
> +             * flush piotlb when a pasid unbind is pass down to it.
> +             */
> +             vtd_pasid_cache_devsi(s, vtd_as->bus, devfn);
>          }
>      }
>  }
> @@ -3447,6 +3471,11 @@ static gboolean vtd_flush_pasid(gpointer key, gpointer value,
>          /* Fall through */
>      case VTD_PASID_CACHE_GLOBAL_INV:
>          break;
> +    case VTD_PASID_CACHE_DEVSI:
> +        if (pc_info->bus != vtd_as->bus || pc_info->devfn != vtd_as->devfn) {
> +            return false;
> +        }
> +        break;
>      default:
>          error_report("invalid pc_info->type");
>          abort();
> @@ -3640,6 +3669,11 @@ static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
>      case VTD_PASID_CACHE_GLOBAL_INV:
>          /* loop all assigned devices */
>          break;
> +    case VTD_PASID_CACHE_DEVSI:
> +        walk_info.bus = pc_info->bus;
> +        walk_info.devfn = pc_info->devfn;
> +        vtd_replay_pasid_bind_for_dev(s, start, end, &walk_info);
> +        return;
>      case VTD_PASID_CACHE_FORCE_RESET:
>          /* For force reset, no need to go further replay */
>          return;
> @@ -3675,8 +3709,7 @@ static void vtd_replay_guest_pasid_bindings(IntelIOMMUState *s,
>   * It includes updating the pasid cache in vIOMMU and updating the
>   * pasid bindings per guest's latest pasid entry presence.
>   */
> -static void vtd_pasid_cache_sync(IntelIOMMUState *s,
> -                                 VTDPASIDCacheInfo *pc_info)
> +static void vtd_pasid_cache_sync(IntelIOMMUState *s, VTDPASIDCacheInfo *pc_info)
>  {
>      if (!s->flts || !s->root_scalable || !s->dmar_enabled) {
>          return;
> @@ -3737,6 +3770,20 @@ static void vtd_pasid_cache_sync(IntelIOMMUState *s,
>      vtd_replay_guest_pasid_bindings(s, pc_info);
>  }
>  
> +static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
> +                                  PCIBus *bus, uint16_t devfn)
> +{
> +    VTDPASIDCacheInfo pc_info = { .error_happened = false, };
> +
> +    trace_vtd_pasid_cache_devsi(devfn);
> +
> +    pc_info.type = VTD_PASID_CACHE_DEVSI;
> +    pc_info.bus = bus;
> +    pc_info.devfn = devfn;
> +
> +    vtd_pasid_cache_sync(s, &pc_info);
> +}
> +
>  static bool vtd_process_pasid_desc(IntelIOMMUState *s,
>                                     VTDInvDesc *inv_desc)
>  {
> diff --git a/hw/i386/trace-events b/hw/i386/trace-events
> index 1c31b9a873..830b11f68b 100644
> --- a/hw/i386/trace-events
> +++ b/hw/i386/trace-events
> @@ -28,6 +28,7 @@ vtd_pasid_cache_reset(void) ""
>  vtd_pasid_cache_gsi(void) ""
>  vtd_pasid_cache_dsi(uint16_t domain) "Domain selective PC invalidation domain 0x%"PRIx16
>  vtd_pasid_cache_psi(uint16_t domain, uint32_t pasid) "PASID selective PC invalidation domain 0x%"PRIx16" pasid 0x%"PRIx32
> +vtd_pasid_cache_devsi(uint16_t devfn) "Dev selective PC invalidation dev: 0x%"PRIx16
>  vtd_re_not_present(uint8_t bus) "Root entry bus %"PRIu8" not present"
>  vtd_ce_not_present(uint8_t bus, uint8_t devfn) "Context entry bus %"PRIu8" devfn %"PRIu8" not present"
>  vtd_iotlb_page_hit(uint16_t sid, uint64_t addr, uint64_t slpte, uint16_t domain) "IOTLB page hit sid 0x%"PRIx16" iova 0x%"PRIx64" slpte 0x%"PRIx64" domain 0x%"PRIx16
Eric
RE: [PATCH v2 15/19] intel_iommu: Replay pasid binds after context cache invalidation
Posted by Duan, Zhenzhong 4 months, 3 weeks ago

>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH v2 15/19] intel_iommu: Replay pasid binds after context
>cache invalidation
>
>
>
>On 6/20/25 9:18 AM, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> This replays guest pasid attachments after context cache invalidation.
>> This is a behavior to ensure safety. Actually, programmer should issue
>> pasid cache invalidation with proper granularity after issuing a context
>> cache invalidation.
>so is it mandated to do the invalidation twice?

Yes, because we have no way to know whether a pasid cache invalidation following
a context cache invalidation is a duplicate one from the guest.

It depends on whether we want to support a guest with such a flaw; if there is no
need to support it, we can drop this patch.

>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  hw/i386/intel_iommu_internal.h |  1 +
>>  hw/i386/intel_iommu.c          | 51
>++++++++++++++++++++++++++++++++--
>>  hw/i386/trace-events           |  1 +
>>  3 files changed, 51 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/i386/intel_iommu_internal.h
>b/hw/i386/intel_iommu_internal.h
>> index 92a533db54..b3e4aa23f1 100644
>> --- a/hw/i386/intel_iommu_internal.h
>> +++ b/hw/i386/intel_iommu_internal.h
>> @@ -575,6 +575,7 @@ typedef enum VTDPCInvType {
>>      VTD_PASID_CACHE_FORCE_RESET = 0,
>>      /* pasid cache invalidation rely on guest PASID entry */
>>      VTD_PASID_CACHE_GLOBAL_INV, /* pasid cache global invalidation
>*/
>> +    VTD_PASID_CACHE_DEVSI,      /* pasid cache device selective
>invalidation */
>>      VTD_PASID_CACHE_DOMSI,      /* pasid cache domain selective
>invalidation */
>>      VTD_PASID_CACHE_PASIDSI,    /* pasid cache pasid selective
>invalidation */
>>  } VTDPCInvType;
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index 1c94a0033c..621b07aa02 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -92,6 +92,10 @@ static void
>vtd_address_space_refresh_all(IntelIOMMUState *s);
>>  static void vtd_address_space_unmap(VTDAddressSpace *as,
>IOMMUNotifier *n);
>>
>>  static void vtd_pasid_cache_reset_locked(IntelIOMMUState *s);
>> +static void vtd_pasid_cache_sync(IntelIOMMUState *s,
>> +                                 VTDPASIDCacheInfo *pc_info);
>> +static void vtd_pasid_cache_devsi(IntelIOMMUState *s,
>> +                                  PCIBus *bus, uint16_t devfn);
>>
>>  static void vtd_panic_require_caching_mode(void)
>>  {
>> @@ -2437,6 +2441,8 @@ static void
>vtd_iommu_replay_all(IntelIOMMUState *s)
>>
>>  static void vtd_context_global_invalidate(IntelIOMMUState *s)
>>  {
>> +    VTDPASIDCacheInfo pc_info = { .error_happened = false, };
>> +
>>      trace_vtd_inv_desc_cc_global();
>>      /* Protects context cache */
>>      vtd_iommu_lock(s);
>> @@ -2454,6 +2460,9 @@ static void
>vtd_context_global_invalidate(IntelIOMMUState *s)
>>       * VT-d emulation codes.
>>       */
>>      vtd_iommu_replay_all(s);
>> +
>> +    pc_info.type = VTD_PASID_CACHE_GLOBAL_INV;
>> +    vtd_pasid_cache_sync(s, &pc_info);
>>  }
>>
>>  #ifdef CONFIG_IOMMUFD
>> @@ -2696,6 +2705,21 @@ static void
>vtd_context_device_invalidate(IntelIOMMUState *s,
>>               * happened.
>>               */
>>              vtd_address_space_sync(vtd_as);
>> +            /*
>> +             * Per spec, context flush should also followed with PASID
>be followed
>> +             * cache and iotlb flush. Regards to a device selective
>regarding to?
>> +             * context cache invalidation:
>> +             * if (emaulted_device)
>emulated

Will fix above three.

Thanks
Zhenzhong