[PATCH rfcv2 14/18] intel_iommu: Add a framework to check and sync host IOMMU cap/ecap

Zhenzhong Duan posted 18 patches 9 months, 4 weeks ago
Maintainers: Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, "Michael S. Tsirkin" <mst@redhat.com>, Igor Mammedov <imammedo@redhat.com>, Ani Sinha <anisinha@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Peter Xu <peterx@redhat.com>, Jason Wang <jasowang@redhat.com>, Thomas Huth <thuth@redhat.com>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Tony Krowiak <akrowiak@linux.ibm.com>, Halil Pasic <pasic@linux.ibm.com>, Jason Herne <jjherne@linux.ibm.com>, Eric Farman <farman@linux.ibm.com>, Matthew Rosato <mjrosato@linux.ibm.com>
There is a newer version of this series
[PATCH rfcv2 14/18] intel_iommu: Add a framework to check and sync host IOMMU cap/ecap
Posted by Zhenzhong Duan 9 months, 4 weeks ago
From: Yi Liu <yi.l.liu@intel.com>

Add a framework to check and synchronize host IOMMU cap/ecap with
vIOMMU cap/ecap.

The sequence will be:

vtd_cap_init() initializes iommu->cap/ecap.
vtd_check_hdev() update iommu->cap/ecap based on host cap/ecap.
iommu->cap_frozen set when machine create done, iommu->cap/ecap become readonly.

Implementation details for different backends will be in following patches.

Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
 include/hw/i386/intel_iommu.h |  1 +
 hw/i386/intel_iommu.c         | 41 ++++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
index bbc7b96add..c71a133820 100644
--- a/include/hw/i386/intel_iommu.h
+++ b/include/hw/i386/intel_iommu.h
@@ -283,6 +283,7 @@ struct IntelIOMMUState {
 
     uint64_t cap;                   /* The value of capability reg */
     uint64_t ecap;                  /* The value of extended capability reg */
+    bool cap_frozen;                /* cap/ecap become read-only after frozen */
 
     uint32_t context_cache_gen;     /* Should be in [1,MAX] */
     GHashTable *iotlb;              /* IOTLB */
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index ffa1ad6429..7ed2b79669 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3819,6 +3819,31 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
     return vtd_dev_as;
 }
 
+static int vtd_check_legacy_hdev(IntelIOMMUState *s,
+                                 IOMMULegacyDevice *ldev,
+                                 Error **errp)
+{
+    return 0;
+}
+
+static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
+                                  IOMMUFDDevice *idev,
+                                  Error **errp)
+{
+    return 0;
+}
+
+static int vtd_check_hdev(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hdev,
+                          Error **errp)
+{
+    HostIOMMUDevice *base_dev = vtd_hdev->dev;
+
+    if (base_dev->type == HID_LEGACY) {
+        return vtd_check_legacy_hdev(s, vtd_hdev->ldev, errp);
+    }
+    return vtd_check_iommufd_hdev(s, vtd_hdev->idev, errp);
+}
+
 static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
                                     HostIOMMUDevice *base_dev, Error **errp)
 {
@@ -3829,6 +3854,7 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
         .devfn = devfn,
     };
     struct vtd_as_key *new_key;
+    int ret;
 
     assert(base_dev);
 
@@ -3848,6 +3874,13 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
     vtd_hdev->iommu_state = s;
     vtd_hdev->dev = base_dev;
 
+    ret = vtd_check_hdev(s, vtd_hdev, errp);
+    if (ret) {
+        g_free(vtd_hdev);
+        vtd_iommu_unlock(s);
+        return ret;
+    }
+
     new_key = g_malloc(sizeof(*new_key));
     new_key->bus = bus;
     new_key->devfn = devfn;
@@ -4083,7 +4116,9 @@ static void vtd_init(IntelIOMMUState *s)
     s->iq_dw = false;
     s->next_frcd_reg = 0;
 
-    vtd_cap_init(s);
+    if (!s->cap_frozen) {
+        vtd_cap_init(s);
+    }
 
     /*
      * Rsvd field masks for spte
@@ -4254,6 +4289,10 @@ static int vtd_machine_done_notify_one(Object *child, void *unused)
 
 static void vtd_machine_done_hook(Notifier *notifier, void *unused)
 {
+    IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
+
+    iommu->cap_frozen = true;
+
     object_child_foreach_recursive(object_get_root(),
                                    vtd_machine_done_notify_one, NULL);
 }
-- 
2.34.1
Re: [PATCH rfcv2 14/18] intel_iommu: Add a framework to check and sync host IOMMU cap/ecap
Posted by Eric Auger 9 months, 1 week ago

On 2/1/24 08:28, Zhenzhong Duan wrote:
> From: Yi Liu <yi.l.liu@intel.com>
>
> Add a framework to check and synchronize host IOMMU cap/ecap with
> vIOMMU cap/ecap.
>
> The sequence will be:
>
> vtd_cap_init() initializes iommu->cap/ecap.
> vtd_check_hdev() update iommu->cap/ecap based on host cap/ecap.
> iommu->cap_frozen set when machine create done, iommu->cap/ecap become readonly.
>
> Implementation details for different backends will be in following patches.
>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
> ---
>  include/hw/i386/intel_iommu.h |  1 +
>  hw/i386/intel_iommu.c         | 41 ++++++++++++++++++++++++++++++++++-
>  2 files changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h
> index bbc7b96add..c71a133820 100644
> --- a/include/hw/i386/intel_iommu.h
> +++ b/include/hw/i386/intel_iommu.h
> @@ -283,6 +283,7 @@ struct IntelIOMMUState {
>  
>      uint64_t cap;                   /* The value of capability reg */
>      uint64_t ecap;                  /* The value of extended capability reg */
> +    bool cap_frozen;                /* cap/ecap become read-only after frozen */
>  
>      uint32_t context_cache_gen;     /* Should be in [1,MAX] */
>      GHashTable *iotlb;              /* IOTLB */
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index ffa1ad6429..7ed2b79669 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3819,6 +3819,31 @@ VTDAddressSpace *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>      return vtd_dev_as;
>  }
>  
> +static int vtd_check_legacy_hdev(IntelIOMMUState *s,
> +                                 IOMMULegacyDevice *ldev,
> +                                 Error **errp)
> +{
> +    return 0;
> +}
> +
> +static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
> +                                  IOMMUFDDevice *idev,
> +                                  Error **errp)
> +{
> +    return 0;
> +}
> +
> +static int vtd_check_hdev(IntelIOMMUState *s, VTDHostIOMMUDevice *vtd_hdev,
> +                          Error **errp)
> +{
> +    HostIOMMUDevice *base_dev = vtd_hdev->dev;
> +
> +    if (base_dev->type == HID_LEGACY) {
> +        return vtd_check_legacy_hdev(s, vtd_hdev->ldev, errp);
> +    }
> +    return vtd_check_iommufd_hdev(s, vtd_hdev->idev, errp);
Couldn't we have HostIOMMUDevice ops instead of having this check here?

Eric
> +}
> +
>  static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>                                      HostIOMMUDevice *base_dev, Error **errp)
>  {
> @@ -3829,6 +3854,7 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>          .devfn = devfn,
>      };
>      struct vtd_as_key *new_key;
> +    int ret;
>  
>      assert(base_dev);
>  
> @@ -3848,6 +3874,13 @@ static int vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>      vtd_hdev->iommu_state = s;
>      vtd_hdev->dev = base_dev;
>  
> +    ret = vtd_check_hdev(s, vtd_hdev, errp);
> +    if (ret) {
> +        g_free(vtd_hdev);
> +        vtd_iommu_unlock(s);
> +        return ret;
> +    }
> +
>      new_key = g_malloc(sizeof(*new_key));
>      new_key->bus = bus;
>      new_key->devfn = devfn;
> @@ -4083,7 +4116,9 @@ static void vtd_init(IntelIOMMUState *s)
>      s->iq_dw = false;
>      s->next_frcd_reg = 0;
>  
> -    vtd_cap_init(s);
> +    if (!s->cap_frozen) {
> +        vtd_cap_init(s);
> +    }
>  
>      /*
>       * Rsvd field masks for spte
> @@ -4254,6 +4289,10 @@ static int vtd_machine_done_notify_one(Object *child, void *unused)
>  
>  static void vtd_machine_done_hook(Notifier *notifier, void *unused)
>  {
> +    IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
> +
> +    iommu->cap_frozen = true;
> +
>      object_child_foreach_recursive(object_get_root(),
>                                     vtd_machine_done_notify_one, NULL);
>  }
RE: [PATCH rfcv2 14/18] intel_iommu: Add a framework to check and sync host IOMMU cap/ecap
Posted by Duan, Zhenzhong 9 months ago

>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH rfcv2 14/18] intel_iommu: Add a framework to check
>and sync host IOMMU cap/ecap
>
>
>
>On 2/1/24 08:28, Zhenzhong Duan wrote:
>> From: Yi Liu <yi.l.liu@intel.com>
>>
>> Add a framework to check and synchronize host IOMMU cap/ecap with
>> vIOMMU cap/ecap.
>>
>> The sequence will be:
>>
>> vtd_cap_init() initializes iommu->cap/ecap.
>> vtd_check_hdev() update iommu->cap/ecap based on host cap/ecap.
>> iommu->cap_frozen set when machine create done, iommu->cap/ecap
>become readonly.
>>
>> Implementation details for different backends will be in following patches.
>>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>> ---
>>  include/hw/i386/intel_iommu.h |  1 +
>>  hw/i386/intel_iommu.c         | 41
>++++++++++++++++++++++++++++++++++-
>>  2 files changed, 41 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/hw/i386/intel_iommu.h
>b/include/hw/i386/intel_iommu.h
>> index bbc7b96add..c71a133820 100644
>> --- a/include/hw/i386/intel_iommu.h
>> +++ b/include/hw/i386/intel_iommu.h
>> @@ -283,6 +283,7 @@ struct IntelIOMMUState {
>>
>>      uint64_t cap;                   /* The value of capability reg */
>>      uint64_t ecap;                  /* The value of extended capability reg */
>> +    bool cap_frozen;                /* cap/ecap become read-only after frozen */
>>
>>      uint32_t context_cache_gen;     /* Should be in [1,MAX] */
>>      GHashTable *iotlb;              /* IOTLB */
>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>> index ffa1ad6429..7ed2b79669 100644
>> --- a/hw/i386/intel_iommu.c
>> +++ b/hw/i386/intel_iommu.c
>> @@ -3819,6 +3819,31 @@ VTDAddressSpace
>*vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>      return vtd_dev_as;
>>  }
>>
>> +static int vtd_check_legacy_hdev(IntelIOMMUState *s,
>> +                                 IOMMULegacyDevice *ldev,
>> +                                 Error **errp)
>> +{
>> +    return 0;
>> +}
>> +
>> +static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
>> +                                  IOMMUFDDevice *idev,
>> +                                  Error **errp)
>> +{
>> +    return 0;
>> +}
>> +
>> +static int vtd_check_hdev(IntelIOMMUState *s, VTDHostIOMMUDevice
>*vtd_hdev,
>> +                          Error **errp)
>> +{
>> +    HostIOMMUDevice *base_dev = vtd_hdev->dev;
>> +
>> +    if (base_dev->type == HID_LEGACY) {
>> +        return vtd_check_legacy_hdev(s, vtd_hdev->ldev, errp);
>> +    }
>> +    return vtd_check_iommufd_hdev(s, vtd_hdev->idev, errp);
>Couldn't we have HostIOMMUDevice ops instead of having this check here?

Hmm, not sure if this is deserved. If we define such ops, it has only one function
check_hdev and we still need to check base_dev->type to assign different
function to HostIOMMUDevice.ops.check_hdev in vtd_dev_set_iommu_device().

Thanks
Zhenzhong
Re: [PATCH rfcv2 14/18] intel_iommu: Add a framework to check and sync host IOMMU cap/ecap
Posted by Eric Auger 9 months ago
Hi Zhenzhong,

On 2/26/24 08:36, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Subject: Re: [PATCH rfcv2 14/18] intel_iommu: Add a framework to check
>> and sync host IOMMU cap/ecap
>>
>>
>>
>> On 2/1/24 08:28, Zhenzhong Duan wrote:
>>> From: Yi Liu <yi.l.liu@intel.com>
>>>
>>> Add a framework to check and synchronize host IOMMU cap/ecap with
>>> vIOMMU cap/ecap.
>>>
>>> The sequence will be:
>>>
>>> vtd_cap_init() initializes iommu->cap/ecap.
>>> vtd_check_hdev() update iommu->cap/ecap based on host cap/ecap.
>>> iommu->cap_frozen set when machine create done, iommu->cap/ecap
>> become readonly.
>>> Implementation details for different backends will be in following patches.
>>>
>>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>>> Signed-off-by: Yi Sun <yi.y.sun@linux.intel.com>
>>> Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
>>> ---
>>>  include/hw/i386/intel_iommu.h |  1 +
>>>  hw/i386/intel_iommu.c         | 41
>> ++++++++++++++++++++++++++++++++++-
>>>  2 files changed, 41 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/hw/i386/intel_iommu.h
>> b/include/hw/i386/intel_iommu.h
>>> index bbc7b96add..c71a133820 100644
>>> --- a/include/hw/i386/intel_iommu.h
>>> +++ b/include/hw/i386/intel_iommu.h
>>> @@ -283,6 +283,7 @@ struct IntelIOMMUState {
>>>
>>>      uint64_t cap;                   /* The value of capability reg */
>>>      uint64_t ecap;                  /* The value of extended capability reg */
>>> +    bool cap_frozen;                /* cap/ecap become read-only after frozen */
>>>
>>>      uint32_t context_cache_gen;     /* Should be in [1,MAX] */
>>>      GHashTable *iotlb;              /* IOTLB */
>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
>>> index ffa1ad6429..7ed2b79669 100644
>>> --- a/hw/i386/intel_iommu.c
>>> +++ b/hw/i386/intel_iommu.c
>>> @@ -3819,6 +3819,31 @@ VTDAddressSpace
>> *vtd_find_add_as(IntelIOMMUState *s, PCIBus *bus,
>>>      return vtd_dev_as;
>>>  }
>>>
>>> +static int vtd_check_legacy_hdev(IntelIOMMUState *s,
>>> +                                 IOMMULegacyDevice *ldev,
>>> +                                 Error **errp)
>>> +{
>>> +    return 0;
>>> +}
>>> +
>>> +static int vtd_check_iommufd_hdev(IntelIOMMUState *s,
>>> +                                  IOMMUFDDevice *idev,
>>> +                                  Error **errp)
>>> +{
>>> +    return 0;
>>> +}
>>> +
>>> +static int vtd_check_hdev(IntelIOMMUState *s, VTDHostIOMMUDevice
>> *vtd_hdev,
>>> +                          Error **errp)
>>> +{
>>> +    HostIOMMUDevice *base_dev = vtd_hdev->dev;
>>> +
>>> +    if (base_dev->type == HID_LEGACY) {
>>> +        return vtd_check_legacy_hdev(s, vtd_hdev->ldev, errp);
>>> +    }
>>> +    return vtd_check_iommufd_hdev(s, vtd_hdev->idev, errp);
>> Couldn't we have HostIOMMUDevice ops instead of having this check here?
> Hmm, not sure if this is deserved. If we define such ops, it has only one function
> check_hdev and we still need to check base_dev->type to assign different
> function to HostIOMMUDevice.ops.check_hdev in vtd_dev_set_iommu_device().
OK maybe this is over engineered. Drop that comment for now

Thanks

Eric
>
> Thanks
> Zhenzhong