[PATCH v3 4/7] virtio-iommu: Compute host reserved regions

Eric Auger posted 7 patches 5 months, 2 weeks ago
There is a newer version of this series
[PATCH v3 4/7] virtio-iommu: Compute host reserved regions
Posted by Eric Auger 5 months, 2 weeks ago
Compute the host reserved regions in virtio_iommu_set_iommu_device().
The usable IOVA regions are retrieved from the HOSTIOMMUDevice.
The virtio_iommu_set_host_iova_ranges() helper turns usable regions
into complementary reserved regions while testing the inclusion
into existing ones. virtio_iommu_set_host_iova_ranges() reuse the
implementation of virtio_iommu_set_iova_ranges() which will be
removed in subsequent patches. rebuild_resv_regions() is just moved.

Signed-off-by: Eric Auger <eric.auger@redhat.com>

---

- added g_assert(!sdev->probe_done)
---
 hw/virtio/virtio-iommu.c | 146 ++++++++++++++++++++++++++++++---------
 1 file changed, 112 insertions(+), 34 deletions(-)

diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
index db842555c8..04474ebd74 100644
--- a/hw/virtio/virtio-iommu.c
+++ b/hw/virtio/virtio-iommu.c
@@ -498,12 +498,109 @@ get_host_iommu_device(VirtIOIOMMU *viommu, PCIBus *bus, int devfn) {
     return g_hash_table_lookup(viommu->host_iommu_devices, &key);
 }
 
+/**
+ * rebuild_resv_regions: rebuild resv regions with both the
+ * info of host resv ranges and property set resv ranges
+ */
+static int rebuild_resv_regions(IOMMUDevice *sdev)
+{
+    GList *l;
+    int i = 0;
+
+    /* free the existing list and rebuild it from scratch */
+    g_list_free_full(sdev->resv_regions, g_free);
+    sdev->resv_regions = NULL;
+
+    /* First add host reserved regions if any, all tagged as RESERVED */
+    for (l = sdev->host_resv_ranges; l; l = l->next) {
+        ReservedRegion *reg = g_new0(ReservedRegion, 1);
+        Range *r = (Range *)l->data;
+
+        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
+        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
+        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
+        trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
+                                             range_lob(&reg->range),
+                                             range_upb(&reg->range));
+        i++;
+    }
+    /*
+     * then add higher priority reserved regions set by the machine
+     * through properties
+     */
+    add_prop_resv_regions(sdev);
+    return 0;
+}
+
+static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus *bus,
+                                             int devfn, GList *iova_ranges,
+                                             Error **errp)
+{
+    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
+    IOMMUDevice *sdev;
+    GList *current_ranges;
+    GList *l, *tmp, *new_ranges = NULL;
+    int ret = -EINVAL;
+
+    if (!sbus) {
+        error_report("%s no sbus", __func__);
+    }
+
+    sdev = sbus->pbdev[devfn];
+
+    current_ranges = sdev->host_resv_ranges;
+
+    g_assert(!sdev->probe_done);
+
+    /* check that each new resv region is included in an existing one */
+    if (sdev->host_resv_ranges) {
+        range_inverse_array(iova_ranges,
+                            &new_ranges,
+                            0, UINT64_MAX);
+
+        for (tmp = new_ranges; tmp; tmp = tmp->next) {
+            Range *newr = (Range *)tmp->data;
+            bool included = false;
+
+            for (l = current_ranges; l; l = l->next) {
+                Range * r = (Range *)l->data;
+
+                if (range_contains_range(r, newr)) {
+                    included = true;
+                    break;
+                }
+            }
+            if (!included) {
+                goto error;
+            }
+        }
+        /* all new reserved ranges are included in existing ones */
+        ret = 0;
+        goto out;
+    }
+
+    range_inverse_array(iova_ranges,
+                        &sdev->host_resv_ranges,
+                        0, UINT64_MAX);
+    rebuild_resv_regions(sdev);
+
+    return 0;
+error:
+    error_setg(errp, "%s Conflicting host reserved ranges set!",
+               __func__);
+out:
+    g_list_free_full(new_ranges, g_free);
+    return ret;
+}
+
 static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
                                           HostIOMMUDevice *hiod, Error **errp)
 {
     VirtIOIOMMU *viommu = opaque;
     VirtioHostIOMMUDevice *vhiod;
+    HostIOMMUDeviceClass *hiodc = HOST_IOMMU_DEVICE_GET_CLASS(hiod);
     struct hiod_key *new_key;
+    GList *host_iova_ranges = NULL;
 
     assert(hiod);
 
@@ -513,6 +610,20 @@ static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
         return false;
     }
 
+    if (hiodc->get_iova_ranges) {
+        int ret;
+        host_iova_ranges = hiodc->get_iova_ranges(hiod, errp);
+        if (!host_iova_ranges) {
+            return true; /* some old kernels may not support that capability */
+        }
+        ret = virtio_iommu_set_host_iova_ranges(viommu, bus, devfn,
+                                                host_iova_ranges, errp);
+        if (ret) {
+            g_list_free_full(host_iova_ranges, g_free);
+            return false;
+        }
+    }
+
     vhiod = g_malloc0(sizeof(VirtioHostIOMMUDevice));
     vhiod->bus = bus;
     vhiod->devfn = (uint8_t)devfn;
@@ -525,6 +636,7 @@ static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
 
     object_ref(hiod);
     g_hash_table_insert(viommu->host_iommu_devices, new_key, vhiod);
+    g_list_free_full(host_iova_ranges, g_free);
 
     return true;
 }
@@ -1246,40 +1358,6 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
     return 0;
 }
 
-/**
- * rebuild_resv_regions: rebuild resv regions with both the
- * info of host resv ranges and property set resv ranges
- */
-static int rebuild_resv_regions(IOMMUDevice *sdev)
-{
-    GList *l;
-    int i = 0;
-
-    /* free the existing list and rebuild it from scratch */
-    g_list_free_full(sdev->resv_regions, g_free);
-    sdev->resv_regions = NULL;
-
-    /* First add host reserved regions if any, all tagged as RESERVED */
-    for (l = sdev->host_resv_ranges; l; l = l->next) {
-        ReservedRegion *reg = g_new0(ReservedRegion, 1);
-        Range *r = (Range *)l->data;
-
-        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
-        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
-        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
-        trace_virtio_iommu_host_resv_regions(sdev->iommu_mr.parent_obj.name, i,
-                                             range_lob(&reg->range),
-                                             range_upb(&reg->range));
-        i++;
-    }
-    /*
-     * then add higher priority reserved regions set by the machine
-     * through properties
-     */
-    add_prop_resv_regions(sdev);
-    return 0;
-}
-
 /**
  * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
  *
-- 
2.41.0
RE: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
Posted by Duan, Zhenzhong 5 months, 2 weeks ago
Hi Eric,

>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
>
>Compute the host reserved regions in virtio_iommu_set_iommu_device().
>The usable IOVA regions are retrieved from the HOSTIOMMUDevice.
>The virtio_iommu_set_host_iova_ranges() helper turns usable regions
>into complementary reserved regions while testing the inclusion
>into existing ones. virtio_iommu_set_host_iova_ranges() reuse the
>implementation of virtio_iommu_set_iova_ranges() which will be
>removed in subsequent patches. rebuild_resv_regions() is just moved.
>
>Signed-off-by: Eric Auger <eric.auger@redhat.com>
>
>---
>
>- added g_assert(!sdev->probe_done)
>---
> hw/virtio/virtio-iommu.c | 146 ++++++++++++++++++++++++++++++--------
>-
> 1 file changed, 112 insertions(+), 34 deletions(-)
>
>diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>index db842555c8..04474ebd74 100644
>--- a/hw/virtio/virtio-iommu.c
>+++ b/hw/virtio/virtio-iommu.c
>@@ -498,12 +498,109 @@ get_host_iommu_device(VirtIOIOMMU
>*viommu, PCIBus *bus, int devfn) {
>     return g_hash_table_lookup(viommu->host_iommu_devices, &key);
> }
>
>+/**
>+ * rebuild_resv_regions: rebuild resv regions with both the
>+ * info of host resv ranges and property set resv ranges
>+ */
>+static int rebuild_resv_regions(IOMMUDevice *sdev)
>+{
>+    GList *l;
>+    int i = 0;
>+
>+    /* free the existing list and rebuild it from scratch */
>+    g_list_free_full(sdev->resv_regions, g_free);
>+    sdev->resv_regions = NULL;
>+
>+    /* First add host reserved regions if any, all tagged as RESERVED */
>+    for (l = sdev->host_resv_ranges; l; l = l->next) {
>+        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>+        Range *r = (Range *)l->data;
>+
>+        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>+        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>+        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
>+        trace_virtio_iommu_host_resv_regions(sdev-
>>iommu_mr.parent_obj.name, i,
>+                                             range_lob(&reg->range),
>+                                             range_upb(&reg->range));
>+        i++;
>+    }
>+    /*
>+     * then add higher priority reserved regions set by the machine
>+     * through properties
>+     */
>+    add_prop_resv_regions(sdev);
>+    return 0;
>+}
>+
>+static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus
>*bus,
>+                                             int devfn, GList *iova_ranges,
>+                                             Error **errp)
>+{
>+    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);

Here the bus/devfn parameters are real device BDF not aliased one,
But used to index s->as_by_busptr which expect aliased bus/devfn.

Do we need a translation of bus/devfn?

Thanks
Zhenzhong

>+    IOMMUDevice *sdev;
>+    GList *current_ranges;
>+    GList *l, *tmp, *new_ranges = NULL;
>+    int ret = -EINVAL;
>+
>+    if (!sbus) {
>+        error_report("%s no sbus", __func__);
>+    }
>+
>+    sdev = sbus->pbdev[devfn];
>+
>+    current_ranges = sdev->host_resv_ranges;
>+
>+    g_assert(!sdev->probe_done);
>+
>+    /* check that each new resv region is included in an existing one */
>+    if (sdev->host_resv_ranges) {
>+        range_inverse_array(iova_ranges,
>+                            &new_ranges,
>+                            0, UINT64_MAX);
>+
>+        for (tmp = new_ranges; tmp; tmp = tmp->next) {
>+            Range *newr = (Range *)tmp->data;
>+            bool included = false;
>+
>+            for (l = current_ranges; l; l = l->next) {
>+                Range * r = (Range *)l->data;
>+
>+                if (range_contains_range(r, newr)) {
>+                    included = true;
>+                    break;
>+                }
>+            }
>+            if (!included) {
>+                goto error;
>+            }
>+        }
>+        /* all new reserved ranges are included in existing ones */
>+        ret = 0;
>+        goto out;
>+    }
>+
>+    range_inverse_array(iova_ranges,
>+                        &sdev->host_resv_ranges,
>+                        0, UINT64_MAX);
>+    rebuild_resv_regions(sdev);
>+
>+    return 0;
>+error:
>+    error_setg(errp, "%s Conflicting host reserved ranges set!",
>+               __func__);
>+out:
>+    g_list_free_full(new_ranges, g_free);
>+    return ret;
>+}
>+
> static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque,
>int devfn,
>                                           HostIOMMUDevice *hiod, Error **errp)
> {
>     VirtIOIOMMU *viommu = opaque;
>     VirtioHostIOMMUDevice *vhiod;
>+    HostIOMMUDeviceClass *hiodc =
>HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>     struct hiod_key *new_key;
>+    GList *host_iova_ranges = NULL;
>
>     assert(hiod);
>
>@@ -513,6 +610,20 @@ static bool
>virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>         return false;
>     }
>
>+    if (hiodc->get_iova_ranges) {
>+        int ret;
>+        host_iova_ranges = hiodc->get_iova_ranges(hiod, errp);
>+        if (!host_iova_ranges) {
>+            return true; /* some old kernels may not support that capability */
>+        }
>+        ret = virtio_iommu_set_host_iova_ranges(viommu, bus, devfn,
>+                                                host_iova_ranges, errp);
>+        if (ret) {
>+            g_list_free_full(host_iova_ranges, g_free);
>+            return false;
>+        }
>+    }
>+
>     vhiod = g_malloc0(sizeof(VirtioHostIOMMUDevice));
>     vhiod->bus = bus;
>     vhiod->devfn = (uint8_t)devfn;
>@@ -525,6 +636,7 @@ static bool
>virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>
>     object_ref(hiod);
>     g_hash_table_insert(viommu->host_iommu_devices, new_key, vhiod);
>+    g_list_free_full(host_iova_ranges, g_free);
>
>     return true;
> }
>@@ -1246,40 +1358,6 @@ static int
>virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
>     return 0;
> }
>
>-/**
>- * rebuild_resv_regions: rebuild resv regions with both the
>- * info of host resv ranges and property set resv ranges
>- */
>-static int rebuild_resv_regions(IOMMUDevice *sdev)
>-{
>-    GList *l;
>-    int i = 0;
>-
>-    /* free the existing list and rebuild it from scratch */
>-    g_list_free_full(sdev->resv_regions, g_free);
>-    sdev->resv_regions = NULL;
>-
>-    /* First add host reserved regions if any, all tagged as RESERVED */
>-    for (l = sdev->host_resv_ranges; l; l = l->next) {
>-        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>-        Range *r = (Range *)l->data;
>-
>-        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>-        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>-        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
>-        trace_virtio_iommu_host_resv_regions(sdev-
>>iommu_mr.parent_obj.name, i,
>-                                             range_lob(&reg->range),
>-                                             range_upb(&reg->range));
>-        i++;
>-    }
>-    /*
>-     * then add higher priority reserved regions set by the machine
>-     * through properties
>-     */
>-    add_prop_resv_regions(sdev);
>-    return 0;
>-}
>-
> /**
>  * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
>  *
>--
>2.41.0
Re: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
Posted by Eric Auger 5 months, 2 weeks ago

On 6/13/24 12:00, Duan, Zhenzhong wrote:
> Hi Eric,
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Subject: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
>>
>> Compute the host reserved regions in virtio_iommu_set_iommu_device().
>> The usable IOVA regions are retrieved from the HOSTIOMMUDevice.
>> The virtio_iommu_set_host_iova_ranges() helper turns usable regions
>> into complementary reserved regions while testing the inclusion
>> into existing ones. virtio_iommu_set_host_iova_ranges() reuse the
>> implementation of virtio_iommu_set_iova_ranges() which will be
>> removed in subsequent patches. rebuild_resv_regions() is just moved.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> - added g_assert(!sdev->probe_done)
>> ---
>> hw/virtio/virtio-iommu.c | 146 ++++++++++++++++++++++++++++++--------
>> -
>> 1 file changed, 112 insertions(+), 34 deletions(-)
>>
>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>> index db842555c8..04474ebd74 100644
>> --- a/hw/virtio/virtio-iommu.c
>> +++ b/hw/virtio/virtio-iommu.c
>> @@ -498,12 +498,109 @@ get_host_iommu_device(VirtIOIOMMU
>> *viommu, PCIBus *bus, int devfn) {
>>     return g_hash_table_lookup(viommu->host_iommu_devices, &key);
>> }
>>
>> +/**
>> + * rebuild_resv_regions: rebuild resv regions with both the
>> + * info of host resv ranges and property set resv ranges
>> + */
>> +static int rebuild_resv_regions(IOMMUDevice *sdev)
>> +{
>> +    GList *l;
>> +    int i = 0;
>> +
>> +    /* free the existing list and rebuild it from scratch */
>> +    g_list_free_full(sdev->resv_regions, g_free);
>> +    sdev->resv_regions = NULL;
>> +
>> +    /* First add host reserved regions if any, all tagged as RESERVED */
>> +    for (l = sdev->host_resv_ranges; l; l = l->next) {
>> +        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>> +        Range *r = (Range *)l->data;
>> +
>> +        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>> +        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>> +        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
>> +        trace_virtio_iommu_host_resv_regions(sdev-
>>> iommu_mr.parent_obj.name, i,
>> +                                             range_lob(&reg->range),
>> +                                             range_upb(&reg->range));
>> +        i++;
>> +    }
>> +    /*
>> +     * then add higher priority reserved regions set by the machine
>> +     * through properties
>> +     */
>> +    add_prop_resv_regions(sdev);
>> +    return 0;
>> +}
>> +
>> +static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus
>> *bus,
>> +                                             int devfn, GList *iova_ranges,
>> +                                             Error **errp)
>> +{
>> +    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
> Here the bus/devfn parameters are real device BDF not aliased one,
> But used to index s->as_by_busptr which expect aliased bus/devfn.
>
> Do we need a translation of bus/devfn?

Hum that's a good point actually. I need to further study that. that's
not easy to translate, is it?

Now I am not totally sure why we don't use the alias as well for
HostIOMMUDevices or at least store the aliased bdf.

Thanks

Eric
>
> Thanks
> Zhenzhong
>
>> +    IOMMUDevice *sdev;
>> +    GList *current_ranges;
>> +    GList *l, *tmp, *new_ranges = NULL;
>> +    int ret = -EINVAL;
>> +
>> +    if (!sbus) {
>> +        error_report("%s no sbus", __func__);
>> +    }
>> +
>> +    sdev = sbus->pbdev[devfn];
>> +
>> +    current_ranges = sdev->host_resv_ranges;
>> +
>> +    g_assert(!sdev->probe_done);
>> +
>> +    /* check that each new resv region is included in an existing one */
>> +    if (sdev->host_resv_ranges) {
>> +        range_inverse_array(iova_ranges,
>> +                            &new_ranges,
>> +                            0, UINT64_MAX);
>> +
>> +        for (tmp = new_ranges; tmp; tmp = tmp->next) {
>> +            Range *newr = (Range *)tmp->data;
>> +            bool included = false;
>> +
>> +            for (l = current_ranges; l; l = l->next) {
>> +                Range * r = (Range *)l->data;
>> +
>> +                if (range_contains_range(r, newr)) {
>> +                    included = true;
>> +                    break;
>> +                }
>> +            }
>> +            if (!included) {
>> +                goto error;
>> +            }
>> +        }
>> +        /* all new reserved ranges are included in existing ones */
>> +        ret = 0;
>> +        goto out;
>> +    }
>> +
>> +    range_inverse_array(iova_ranges,
>> +                        &sdev->host_resv_ranges,
>> +                        0, UINT64_MAX);
>> +    rebuild_resv_regions(sdev);
>> +
>> +    return 0;
>> +error:
>> +    error_setg(errp, "%s Conflicting host reserved ranges set!",
>> +               __func__);
>> +out:
>> +    g_list_free_full(new_ranges, g_free);
>> +    return ret;
>> +}
>> +
>> static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque,
>> int devfn,
>>                                           HostIOMMUDevice *hiod, Error **errp)
>> {
>>     VirtIOIOMMU *viommu = opaque;
>>     VirtioHostIOMMUDevice *vhiod;
>> +    HostIOMMUDeviceClass *hiodc =
>> HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>     struct hiod_key *new_key;
>> +    GList *host_iova_ranges = NULL;
>>
>>     assert(hiod);
>>
>> @@ -513,6 +610,20 @@ static bool
>> virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>>         return false;
>>     }
>>
>> +    if (hiodc->get_iova_ranges) {
>> +        int ret;
>> +        host_iova_ranges = hiodc->get_iova_ranges(hiod, errp);
>> +        if (!host_iova_ranges) {
>> +            return true; /* some old kernels may not support that capability */
>> +        }
>> +        ret = virtio_iommu_set_host_iova_ranges(viommu, bus, devfn,
>> +                                                host_iova_ranges, errp);
>> +        if (ret) {
>> +            g_list_free_full(host_iova_ranges, g_free);
>> +            return false;
>> +        }
>> +    }
>> +
>>     vhiod = g_malloc0(sizeof(VirtioHostIOMMUDevice));
>>     vhiod->bus = bus;
>>     vhiod->devfn = (uint8_t)devfn;
>> @@ -525,6 +636,7 @@ static bool
>> virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>>
>>     object_ref(hiod);
>>     g_hash_table_insert(viommu->host_iommu_devices, new_key, vhiod);
>> +    g_list_free_full(host_iova_ranges, g_free);
>>
>>     return true;
>> }
>> @@ -1246,40 +1358,6 @@ static int
>> virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
>>     return 0;
>> }
>>
>> -/**
>> - * rebuild_resv_regions: rebuild resv regions with both the
>> - * info of host resv ranges and property set resv ranges
>> - */
>> -static int rebuild_resv_regions(IOMMUDevice *sdev)
>> -{
>> -    GList *l;
>> -    int i = 0;
>> -
>> -    /* free the existing list and rebuild it from scratch */
>> -    g_list_free_full(sdev->resv_regions, g_free);
>> -    sdev->resv_regions = NULL;
>> -
>> -    /* First add host reserved regions if any, all tagged as RESERVED */
>> -    for (l = sdev->host_resv_ranges; l; l = l->next) {
>> -        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>> -        Range *r = (Range *)l->data;
>> -
>> -        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>> -        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>> -        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions, reg);
>> -        trace_virtio_iommu_host_resv_regions(sdev-
>>> iommu_mr.parent_obj.name, i,
>> -                                             range_lob(&reg->range),
>> -                                             range_upb(&reg->range));
>> -        i++;
>> -    }
>> -    /*
>> -     * then add higher priority reserved regions set by the machine
>> -     * through properties
>> -     */
>> -    add_prop_resv_regions(sdev);
>> -    return 0;
>> -}
>> -
>> /**
>>  * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
>>  *
>> --
>> 2.41.0
RE: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
Posted by Duan, Zhenzhong 5 months, 2 weeks ago

>-----Original Message-----
>From: Eric Auger <eric.auger@redhat.com>
>Subject: Re: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
>
>
>
>On 6/13/24 12:00, Duan, Zhenzhong wrote:
>> Hi Eric,
>>
>>> -----Original Message-----
>>> From: Eric Auger <eric.auger@redhat.com>
>>> Subject: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
>>>
>>> Compute the host reserved regions in virtio_iommu_set_iommu_device().
>>> The usable IOVA regions are retrieved from the HOSTIOMMUDevice.
>>> The virtio_iommu_set_host_iova_ranges() helper turns usable regions
>>> into complementary reserved regions while testing the inclusion
>>> into existing ones. virtio_iommu_set_host_iova_ranges() reuse the
>>> implementation of virtio_iommu_set_iova_ranges() which will be
>>> removed in subsequent patches. rebuild_resv_regions() is just moved.
>>>
>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>
>>> ---
>>>
>>> - added g_assert(!sdev->probe_done)
>>> ---
>>> hw/virtio/virtio-iommu.c | 146 ++++++++++++++++++++++++++++++----
>----
>>> -
>>> 1 file changed, 112 insertions(+), 34 deletions(-)
>>>
>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>> index db842555c8..04474ebd74 100644
>>> --- a/hw/virtio/virtio-iommu.c
>>> +++ b/hw/virtio/virtio-iommu.c
>>> @@ -498,12 +498,109 @@ get_host_iommu_device(VirtIOIOMMU
>>> *viommu, PCIBus *bus, int devfn) {
>>>     return g_hash_table_lookup(viommu->host_iommu_devices, &key);
>>> }
>>>
>>> +/**
>>> + * rebuild_resv_regions: rebuild resv regions with both the
>>> + * info of host resv ranges and property set resv ranges
>>> + */
>>> +static int rebuild_resv_regions(IOMMUDevice *sdev)
>>> +{
>>> +    GList *l;
>>> +    int i = 0;
>>> +
>>> +    /* free the existing list and rebuild it from scratch */
>>> +    g_list_free_full(sdev->resv_regions, g_free);
>>> +    sdev->resv_regions = NULL;
>>> +
>>> +    /* First add host reserved regions if any, all tagged as RESERVED */
>>> +    for (l = sdev->host_resv_ranges; l; l = l->next) {
>>> +        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>>> +        Range *r = (Range *)l->data;
>>> +
>>> +        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>>> +        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>>> +        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions,
>reg);
>>> +        trace_virtio_iommu_host_resv_regions(sdev-
>>>> iommu_mr.parent_obj.name, i,
>>> +                                             range_lob(&reg->range),
>>> +                                             range_upb(&reg->range));
>>> +        i++;
>>> +    }
>>> +    /*
>>> +     * then add higher priority reserved regions set by the machine
>>> +     * through properties
>>> +     */
>>> +    add_prop_resv_regions(sdev);
>>> +    return 0;
>>> +}
>>> +
>>> +static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus
>>> *bus,
>>> +                                             int devfn, GList *iova_ranges,
>>> +                                             Error **errp)
>>> +{
>>> +    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
>> Here the bus/devfn parameters are real device BDF not aliased one,
>> But used to index s->as_by_busptr which expect aliased bus/devfn.
>>
>> Do we need a translation of bus/devfn?
>
>Hum that's a good point actually. I need to further study that. that's
>not easy to translate, is it?

Yes, may need a path to call into pci_device_get_iommu_bus_devfn().
Maybe a new HostIOMMUDevice callback.

>
>Now I am not totally sure why we don't use the alias as well for
>HostIOMMUDevices or at least store the aliased bdf.

Because we need to store HostIOMMUDevice in vIOMMU in real bdf granularity,
Not aliased bdf granularity. Virtual vtd calls VFIO device uAPI on real device not
the aliased device, i.e., VFIO_DEVICE_[ATTACH|DETACH]_IOMMUFD_PT.

I also have a question, could we define host_resv_ranges in VirtioHostIOMMUDevice
to avoid translation?

Thanks
Zhenzhong

>
>Thanks
>
>Eric
>>
>> Thanks
>> Zhenzhong
>>
>>> +    IOMMUDevice *sdev;
>>> +    GList *current_ranges;
>>> +    GList *l, *tmp, *new_ranges = NULL;
>>> +    int ret = -EINVAL;
>>> +
>>> +    if (!sbus) {
>>> +        error_report("%s no sbus", __func__);
>>> +    }
>>> +
>>> +    sdev = sbus->pbdev[devfn];
>>> +
>>> +    current_ranges = sdev->host_resv_ranges;
>>> +
>>> +    g_assert(!sdev->probe_done);
>>> +
>>> +    /* check that each new resv region is included in an existing one */
>>> +    if (sdev->host_resv_ranges) {
>>> +        range_inverse_array(iova_ranges,
>>> +                            &new_ranges,
>>> +                            0, UINT64_MAX);
>>> +
>>> +        for (tmp = new_ranges; tmp; tmp = tmp->next) {
>>> +            Range *newr = (Range *)tmp->data;
>>> +            bool included = false;
>>> +
>>> +            for (l = current_ranges; l; l = l->next) {
>>> +                Range * r = (Range *)l->data;
>>> +
>>> +                if (range_contains_range(r, newr)) {
>>> +                    included = true;
>>> +                    break;
>>> +                }
>>> +            }
>>> +            if (!included) {
>>> +                goto error;
>>> +            }
>>> +        }
>>> +        /* all new reserved ranges are included in existing ones */
>>> +        ret = 0;
>>> +        goto out;
>>> +    }
>>> +
>>> +    range_inverse_array(iova_ranges,
>>> +                        &sdev->host_resv_ranges,
>>> +                        0, UINT64_MAX);
>>> +    rebuild_resv_regions(sdev);
>>> +
>>> +    return 0;
>>> +error:
>>> +    error_setg(errp, "%s Conflicting host reserved ranges set!",
>>> +               __func__);
>>> +out:
>>> +    g_list_free_full(new_ranges, g_free);
>>> +    return ret;
>>> +}
>>> +
>>> static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque,
>>> int devfn,
>>>                                           HostIOMMUDevice *hiod, Error **errp)
>>> {
>>>     VirtIOIOMMU *viommu = opaque;
>>>     VirtioHostIOMMUDevice *vhiod;
>>> +    HostIOMMUDeviceClass *hiodc =
>>> HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>>     struct hiod_key *new_key;
>>> +    GList *host_iova_ranges = NULL;
>>>
>>>     assert(hiod);
>>>
>>> @@ -513,6 +610,20 @@ static bool
>>> virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>>>         return false;
>>>     }
>>>
>>> +    if (hiodc->get_iova_ranges) {
>>> +        int ret;
>>> +        host_iova_ranges = hiodc->get_iova_ranges(hiod, errp);
>>> +        if (!host_iova_ranges) {
>>> +            return true; /* some old kernels may not support that capability
>*/
>>> +        }
>>> +        ret = virtio_iommu_set_host_iova_ranges(viommu, bus, devfn,
>>> +                                                host_iova_ranges, errp);
>>> +        if (ret) {
>>> +            g_list_free_full(host_iova_ranges, g_free);
>>> +            return false;
>>> +        }
>>> +    }
>>> +
>>>     vhiod = g_malloc0(sizeof(VirtioHostIOMMUDevice));
>>>     vhiod->bus = bus;
>>>     vhiod->devfn = (uint8_t)devfn;
>>> @@ -525,6 +636,7 @@ static bool
>>> virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>>>
>>>     object_ref(hiod);
>>>     g_hash_table_insert(viommu->host_iommu_devices, new_key, vhiod);
>>> +    g_list_free_full(host_iova_ranges, g_free);
>>>
>>>     return true;
>>> }
>>> @@ -1246,40 +1358,6 @@ static int
>>> virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
>>>     return 0;
>>> }
>>>
>>> -/**
>>> - * rebuild_resv_regions: rebuild resv regions with both the
>>> - * info of host resv ranges and property set resv ranges
>>> - */
>>> -static int rebuild_resv_regions(IOMMUDevice *sdev)
>>> -{
>>> -    GList *l;
>>> -    int i = 0;
>>> -
>>> -    /* free the existing list and rebuild it from scratch */
>>> -    g_list_free_full(sdev->resv_regions, g_free);
>>> -    sdev->resv_regions = NULL;
>>> -
>>> -    /* First add host reserved regions if any, all tagged as RESERVED */
>>> -    for (l = sdev->host_resv_ranges; l; l = l->next) {
>>> -        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>>> -        Range *r = (Range *)l->data;
>>> -
>>> -        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>>> -        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>>> -        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions,
>reg);
>>> -        trace_virtio_iommu_host_resv_regions(sdev-
>>>> iommu_mr.parent_obj.name, i,
>>> -                                             range_lob(&reg->range),
>>> -                                             range_upb(&reg->range));
>>> -        i++;
>>> -    }
>>> -    /*
>>> -     * then add higher priority reserved regions set by the machine
>>> -     * through properties
>>> -     */
>>> -    add_prop_resv_regions(sdev);
>>> -    return 0;
>>> -}
>>> -
>>> /**
>>>  * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
>>>  *
>>> --
>>> 2.41.0

Re: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
Posted by Eric Auger 5 months, 1 week ago
Hi Zhenzhong,

On 6/14/24 05:05, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Subject: Re: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
>>
>>
>>
>> On 6/13/24 12:00, Duan, Zhenzhong wrote:
>>> Hi Eric,
>>>
>>>> -----Original Message-----
>>>> From: Eric Auger <eric.auger@redhat.com>
>>>> Subject: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
>>>>
>>>> Compute the host reserved regions in virtio_iommu_set_iommu_device().
>>>> The usable IOVA regions are retrieved from the HOSTIOMMUDevice.
>>>> The virtio_iommu_set_host_iova_ranges() helper turns usable regions
>>>> into complementary reserved regions while testing the inclusion
>>>> into existing ones. virtio_iommu_set_host_iova_ranges() reuse the
>>>> implementation of virtio_iommu_set_iova_ranges() which will be
>>>> removed in subsequent patches. rebuild_resv_regions() is just moved.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>
>>>> ---
>>>>
>>>> - added g_assert(!sdev->probe_done)
>>>> ---
>>>> hw/virtio/virtio-iommu.c | 146 ++++++++++++++++++++++++++++++----
>> ----
>>>> -
>>>> 1 file changed, 112 insertions(+), 34 deletions(-)
>>>>
>>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>>> index db842555c8..04474ebd74 100644
>>>> --- a/hw/virtio/virtio-iommu.c
>>>> +++ b/hw/virtio/virtio-iommu.c
>>>> @@ -498,12 +498,109 @@ get_host_iommu_device(VirtIOIOMMU
>>>> *viommu, PCIBus *bus, int devfn) {
>>>>     return g_hash_table_lookup(viommu->host_iommu_devices, &key);
>>>> }
>>>>
>>>> +/**
>>>> + * rebuild_resv_regions: rebuild resv regions with both the
>>>> + * info of host resv ranges and property set resv ranges
>>>> + */
>>>> +static int rebuild_resv_regions(IOMMUDevice *sdev)
>>>> +{
>>>> +    GList *l;
>>>> +    int i = 0;
>>>> +
>>>> +    /* free the existing list and rebuild it from scratch */
>>>> +    g_list_free_full(sdev->resv_regions, g_free);
>>>> +    sdev->resv_regions = NULL;
>>>> +
>>>> +    /* First add host reserved regions if any, all tagged as RESERVED */
>>>> +    for (l = sdev->host_resv_ranges; l; l = l->next) {
>>>> +        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>>>> +        Range *r = (Range *)l->data;
>>>> +
>>>> +        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>>>> +        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>>>> +        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions,
>> reg);
>>>> +        trace_virtio_iommu_host_resv_regions(sdev-
>>>>> iommu_mr.parent_obj.name, i,
>>>> +                                             range_lob(&reg->range),
>>>> +                                             range_upb(&reg->range));
>>>> +        i++;
>>>> +    }
>>>> +    /*
>>>> +     * then add higher priority reserved regions set by the machine
>>>> +     * through properties
>>>> +     */
>>>> +    add_prop_resv_regions(sdev);
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus
>>>> *bus,
>>>> +                                             int devfn, GList *iova_ranges,
>>>> +                                             Error **errp)
>>>> +{
>>>> +    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
>>> Here the bus/devfn parameters are real device BDF not aliased one,
>>> But used to index s->as_by_busptr which expect aliased bus/devfn.
>>>
>>> Do we need a translation of bus/devfn?
>> Hum that's a good point actually. I need to further study that. that's
>> not easy to translate, is it?
> Yes, may need a path to call into pci_device_get_iommu_bus_devfn().
> Maybe a new HostIOMMUDevice callback.
>
>> Now I am not totally sure why we don't use the alias as well for
>> HostIOMMUDevices or at least store the aliased bdf.
> Because we need to store HostIOMMUDevice in vIOMMU in real bdf granularity,
> Not aliased bdf granularity. Virtual vtd calls VFIO device uAPI on real device not
> the aliased device, i.e., VFIO_DEVICE_[ATTACH|DETACH]_IOMMUFD_PT.
>
> I also have a question, could we define host_resv_ranges in VirtioHostIOMMUDevice
> to avoid translation?
I would suggest to pass the aliased info through the set_iommu_device()
and store them in the HostIOMMUDevice. I will submit a patch accordingly

Thanks

Eric


>
> Thanks
> Zhenzhong
>
>> Thanks
>>
>> Eric
>>> Thanks
>>> Zhenzhong
>>>
>>>> +    IOMMUDevice *sdev;
>>>> +    GList *current_ranges;
>>>> +    GList *l, *tmp, *new_ranges = NULL;
>>>> +    int ret = -EINVAL;
>>>> +
>>>> +    if (!sbus) {
>>>> +        error_report("%s no sbus", __func__);
>>>> +    }
>>>> +
>>>> +    sdev = sbus->pbdev[devfn];
>>>> +
>>>> +    current_ranges = sdev->host_resv_ranges;
>>>> +
>>>> +    g_assert(!sdev->probe_done);
>>>> +
>>>> +    /* check that each new resv region is included in an existing one */
>>>> +    if (sdev->host_resv_ranges) {
>>>> +        range_inverse_array(iova_ranges,
>>>> +                            &new_ranges,
>>>> +                            0, UINT64_MAX);
>>>> +
>>>> +        for (tmp = new_ranges; tmp; tmp = tmp->next) {
>>>> +            Range *newr = (Range *)tmp->data;
>>>> +            bool included = false;
>>>> +
>>>> +            for (l = current_ranges; l; l = l->next) {
>>>> +                Range * r = (Range *)l->data;
>>>> +
>>>> +                if (range_contains_range(r, newr)) {
>>>> +                    included = true;
>>>> +                    break;
>>>> +                }
>>>> +            }
>>>> +            if (!included) {
>>>> +                goto error;
>>>> +            }
>>>> +        }
>>>> +        /* all new reserved ranges are included in existing ones */
>>>> +        ret = 0;
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    range_inverse_array(iova_ranges,
>>>> +                        &sdev->host_resv_ranges,
>>>> +                        0, UINT64_MAX);
>>>> +    rebuild_resv_regions(sdev);
>>>> +
>>>> +    return 0;
>>>> +error:
>>>> +    error_setg(errp, "%s Conflicting host reserved ranges set!",
>>>> +               __func__);
>>>> +out:
>>>> +    g_list_free_full(new_ranges, g_free);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque,
>>>> int devfn,
>>>>                                           HostIOMMUDevice *hiod, Error **errp)
>>>> {
>>>>     VirtIOIOMMU *viommu = opaque;
>>>>     VirtioHostIOMMUDevice *vhiod;
>>>> +    HostIOMMUDeviceClass *hiodc =
>>>> HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>>>     struct hiod_key *new_key;
>>>> +    GList *host_iova_ranges = NULL;
>>>>
>>>>     assert(hiod);
>>>>
>>>> @@ -513,6 +610,20 @@ static bool
>>>> virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>>>>         return false;
>>>>     }
>>>>
>>>> +    if (hiodc->get_iova_ranges) {
>>>> +        int ret;
>>>> +        host_iova_ranges = hiodc->get_iova_ranges(hiod, errp);
>>>> +        if (!host_iova_ranges) {
>>>> +            return true; /* some old kernels may not support that capability
>> */
>>>> +        }
>>>> +        ret = virtio_iommu_set_host_iova_ranges(viommu, bus, devfn,
>>>> +                                                host_iova_ranges, errp);
>>>> +        if (ret) {
>>>> +            g_list_free_full(host_iova_ranges, g_free);
>>>> +            return false;
>>>> +        }
>>>> +    }
>>>> +
>>>>     vhiod = g_malloc0(sizeof(VirtioHostIOMMUDevice));
>>>>     vhiod->bus = bus;
>>>>     vhiod->devfn = (uint8_t)devfn;
>>>> @@ -525,6 +636,7 @@ static bool
>>>> virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>>>>
>>>>     object_ref(hiod);
>>>>     g_hash_table_insert(viommu->host_iommu_devices, new_key, vhiod);
>>>> +    g_list_free_full(host_iova_ranges, g_free);
>>>>
>>>>     return true;
>>>> }
>>>> @@ -1246,40 +1358,6 @@ static int
>>>> virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
>>>>     return 0;
>>>> }
>>>>
>>>> -/**
>>>> - * rebuild_resv_regions: rebuild resv regions with both the
>>>> - * info of host resv ranges and property set resv ranges
>>>> - */
>>>> -static int rebuild_resv_regions(IOMMUDevice *sdev)
>>>> -{
>>>> -    GList *l;
>>>> -    int i = 0;
>>>> -
>>>> -    /* free the existing list and rebuild it from scratch */
>>>> -    g_list_free_full(sdev->resv_regions, g_free);
>>>> -    sdev->resv_regions = NULL;
>>>> -
>>>> -    /* First add host reserved regions if any, all tagged as RESERVED */
>>>> -    for (l = sdev->host_resv_ranges; l; l = l->next) {
>>>> -        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>>>> -        Range *r = (Range *)l->data;
>>>> -
>>>> -        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>>>> -        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>>>> -        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions,
>> reg);
>>>> -        trace_virtio_iommu_host_resv_regions(sdev-
>>>>> iommu_mr.parent_obj.name, i,
>>>> -                                             range_lob(&reg->range),
>>>> -                                             range_upb(&reg->range));
>>>> -        i++;
>>>> -    }
>>>> -    /*
>>>> -     * then add higher priority reserved regions set by the machine
>>>> -     * through properties
>>>> -     */
>>>> -    add_prop_resv_regions(sdev);
>>>> -    return 0;
>>>> -}
>>>> -
>>>> /**
>>>>  * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
>>>>  *
>>>> --
>>>> 2.41.0
Re: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
Posted by Eric Auger 5 months, 1 week ago

On 6/14/24 05:05, Duan, Zhenzhong wrote:
>
>> -----Original Message-----
>> From: Eric Auger <eric.auger@redhat.com>
>> Subject: Re: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
>>
>>
>>
>> On 6/13/24 12:00, Duan, Zhenzhong wrote:
>>> Hi Eric,
>>>
>>>> -----Original Message-----
>>>> From: Eric Auger <eric.auger@redhat.com>
>>>> Subject: [PATCH v3 4/7] virtio-iommu: Compute host reserved regions
>>>>
>>>> Compute the host reserved regions in virtio_iommu_set_iommu_device().
>>>> The usable IOVA regions are retrieved from the HOSTIOMMUDevice.
>>>> The virtio_iommu_set_host_iova_ranges() helper turns usable regions
>>>> into complementary reserved regions while testing the inclusion
>>>> into existing ones. virtio_iommu_set_host_iova_ranges() reuse the
>>>> implementation of virtio_iommu_set_iova_ranges() which will be
>>>> removed in subsequent patches. rebuild_resv_regions() is just moved.
>>>>
>>>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>>>
>>>> ---
>>>>
>>>> - added g_assert(!sdev->probe_done)
>>>> ---
>>>> hw/virtio/virtio-iommu.c | 146 ++++++++++++++++++++++++++++++----
>> ----
>>>> -
>>>> 1 file changed, 112 insertions(+), 34 deletions(-)
>>>>
>>>> diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c
>>>> index db842555c8..04474ebd74 100644
>>>> --- a/hw/virtio/virtio-iommu.c
>>>> +++ b/hw/virtio/virtio-iommu.c
>>>> @@ -498,12 +498,109 @@ get_host_iommu_device(VirtIOIOMMU
>>>> *viommu, PCIBus *bus, int devfn) {
>>>>     return g_hash_table_lookup(viommu->host_iommu_devices, &key);
>>>> }
>>>>
>>>> +/**
>>>> + * rebuild_resv_regions: rebuild resv regions with both the
>>>> + * info of host resv ranges and property set resv ranges
>>>> + */
>>>> +static int rebuild_resv_regions(IOMMUDevice *sdev)
>>>> +{
>>>> +    GList *l;
>>>> +    int i = 0;
>>>> +
>>>> +    /* free the existing list and rebuild it from scratch */
>>>> +    g_list_free_full(sdev->resv_regions, g_free);
>>>> +    sdev->resv_regions = NULL;
>>>> +
>>>> +    /* First add host reserved regions if any, all tagged as RESERVED */
>>>> +    for (l = sdev->host_resv_ranges; l; l = l->next) {
>>>> +        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>>>> +        Range *r = (Range *)l->data;
>>>> +
>>>> +        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>>>> +        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>>>> +        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions,
>> reg);
>>>> +        trace_virtio_iommu_host_resv_regions(sdev-
>>>>> iommu_mr.parent_obj.name, i,
>>>> +                                             range_lob(&reg->range),
>>>> +                                             range_upb(&reg->range));
>>>> +        i++;
>>>> +    }
>>>> +    /*
>>>> +     * then add higher priority reserved regions set by the machine
>>>> +     * through properties
>>>> +     */
>>>> +    add_prop_resv_regions(sdev);
>>>> +    return 0;
>>>> +}
>>>> +
>>>> +static int virtio_iommu_set_host_iova_ranges(VirtIOIOMMU *s, PCIBus
>>>> *bus,
>>>> +                                             int devfn, GList *iova_ranges,
>>>> +                                             Error **errp)
>>>> +{
>>>> +    IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus);
>>> Here the bus/devfn parameters are real device BDF not aliased one,
>>> But used to index s->as_by_busptr which expect aliased bus/devfn.
>>>
>>> Do we need a translation of bus/devfn?
>> Hum that's a good point actually. I need to further study that. that's
>> not easy to translate, is it?
> Yes, may need a path to call into pci_device_get_iommu_bus_devfn().
> Maybe a new HostIOMMUDevice callback.
>
>> Now I am not totally sure why we don't use the alias as well for
>> HostIOMMUDevices or at least store the aliased bdf.
> Because we need to store HostIOMMUDevice in vIOMMU in real bdf granularity,
> Not aliased bdf granularity. Virtual vtd calls VFIO device uAPI on real device not
> the aliased device, i.e., VFIO_DEVICE_[ATTACH|DETACH]_IOMMUFD_PT.
>
> I also have a question, could we define host_resv_ranges in VirtioHostIOMMUDevice
> to avoid translation?
well reserved iova regions are rather associated to iommu groups
(associated to the concept of alias bdfs), no? Also potentially emulated
devices could expose some.

Eric
>
> Thanks
> Zhenzhong
>
>> Thanks
>>
>> Eric
>>> Thanks
>>> Zhenzhong
>>>
>>>> +    IOMMUDevice *sdev;
>>>> +    GList *current_ranges;
>>>> +    GList *l, *tmp, *new_ranges = NULL;
>>>> +    int ret = -EINVAL;
>>>> +
>>>> +    if (!sbus) {
>>>> +        error_report("%s no sbus", __func__);
>>>> +    }
>>>> +
>>>> +    sdev = sbus->pbdev[devfn];
>>>> +
>>>> +    current_ranges = sdev->host_resv_ranges;
>>>> +
>>>> +    g_assert(!sdev->probe_done);
>>>> +
>>>> +    /* check that each new resv region is included in an existing one */
>>>> +    if (sdev->host_resv_ranges) {
>>>> +        range_inverse_array(iova_ranges,
>>>> +                            &new_ranges,
>>>> +                            0, UINT64_MAX);
>>>> +
>>>> +        for (tmp = new_ranges; tmp; tmp = tmp->next) {
>>>> +            Range *newr = (Range *)tmp->data;
>>>> +            bool included = false;
>>>> +
>>>> +            for (l = current_ranges; l; l = l->next) {
>>>> +                Range * r = (Range *)l->data;
>>>> +
>>>> +                if (range_contains_range(r, newr)) {
>>>> +                    included = true;
>>>> +                    break;
>>>> +                }
>>>> +            }
>>>> +            if (!included) {
>>>> +                goto error;
>>>> +            }
>>>> +        }
>>>> +        /* all new reserved ranges are included in existing ones */
>>>> +        ret = 0;
>>>> +        goto out;
>>>> +    }
>>>> +
>>>> +    range_inverse_array(iova_ranges,
>>>> +                        &sdev->host_resv_ranges,
>>>> +                        0, UINT64_MAX);
>>>> +    rebuild_resv_regions(sdev);
>>>> +
>>>> +    return 0;
>>>> +error:
>>>> +    error_setg(errp, "%s Conflicting host reserved ranges set!",
>>>> +               __func__);
>>>> +out:
>>>> +    g_list_free_full(new_ranges, g_free);
>>>> +    return ret;
>>>> +}
>>>> +
>>>> static bool virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque,
>>>> int devfn,
>>>>                                           HostIOMMUDevice *hiod, Error **errp)
>>>> {
>>>>     VirtIOIOMMU *viommu = opaque;
>>>>     VirtioHostIOMMUDevice *vhiod;
>>>> +    HostIOMMUDeviceClass *hiodc =
>>>> HOST_IOMMU_DEVICE_GET_CLASS(hiod);
>>>>     struct hiod_key *new_key;
>>>> +    GList *host_iova_ranges = NULL;
>>>>
>>>>     assert(hiod);
>>>>
>>>> @@ -513,6 +610,20 @@ static bool
>>>> virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>>>>         return false;
>>>>     }
>>>>
>>>> +    if (hiodc->get_iova_ranges) {
>>>> +        int ret;
>>>> +        host_iova_ranges = hiodc->get_iova_ranges(hiod, errp);
>>>> +        if (!host_iova_ranges) {
>>>> +            return true; /* some old kernels may not support that capability
>> */
>>>> +        }
>>>> +        ret = virtio_iommu_set_host_iova_ranges(viommu, bus, devfn,
>>>> +                                                host_iova_ranges, errp);
>>>> +        if (ret) {
>>>> +            g_list_free_full(host_iova_ranges, g_free);
>>>> +            return false;
>>>> +        }
>>>> +    }
>>>> +
>>>>     vhiod = g_malloc0(sizeof(VirtioHostIOMMUDevice));
>>>>     vhiod->bus = bus;
>>>>     vhiod->devfn = (uint8_t)devfn;
>>>> @@ -525,6 +636,7 @@ static bool
>>>> virtio_iommu_set_iommu_device(PCIBus *bus, void *opaque, int devfn,
>>>>
>>>>     object_ref(hiod);
>>>>     g_hash_table_insert(viommu->host_iommu_devices, new_key, vhiod);
>>>> +    g_list_free_full(host_iova_ranges, g_free);
>>>>
>>>>     return true;
>>>> }
>>>> @@ -1246,40 +1358,6 @@ static int
>>>> virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr,
>>>>     return 0;
>>>> }
>>>>
>>>> -/**
>>>> - * rebuild_resv_regions: rebuild resv regions with both the
>>>> - * info of host resv ranges and property set resv ranges
>>>> - */
>>>> -static int rebuild_resv_regions(IOMMUDevice *sdev)
>>>> -{
>>>> -    GList *l;
>>>> -    int i = 0;
>>>> -
>>>> -    /* free the existing list and rebuild it from scratch */
>>>> -    g_list_free_full(sdev->resv_regions, g_free);
>>>> -    sdev->resv_regions = NULL;
>>>> -
>>>> -    /* First add host reserved regions if any, all tagged as RESERVED */
>>>> -    for (l = sdev->host_resv_ranges; l; l = l->next) {
>>>> -        ReservedRegion *reg = g_new0(ReservedRegion, 1);
>>>> -        Range *r = (Range *)l->data;
>>>> -
>>>> -        reg->type = VIRTIO_IOMMU_RESV_MEM_T_RESERVED;
>>>> -        range_set_bounds(&reg->range, range_lob(r), range_upb(r));
>>>> -        sdev->resv_regions = resv_region_list_insert(sdev->resv_regions,
>> reg);
>>>> -        trace_virtio_iommu_host_resv_regions(sdev-
>>>>> iommu_mr.parent_obj.name, i,
>>>> -                                             range_lob(&reg->range),
>>>> -                                             range_upb(&reg->range));
>>>> -        i++;
>>>> -    }
>>>> -    /*
>>>> -     * then add higher priority reserved regions set by the machine
>>>> -     * through properties
>>>> -     */
>>>> -    add_prop_resv_regions(sdev);
>>>> -    return 0;
>>>> -}
>>>> -
>>>> /**
>>>>  * virtio_iommu_set_iova_ranges: Conveys the usable IOVA ranges
>>>>  *
>>>> --
>>>> 2.41.0