[PATCH v2 18/20] vfio/common: Optimize device dirty page tracking with vIOMMU

Avihai Horon posted 20 patches 2 years, 11 months ago
Maintainers: Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Peter Xu <peterx@redhat.com>, Jason Wang <jasowang@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Paolo Bonzini <pbonzini@redhat.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, Halil Pasic <pasic@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Eric Farman <farman@linux.ibm.com>, Thomas Huth <thuth@redhat.com>, David Hildenbrand <david@redhat.com>, Ilya Leoshkevich <iii@linux.ibm.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Juan Quintela <quintela@redhat.com>, "Dr. David Alan Gilbert" <dgilbert@redhat.com>, Eric Blake <eblake@redhat.com>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, John Snow <jsnow@redhat.com>, Stefan Hajnoczi <stefanha@redhat.com>, Fam Zheng <fam@euphon.net>
[PATCH v2 18/20] vfio/common: Optimize device dirty page tracking with vIOMMU
Posted by Avihai Horon 2 years, 11 months ago
When vIOMMU is enabled, syncing dirty page bitmaps is done by replaying
the vIOMMU mappings and querying the dirty bitmap for each mapping.

With device dirty tracking this causes a lot of overhead, since the HW
is queried many times (even with small idle guest this can end up with
thousands of calls to HW).

Optimize this by de-coupling dirty bitmap query from vIOMMU replay.
Now a single dirty bitmap is queried per vIOMMU MR section, which is
then used for all corresponding vIOMMU mappings within that MR section.

Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
 hw/vfio/common.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 83 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 1024788bcc..f16a57d42b 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1946,8 +1946,42 @@ out:
 typedef struct {
     IOMMUNotifier n;
     VFIOGuestIOMMU *giommu;
+    VFIOBitmap *vbmap;
 } vfio_giommu_dirty_notifier;
 
+static int vfio_iommu_set_dirty_bitmap(VFIOContainer *container,
+                                       vfio_giommu_dirty_notifier *gdn,
+                                       hwaddr iova, hwaddr size,
+                                       ram_addr_t ram_addr)
+{
+    VFIOBitmap *vbmap = gdn->vbmap;
+    VFIOBitmap *dst_vbmap;
+    hwaddr start_iova = REAL_HOST_PAGE_ALIGN(gdn->n.start);
+    hwaddr copy_offset;
+
+    dst_vbmap = vfio_bitmap_alloc(size);
+    if (!dst_vbmap) {
+        return -errno;
+    }
+
+    if (!vfio_iommu_range_is_device_tracked(container, iova, size)) {
+        bitmap_set(dst_vbmap->bitmap, 0, dst_vbmap->pages);
+
+        goto out;
+    }
+
+    copy_offset = (iova - start_iova) / qemu_real_host_page_size();
+    bitmap_copy_with_src_offset(dst_vbmap->bitmap, vbmap->bitmap, copy_offset,
+                                dst_vbmap->pages);
+
+out:
+    cpu_physical_memory_set_dirty_lebitmap(dst_vbmap->bitmap, ram_addr,
+                                           dst_vbmap->pages);
+    vfio_bitmap_dealloc(dst_vbmap);
+
+    return 0;
+}
+
 static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
 {
     vfio_giommu_dirty_notifier *gdn = container_of(n,
@@ -1968,8 +2002,15 @@ static void vfio_iommu_map_dirty_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
 
     rcu_read_lock();
     if (vfio_get_xlat_addr(iotlb, NULL, &translated_addr, NULL)) {
-        ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
-                                    translated_addr);
+        if (gdn->vbmap) {
+            ret = vfio_iommu_set_dirty_bitmap(container, gdn, iova,
+                                              iotlb->addr_mask + 1,
+                                              translated_addr);
+        } else {
+            ret = vfio_get_dirty_bitmap(container, iova, iotlb->addr_mask + 1,
+                                        translated_addr);
+        }
+
         if (ret) {
             error_report("vfio_iommu_map_dirty_notify(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx") = %d (%s)",
@@ -2033,6 +2074,7 @@ static int vfio_sync_iommu_dirty_bitmap(VFIOContainer *container,
 {
     VFIOGuestIOMMU *giommu;
     bool found = false;
+    VFIOBitmap *vbmap = NULL;
     Int128 llend;
     vfio_giommu_dirty_notifier gdn;
     int idx;
@@ -2050,6 +2092,7 @@ static int vfio_sync_iommu_dirty_bitmap(VFIOContainer *container,
     }
 
     gdn.giommu = giommu;
+    gdn.vbmap = NULL;
     idx = memory_region_iommu_attrs_to_index(giommu->iommu_mr,
                                              MEMTXATTRS_UNSPECIFIED);
 
@@ -2057,11 +2100,49 @@ static int vfio_sync_iommu_dirty_bitmap(VFIOContainer *container,
                        section->size);
     llend = int128_sub(llend, int128_one());
 
+    /*
+     * Optimize device dirty tracking if the MR section is at least partially
+     * tracked. Optimization is done by querying a single dirty bitmap for the
+     * entire range instead of querying dirty bitmap for each vIOMMU mapping.
+     */
+    if (vfio_devices_all_device_dirty_tracking(container)) {
+        hwaddr start = REAL_HOST_PAGE_ALIGN(section->offset_within_region);
+        hwaddr end = int128_get64(llend);
+        hwaddr size;
+        int ret;
+
+        if (start >= container->giommu_tracked_range) {
+            goto notifier_init;
+        }
+
+        size = REAL_HOST_PAGE_ALIGN(
+            MIN(container->giommu_tracked_range - 1, end) - start);
+
+        vbmap = vfio_bitmap_alloc(size);
+        if (!vbmap) {
+            return -errno;
+        }
+
+        ret = vfio_devices_query_dirty_bitmap(container, vbmap, start, size);
+        if (ret) {
+            vfio_bitmap_dealloc(vbmap);
+
+            return ret;
+        }
+
+        gdn.vbmap = vbmap;
+    }
+
+notifier_init:
     iommu_notifier_init(&gdn.n, vfio_iommu_map_dirty_notify, IOMMU_NOTIFIER_MAP,
                         section->offset_within_region, int128_get64(llend),
                         idx);
     memory_region_iommu_replay(giommu->iommu_mr, &gdn.n);
 
+    if (vbmap) {
+        vfio_bitmap_dealloc(vbmap);
+    }
+
     return 0;
 }
 
-- 
2.26.3