[PATCH V9 34/46] vfio-pci: recover from unmap-all-vaddr failure

Steve Sistare posted 46 patches 3 years, 1 month ago
Maintainers: Stefano Stabellini <sstabellini@kernel.org>, Anthony Perard <anthony.perard@citrix.com>, Paul Durrant <paul@xen.org>, David Hildenbrand <david@redhat.com>, Igor Mammedov <imammedo@redhat.com>, Stefan Berger <stefanb@linux.vnet.ibm.com>, Stefan Hajnoczi <stefanha@redhat.com>, "Denis V. Lunev" <den@openvz.org>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, Kevin Wolf <kwolf@redhat.com>, Hanna Reitz <hreitz@redhat.com>, Stefan Weil <sw@weilnetz.de>, Jeff Cody <codyprime@gmail.com>, Fam Zheng <fam@euphon.net>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, "Alex Bennée" <alex.bennee@linaro.org>, "Philippe Mathieu-Daudé" <f4bug@amsat.org>, "Dr. David Alan Gilbert" <dgilbert@redhat.com>, Greg Kurz <groug@kaod.org>, Christian Schoenebeck <qemu_oss@crudebyte.com>, "Daniel P. Berrangé" <berrange@redhat.com>, Eduardo Habkost <eduardo@habkost.net>, Gerd Hoffmann <kraxel@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Peter Maydell <peter.maydell@linaro.org>, Jason Wang <jasowang@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Cédric Le Goater" <clg@kaod.org>, Daniel Henrique Barboza <danielhb413@gmail.com>, David Gibson <david@gibson.dropbear.id.au>, Elena Ufimtseva <elena.ufimtseva@oracle.com>, Jagannathan Raman <jag.raman@oracle.com>, John G Johnson <john.g.johnson@oracle.com>, Halil Pasic <pasic@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Eric Farman <farman@linux.ibm.com>, Richard Henderson <richard.henderson@linaro.org>, Cornelia Huck <cohuck@redhat.com>, Thomas Huth <thuth@redhat.com>, Alex Williamson <alex.williamson@redhat.com>, Steve Sistare <steven.sistare@oracle.com>, Mark Kanda <mark.kanda@oracle.com>, Peter Xu <peterx@redhat.com>, Juan Quintela <quintela@redhat.com>, Markus Armbruster <armbru@redhat.com>, Michael Roth <michael.roth@amd.com>, John Snow <jsnow@redhat.com>, Cleber Rosa <crosa@redhat.com>, Beraldo Leal <bleal@redhat.com>, Eric Blake 
<eblake@redhat.com>, Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>, Marcelo Tosatti <mtosatti@redhat.com>, Kamil Rytarowski <kamil@netbsd.org>, Reinoud Zandijk <reinoud@netbsd.org>, Sunil Muthuswamy <sunilmut@microsoft.com>, Wainer dos Santos Moschetta <wainersm@redhat.com>
[PATCH V9 34/46] vfio-pci: recover from unmap-all-vaddr failure
Posted by Steve Sistare 3 years, 1 month ago
If there are multiple containers and unmap-all fails for some container, we
need to remap the vaddr for the other containers for which unmap-all
succeeded.  Recover by walking all flat sections of each such container's
address space and restoring the vaddr for each.  Do so by invoking the vfio
listener callback with a new "remap" flag, which tells it to restore a
mapping without allocating new userland data structures.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 hw/vfio/common.c              | 79 ++++++++++++++++++++++++++++++++++---------
 hw/vfio/cpr.c                 | 36 ++++++++++++++++++++
 include/hw/vfio/vfio-common.h |  2 +-
 3 files changed, 100 insertions(+), 17 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index b7b35d4..de821e1 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -895,15 +895,35 @@ static bool vfio_known_safe_misalignment(MemoryRegionSection *section)
     return true;
 }
 
+static VFIORamDiscardListener *vfio_find_ram_discard_listener(
+    VFIOContainer *container, MemoryRegionSection *section)
+{
+    VFIORamDiscardListener *vrdl;
+
+    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
+        if (vrdl->mr == section->mr &&
+            vrdl->offset_within_address_space ==
+            section->offset_within_address_space) {
+            break;
+        }
+    }
+    /* vrdl is NULL here when no listener on vrdl_list matched the section */
+    if (!vrdl) {
+        hw_error("vfio: Trying to sync missing RAM discard listener");
+        /* hw_error() does not return, so vrdl is non-NULL below */
+    }
+    return vrdl;
+}
+
 static void vfio_listener_region_add(MemoryListener *listener,
                                      MemoryRegionSection *section)
 {
     VFIOContainer *container = container_of(listener, VFIOContainer, listener);
-    vfio_container_region_add(container, section);
+    vfio_container_region_add(container, section, false);
 }
 
 void vfio_container_region_add(VFIOContainer *container,
-                               MemoryRegionSection *section)
+                               MemoryRegionSection *section, bool remap)
 {
     hwaddr iova, end;
     Int128 llend, llsize;
@@ -1033,6 +1053,30 @@ void vfio_container_region_add(VFIOContainer *container,
         int iommu_idx;
 
         trace_vfio_listener_region_add_iommu(iova, end);
+
+        /*
+         * If remap, the vaddr was invalidated by an earlier unmap with
+         * VFIO_DMA_UNMAP_FLAG_VADDR and must now be restored.  This section
+         * was already added in the past, so its giommu already exists.  Find
+         * it and replay it, which calls vfio_dma_map further down the stack.
+         */
+
+        if (remap) {
+            hwaddr as_offset = section->offset_within_address_space;
+            hwaddr iommu_offset = as_offset - section->offset_within_region;
+
+            QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
+                if (giommu->iommu_mr == iommu_mr &&
+                    giommu->iommu_offset == iommu_offset) {
+                    memory_region_iommu_replay(giommu->iommu_mr, &giommu->n);
+                    return;
+                }
+            }
+            error_report("Container cannot find iommu region %s offset %lx",
+                memory_region_name(section->mr), iommu_offset);
+            goto fail;
+        }
+
         /*
          * FIXME: For VFIO iommu types which have KVM acceleration to
          * avoid bouncing all map/unmaps through qemu this way, this
@@ -1083,7 +1127,21 @@ void vfio_container_region_add(VFIOContainer *container,
      * about changes.
      */
     if (memory_region_has_ram_discard_manager(section->mr)) {
-        vfio_register_ram_discard_listener(container, section);
+        /*
+         * If remap, the vaddr was invalidated by an earlier unmap with
+         * VFIO_DMA_UNMAP_FLAG_VADDR and must now be restored.  This section
+         * was already added in the past, so the ram discard listener exists.
+         * Call its populate function directly, which calls vfio_dma_map.
+         */
+        if (remap)  {
+            VFIORamDiscardListener *vrdl =
+                vfio_find_ram_discard_listener(container, section);
+            if (vrdl->listener.notify_populate(&vrdl->listener, section)) {
+                error_report("listener.notify_populate failed");
+            }
+        } else {
+            vfio_register_ram_discard_listener(container, section);
+        }
         return;
     }
 
@@ -1417,19 +1475,8 @@ static int vfio_sync_ram_discard_listener_dirty_bitmap(VFIOContainer *container,
                                                    MemoryRegionSection *section)
 {
     RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
-    VFIORamDiscardListener *vrdl = NULL;
-
-    QLIST_FOREACH(vrdl, &container->vrdl_list, next) {
-        if (vrdl->mr == section->mr &&
-            vrdl->offset_within_address_space ==
-            section->offset_within_address_space) {
-            break;
-        }
-    }
-
-    if (!vrdl) {
-        hw_error("vfio: Trying to sync missing RAM discard listener");
-    }
+    VFIORamDiscardListener *vrdl =
+        vfio_find_ram_discard_listener(container, section);
 
     /*
      * We only want/can synchronize the bitmap for actually mapped parts -
diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
index 83f7872..1f682cb 100644
--- a/hw/vfio/cpr.c
+++ b/hw/vfio/cpr.c
@@ -34,6 +34,15 @@ vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
     return 0;
 }
 
+static int
+vfio_region_remap(MemoryRegionSection *section, void *handle, Error **errp)
+{
+    VFIOContainer *container = handle;  /* opaque handle is the container */
+    vfio_container_region_add(container, section, true);    /* remap=true */
+    container->vaddr_unmapped = false;  /* NOTE: cleared even if remap fails */
+    return 0;                           /* errp is never set here */
+}
+
 static bool vfio_is_cpr_capable(VFIOContainer *container, Error **errp)
 {
     if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR) ||
@@ -99,6 +108,30 @@ static const VMStateDescription vfio_container_vmstate = {
     }
 };
 
+static void vfio_cpr_fail_notifier(Notifier *notifier, void *data)
+{
+    MigrationState *s = data;
+    VFIOContainer *container;
+    Error *err = NULL;
+    /* Only act when a migration in cpr-exec mode has failed */
+    if (!migration_has_failed(s) || migrate_mode_of(s) != MIG_MODE_CPR_EXEC) {
+        return;
+    }
+
+    container = container_of(notifier, VFIOContainer, cpr_notifier);
+    if (container->vaddr_unmapped) {
+        /* Remap every flat section of the container's address space */
+        /* Set reused so vfio_dma_map restores vaddr */
+        container->reused = true;
+        if (address_space_flat_for_each_section(container->space->as,
+                                                vfio_region_remap,
+                                                container, &err)) {
+            error_report_err(err);
+        }
+        container->reused = false;
+    }
+}
+
 int vfio_cpr_register_container(VFIOContainer *container, Error **errp)
 {
     container->cpr_blocker = NULL;
@@ -109,6 +142,7 @@ int vfio_cpr_register_container(VFIOContainer *container, Error **errp)
 
     vmstate_register(NULL, -1, &vfio_container_vmstate, container);
 
+    migration_add_notifier(&container->cpr_notifier, vfio_cpr_fail_notifier);
     return 0;
 }
 
@@ -117,4 +151,6 @@ void vfio_cpr_unregister_container(VFIOContainer *container)
     migrate_del_blocker(&container->cpr_blocker);
 
     vmstate_unregister(NULL, &vfio_container_vmstate, container);
+
+    migration_remove_notifier(&container->cpr_notifier);
 }
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 17ad9ba..b682645 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -244,7 +244,7 @@ vfio_get_device_info_cap(struct vfio_device_info *info, uint16_t id);
 extern const MemoryListener vfio_prereg_listener;
 void vfio_listener_register(VFIOContainer *container);
 void vfio_container_region_add(VFIOContainer *container,
-                               MemoryRegionSection *section);
+                               MemoryRegionSection *section, bool remap);
 
 int vfio_spapr_create_window(VFIOContainer *container,
                              MemoryRegionSection *section,
-- 
1.8.3.1