[v3] migration: propagate vTPM errors using Error objects

[PATCH v3 1/3] migration: Pass Error object errp into vm state loading functions

Posted by Arun Menon 7 months, 1 week ago

- This is an incremental step in converting vmstate loading
  code to report errors.
- Minimal changes to the signature and body of the following
  functions are done,
  - vmstate_load()
  - vmstate_load_state()
  - vmstate_subsection_load()
  - qemu_load_device_state()
  - qemu_loadvm_state()
  - qemu_loadvm_section_start_full()
  - qemu_loadvm_section_part_end()
  - qemu_loadvm_state_header()
  - qemu_loadvm_state_main()
- error_report() has been replaced with error_setg();
  and in places where error has been already set,
  error_prepend() is used to not lose information.

Signed-off-by: Arun Menon <armenon@redhat.com>
---
 hw/display/virtio-gpu.c     |   2 +-
 hw/pci/pci.c                |   2 +-
 hw/s390x/virtio-ccw.c       |   2 +-
 hw/scsi/spapr_vscsi.c       |   2 +-
 hw/vfio/pci.c               |   2 +-
 hw/virtio/virtio-mmio.c     |   2 +-
 hw/virtio/virtio-pci.c      |   2 +-
 hw/virtio/virtio.c          |   4 +-
 include/migration/vmstate.h |   2 +-
 migration/colo.c            |  13 +++--
 migration/cpr.c             |   2 +-
 migration/migration.c       |  19 ++++--
 migration/savevm.c          | 137 +++++++++++++++++++++++++++-----------------
 migration/savevm.h          |   7 ++-
 migration/vmstate-types.c   |  10 ++--
 migration/vmstate.c         |  40 +++++++------
 tests/unit/test-vmstate.c   |  18 +++---
 17 files changed, 158 insertions(+), 108 deletions(-)

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 0a1a625b0ea6cf26cb0d799171a57ed3d3ab2442..5d2ca8d8b864350133a674802d7316abd379591c 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1343,7 +1343,7 @@ static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,
     }
 
     /* load & apply scanout state */
-    vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1);
+    vmstate_load_state(f, &vmstate_virtio_gpu_scanouts, g, 1, NULL);
 
     return 0;
 }
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index c70b5ceebaf1f2b10768bd030526cbb518da2b8d..2ab5d30bb3c319ac1c7bfc9a2acf6a2b38082066 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -934,7 +934,7 @@ void pci_device_save(PCIDevice *s, QEMUFile *f)
 int pci_device_load(PCIDevice *s, QEMUFile *f)
 {
     int ret;
-    ret = vmstate_load_state(f, &vmstate_pci_device, s, s->version_id);
+    ret = vmstate_load_state(f, &vmstate_pci_device, s, s->version_id, NULL);
     /* Restore the interrupt status bit. */
     pci_update_irq_status(s);
     return ret;
diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c
index d2f85b39f30f7fc82e0c600144c0a958e1269b2c..2f6feff2b0a22d7d7f6aecfd7e7870d8362f1a73 100644
--- a/hw/s390x/virtio-ccw.c
+++ b/hw/s390x/virtio-ccw.c
@@ -1136,7 +1136,7 @@ static void virtio_ccw_save_config(DeviceState *d, QEMUFile *f)
 static int virtio_ccw_load_config(DeviceState *d, QEMUFile *f)
 {
     VirtioCcwDevice *dev = VIRTIO_CCW_DEVICE(d);
-    return vmstate_load_state(f, &vmstate_virtio_ccw_dev, dev, 1);
+    return vmstate_load_state(f, &vmstate_virtio_ccw_dev, dev, 1, NULL);
 }
 
 static void virtio_ccw_pre_plugged(DeviceState *d, Error **errp)
diff --git a/hw/scsi/spapr_vscsi.c b/hw/scsi/spapr_vscsi.c
index 20f70fb2729de78b9636a6b8c869695dab4f8902..573fdea668536b464bca11f001e9e0288e781493 100644
--- a/hw/scsi/spapr_vscsi.c
+++ b/hw/scsi/spapr_vscsi.c
@@ -648,7 +648,7 @@ static void *vscsi_load_request(QEMUFile *f, SCSIRequest *sreq)
     assert(!req->active);
 
     memset(req, 0, sizeof(*req));
-    rc = vmstate_load_state(f, &vmstate_spapr_vscsi_req, req, 1);
+    rc = vmstate_load_state(f, &vmstate_spapr_vscsi_req, req, 1, NULL);
     if (rc) {
         fprintf(stderr, "VSCSI: failed loading request tag#%u\n", sreq->tag);
         return NULL;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index fa25bded25c51f8efb6c5ad31bd90506cd69745c..87aee0a5701087f9a68ea435bb96e9d6b07b0c24 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2715,7 +2715,7 @@ static int vfio_pci_load_config(VFIODevice *vbasedev, QEMUFile *f)
         old_addr[bar] = pdev->io_regions[bar].addr;
     }
 
-    ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1);
+    ret = vmstate_load_state(f, &vmstate_vfio_pci_config, vdev, 1, NULL);
     if (ret) {
         return ret;
     }
diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 532c67107ba1d2978a76cf49f9cdc1de1dea3e11..9058b1563462d4464dcba799643a583c93fb5683 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -619,7 +619,7 @@ static int virtio_mmio_load_extra_state(DeviceState *opaque, QEMUFile *f)
 {
     VirtIOMMIOProxy *proxy = VIRTIO_MMIO(opaque);
 
-    return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1);
+    return vmstate_load_state(f, &vmstate_virtio_mmio, proxy, 1, NULL);
 }
 
 static bool virtio_mmio_has_extra_state(DeviceState *opaque)
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index fba2372c93bfd648736b07e4bc83e7097baa58cb..50a1f5701754b88e8a1ee062d6eeedfd848cb4f5 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -160,7 +160,7 @@ static int virtio_pci_load_extra_state(DeviceState *d, QEMUFile *f)
 {
     VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
 
-    return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1);
+    return vmstate_load_state(f, &vmstate_virtio_pci, proxy, 1, NULL);
 }
 
 static void virtio_pci_save_queue(DeviceState *d, int n, QEMUFile *f)
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 82a285a31d1c0427d55f7cb73398adfc94e678fe..66d5941f68a4b9e1e5390bb0aa45fc6cd34e2a1e 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3317,14 +3317,14 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
     }
 
     if (vdc->vmsd) {
-        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
+        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id, NULL);
         if (ret) {
             return ret;
         }
     }
 
     /* Subsections */
-    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
+    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1, NULL);
     if (ret) {
         return ret;
     }
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 1ff7bd9ac425ba67cd5ca7ad97bcf570f9e19abe..056781b1c21e737583f081594d9f88b32adfd674 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -1196,7 +1196,7 @@ extern const VMStateInfo vmstate_info_qlist;
     }
 
 int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
-                       void *opaque, int version_id);
+                       void *opaque, int version_id, Error **errp);
 int vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
                        void *opaque, JSONWriter *vmdesc);
 int vmstate_save_state_with_err(QEMUFile *f, const VMStateDescription *vmsd,
diff --git a/migration/colo.c b/migration/colo.c
index e0f713c837f5da25d67afbd02ceb6c54024ca3af..808c980b4af3199968771cdc6cca3c2451a2f4a6 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -659,6 +659,7 @@ void migrate_start_colo_process(MigrationState *s)
 static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
                       QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
 {
+    ERRP_GUARD();
     uint64_t total_size;
     uint64_t value;
     Error *local_err = NULL;
@@ -686,11 +687,13 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
 
     bql_lock();
     cpu_synchronize_all_states();
-    ret = qemu_loadvm_state_main(mis->from_src_file, mis);
+    ret = qemu_loadvm_state_main(mis->from_src_file, mis, &local_err);
     bql_unlock();
 
     if (ret < 0) {
-        error_setg(errp, "Load VM's live state (ram) error");
+        if (local_err != NULL) {
+            error_prepend(errp, "Load VM's live state (ram) error");
+        }
         return;
     }
 
@@ -729,9 +732,11 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
     bql_lock();
     vmstate_loading = true;
     colo_flush_ram_cache();
-    ret = qemu_load_device_state(fb);
+    ret = qemu_load_device_state(fb, &local_err);
     if (ret < 0) {
-        error_setg(errp, "COLO: load device state failed");
+        if (*errp != NULL) {
+            error_prepend(errp, "COLO: load device state failed");
+        }
         vmstate_loading = false;
         bql_unlock();
         return;
diff --git a/migration/cpr.c b/migration/cpr.c
index a50a57edca754b50e68fa9c294b3c89791e62ba8..0fb9fadac905c83689eed2b1193b282da679b6ef 100644
--- a/migration/cpr.c
+++ b/migration/cpr.c
@@ -235,7 +235,7 @@ int cpr_state_load(MigrationChannel *channel, Error **errp)
         return -ENOTSUP;
     }
 
-    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
+    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1, errp);
     if (ret) {
         error_setg(errp, "vmstate_load_state error %d", ret);
         qemu_fclose(f);
diff --git a/migration/migration.c b/migration/migration.c
index 4098870bce33ffdc57b5972fc5b106d88abb237e..ac06eb7b0195bfbcfe7662c91f109b2ff982146f 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -876,7 +876,7 @@ process_incoming_migration_co(void *opaque)
                       MIGRATION_STATUS_ACTIVE);
 
     mis->loadvm_co = qemu_coroutine_self();
-    ret = qemu_loadvm_state(mis->from_src_file);
+    ret = qemu_loadvm_state(mis->from_src_file, &local_err);
     mis->loadvm_co = NULL;
 
     trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed");
@@ -903,7 +903,10 @@ process_incoming_migration_co(void *opaque)
     }
 
     if (ret < 0) {
-        error_setg(&local_err, "load of migration failed: %s", strerror(-ret));
+        if (local_err != NULL) {
+            error_prepend(&local_err, "load of migration failed: %s: ",
+                          strerror(-ret));
+        }
         goto fail;
     }
 
@@ -918,15 +921,19 @@ process_incoming_migration_co(void *opaque)
 fail:
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                       MIGRATION_STATUS_FAILED);
-    migrate_set_error(s, local_err);
-    error_free(local_err);
+    if (local_err) {
+        migrate_set_error(s, local_err);
+        error_free(local_err);
+    }
 
     migration_incoming_state_destroy();
 
     if (mis->exit_on_error) {
         WITH_QEMU_LOCK_GUARD(&s->error_mutex) {
-            error_report_err(s->error);
-            s->error = NULL;
+            if (s->error) {
+                error_report_err(s->error);
+                s->error = NULL;
+            }
         }
 
         exit(EXIT_FAILURE);
diff --git a/migration/savevm.c b/migration/savevm.c
index bb04a4520df9a443d90cf6cb52a383a5f053aaff..09329ba9a9f13e9bbed52def5245151c2c2ee803 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -963,13 +963,14 @@ void vmstate_unregister(VMStateIf *obj, const VMStateDescription *vmsd,
     }
 }
 
-static int vmstate_load(QEMUFile *f, SaveStateEntry *se)
+static int vmstate_load(QEMUFile *f, SaveStateEntry *se, Error **errp)
 {
     trace_vmstate_load(se->idstr, se->vmsd ? se->vmsd->name : "(old)");
     if (!se->vmsd) {         /* Old style */
         return se->ops->load_state(f, se->opaque, se->load_version_id);
     }
-    return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id);
+    return vmstate_load_state(f, se->vmsd, se->opaque, se->load_version_id,
+                              errp);
 }
 
 static void vmstate_save_old_style(QEMUFile *f, SaveStateEntry *se,
@@ -2071,6 +2072,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     QEMUFile *f = mis->from_src_file;
+    Error *local_err = NULL;
     int load_res;
     MigrationState *migr = migrate_get_current();
 
@@ -2089,7 +2091,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
     qemu_file_set_blocking(f, true);
 
     /* TODO: sanity check that only postcopiable data will be loaded here */
-    load_res = qemu_loadvm_state_main(f, mis);
+    load_res = qemu_loadvm_state_main(f, mis, &local_err);
 
     /*
      * This is tricky, but, mis->from_src_file can change after it
@@ -2115,7 +2117,11 @@ static void *postcopy_ram_listen_thread(void *opaque)
                          __func__, load_res);
             load_res = 0; /* prevent further exit() */
         } else {
-            error_report("%s: loadvm failed: %d", __func__, load_res);
+            if (local_err != NULL) {
+                error_prepend(&local_err, "%s: loadvm failed: %d", __func__,
+                              load_res);
+                migrate_set_error(migr, local_err);
+            }
             migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                                            MIGRATION_STATUS_FAILED);
         }
@@ -2394,6 +2400,8 @@ static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
     int ret;
     size_t length;
     QIOChannelBuffer *bioc;
+    Error *local_error;
+    MigrationState *migr = migrate_get_current();
 
     length = qemu_get_be32(mis->from_src_file);
     trace_loadvm_handle_cmd_packaged(length);
@@ -2440,7 +2448,12 @@ static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis)
         qemu_coroutine_yield();
     } while (1);
 
-    ret = qemu_loadvm_state_main(packf, mis);
+    ret = qemu_loadvm_state_main(packf, mis, &local_error);
+    if (ret < 0 && local_error != NULL) {
+        error_prepend(&local_error, "%s: loadvm failed: %d", __func__,
+                      ret);
+        migrate_set_error(migr, local_error);
+    }
     trace_loadvm_handle_cmd_packaged_main(ret);
     qemu_fclose(packf);
     object_unref(OBJECT(bioc));
@@ -2674,8 +2687,9 @@ static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
 }
 
 static int
-qemu_loadvm_section_start_full(QEMUFile *f, uint8_t type)
+qemu_loadvm_section_start_full(QEMUFile *f, uint8_t type, Error **errp)
 {
+    ERRP_GUARD();
     bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
     uint32_t instance_id, version_id, section_id;
     int64_t start_ts, end_ts;
@@ -2686,8 +2700,8 @@ qemu_loadvm_section_start_full(QEMUFile *f, uint8_t type)
     /* Read section start */
     section_id = qemu_get_be32(f);
     if (!qemu_get_counted_string(f, idstr)) {
-        error_report("Unable to read ID string for section %u",
-                     section_id);
+        error_setg(errp, "Unable to read ID string for section %u",
+                   section_id);
         return -EINVAL;
     }
     instance_id = qemu_get_be32(f);
@@ -2695,8 +2709,8 @@ qemu_loadvm_section_start_full(QEMUFile *f, uint8_t type)
 
     ret = qemu_file_get_error(f);
     if (ret) {
-        error_report("%s: Failed to read instance/version ID: %d",
-                     __func__, ret);
+        error_setg(errp, "%s: Failed to read instance/version ID: %d",
+                   __func__, ret);
         return ret;
     }
 
@@ -2705,17 +2719,17 @@ qemu_loadvm_section_start_full(QEMUFile *f, uint8_t type)
     /* Find savevm section */
     se = find_se(idstr, instance_id);
     if (se == NULL) {
-        error_report("Unknown savevm section or instance '%s' %"PRIu32". "
-                     "Make sure that your current VM setup matches your "
-                     "saved VM setup, including any hotplugged devices",
-                     idstr, instance_id);
+        error_setg(errp, "Unknown savevm section or instance '%s' %"PRIu32". "
+                   "Make sure that your current VM setup matches your "
+                   "saved VM setup, including any hotplugged devices",
+                   idstr, instance_id);
         return -EINVAL;
     }
 
     /* Validate version */
     if (version_id > se->version_id) {
-        error_report("savevm: unsupported version %d for '%s' v%d",
-                     version_id, idstr, se->version_id);
+        error_setg(errp, "savevm: unsupported version %d for '%s' v%d",
+                   version_id, idstr, se->version_id);
         return -EINVAL;
     }
     se->load_version_id = version_id;
@@ -2723,7 +2737,7 @@ qemu_loadvm_section_start_full(QEMUFile *f, uint8_t type)
 
     /* Validate if it is a device's state */
     if (xen_enabled() && se->is_ram) {
-        error_report("loadvm: %s RAM loading not allowed on Xen", idstr);
+        error_setg(errp, "loadvm: %s RAM loading not allowed on Xen", idstr);
         return -EINVAL;
     }
 
@@ -2731,10 +2745,13 @@ qemu_loadvm_section_start_full(QEMUFile *f, uint8_t type)
         start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
     }
 
-    ret = vmstate_load(f, se);
+    ret = vmstate_load(f, se, errp);
     if (ret < 0) {
-        error_report("error while loading state for instance 0x%"PRIx32" of"
-                     " device '%s'", instance_id, idstr);
+        if (*errp != NULL) {
+            error_prepend(errp, "error while loading state for "
+                          "instance 0x%"PRIx32" of"
+                          " device '%s'", instance_id, idstr);
+        }
         return ret;
     }
 
@@ -2752,8 +2769,9 @@ qemu_loadvm_section_start_full(QEMUFile *f, uint8_t type)
 }
 
 static int
-qemu_loadvm_section_part_end(QEMUFile *f, uint8_t type)
+qemu_loadvm_section_part_end(QEMUFile *f, uint8_t type, Error **errp)
 {
+    ERRP_GUARD();
     bool trace_downtime = (type == QEMU_VM_SECTION_END);
     int64_t start_ts, end_ts;
     uint32_t section_id;
@@ -2764,8 +2782,8 @@ qemu_loadvm_section_part_end(QEMUFile *f, uint8_t type)
 
     ret = qemu_file_get_error(f);
     if (ret) {
-        error_report("%s: Failed to read section ID: %d",
-                     __func__, ret);
+        error_setg(errp, "%s: Failed to read section ID: %d",
+                   __func__, ret);
         return ret;
     }
 
@@ -2776,7 +2794,7 @@ qemu_loadvm_section_part_end(QEMUFile *f, uint8_t type)
         }
     }
     if (se == NULL) {
-        error_report("Unknown savevm section %d", section_id);
+        error_setg(errp, "Unknown savevm section %d", section_id);
         return -EINVAL;
     }
 
@@ -2784,10 +2802,12 @@ qemu_loadvm_section_part_end(QEMUFile *f, uint8_t type)
         start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
     }
 
-    ret = vmstate_load(f, se);
+    ret = vmstate_load(f, se, errp);
     if (ret < 0) {
-        error_report("error while loading state section id %d(%s)",
-                     section_id, se->idstr);
+        if (*errp != NULL) {
+            error_prepend(errp, "error while loading state section id %d(%s)",
+                          section_id, se->idstr);
+        }
         return ret;
     }
 
@@ -2804,33 +2824,38 @@ qemu_loadvm_section_part_end(QEMUFile *f, uint8_t type)
     return 0;
 }
 
-static int qemu_loadvm_state_header(QEMUFile *f)
+static int qemu_loadvm_state_header(QEMUFile *f, Error **errp)
 {
     unsigned int v;
     int ret;
 
     v = qemu_get_be32(f);
     if (v != QEMU_VM_FILE_MAGIC) {
-        error_report("Not a migration stream");
+        error_setg(errp, "Not a migration stream magic %x != %x",
+                   v, QEMU_VM_FILE_MAGIC);
         return -EINVAL;
     }
 
     v = qemu_get_be32(f);
     if (v == QEMU_VM_FILE_VERSION_COMPAT) {
-        error_report("SaveVM v2 format is obsolete and don't work anymore");
+        error_setg(errp, "SaveVM v2 format is obsolete and don't work anymore");
         return -ENOTSUP;
     }
     if (v != QEMU_VM_FILE_VERSION) {
-        error_report("Unsupported migration stream version");
+        error_setg(errp, "Unsupported migration stream version %x != %x",
+                   v, QEMU_VM_FILE_VERSION);
         return -ENOTSUP;
     }
 
     if (migrate_get_current()->send_configuration) {
-        if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
-            error_report("Configuration section missing");
+        v = qemu_get_byte(f);
+        if (v != QEMU_VM_CONFIGURATION) {
+            error_setg(errp, "Configuration section missing, %x != %x",
+                       v, QEMU_VM_CONFIGURATION);
             return -EINVAL;
         }
-        ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
+        ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0,
+                                 errp);
 
         if (ret) {
             return ret;
@@ -3019,7 +3044,8 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
     return true;
 }
 
-int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis)
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
+                           Error **errp)
 {
     uint8_t section_type;
     int ret = 0;
@@ -3037,14 +3063,14 @@ retry:
         switch (section_type) {
         case QEMU_VM_SECTION_START:
         case QEMU_VM_SECTION_FULL:
-            ret = qemu_loadvm_section_start_full(f, section_type);
+            ret = qemu_loadvm_section_start_full(f, section_type, errp);
             if (ret < 0) {
                 goto out;
             }
             break;
         case QEMU_VM_SECTION_PART:
         case QEMU_VM_SECTION_END:
-            ret = qemu_loadvm_section_part_end(f, section_type);
+            ret = qemu_loadvm_section_part_end(f, section_type, errp);
             if (ret < 0) {
                 goto out;
             }
@@ -3060,7 +3086,7 @@ retry:
             /* This is the end of migration */
             goto out;
         default:
-            error_report("Unknown savevm section type %d", section_type);
+            error_setg(errp, "Unknown savevm section type %d", section_type);
             ret = -EINVAL;
             goto out;
         }
@@ -3094,7 +3120,7 @@ out:
     return ret;
 }
 
-int qemu_loadvm_state(QEMUFile *f)
+int qemu_loadvm_state(QEMUFile *f, Error **errp)
 {
     MigrationState *s = migrate_get_current();
     MigrationIncomingState *mis = migration_incoming_get_current();
@@ -3102,19 +3128,18 @@ int qemu_loadvm_state(QEMUFile *f)
     int ret;
 
     if (qemu_savevm_state_blocked(&local_err)) {
-        error_report_err(local_err);
+        error_propagate(errp, local_err);
         return -EINVAL;
     }
 
     qemu_loadvm_thread_pool_create(mis);
 
-    ret = qemu_loadvm_state_header(f);
+    ret = qemu_loadvm_state_header(f, errp);
     if (ret) {
         return ret;
     }
 
-    if (qemu_loadvm_state_setup(f, &local_err) != 0) {
-        error_report_err(local_err);
+    if (qemu_loadvm_state_setup(f, errp) != 0) {
         return -EINVAL;
     }
 
@@ -3124,7 +3149,7 @@ int qemu_loadvm_state(QEMUFile *f)
 
     cpu_synchronize_all_pre_loadvm();
 
-    ret = qemu_loadvm_state_main(f, mis);
+    ret = qemu_loadvm_state_main(f, mis, errp);
     qemu_event_set(&mis->main_thread_load_event);
 
     trace_qemu_loadvm_state_post_main(ret);
@@ -3192,15 +3217,18 @@ int qemu_loadvm_state(QEMUFile *f)
     return ret;
 }
 
-int qemu_load_device_state(QEMUFile *f)
+int qemu_load_device_state(QEMUFile *f, Error **errp)
 {
+    ERRP_GUARD();
     MigrationIncomingState *mis = migration_incoming_get_current();
     int ret;
 
     /* Load QEMU_VM_SECTION_FULL section */
-    ret = qemu_loadvm_state_main(f, mis);
+    ret = qemu_loadvm_state_main(f, mis, errp);
     if (ret < 0) {
-        error_report("Failed to load device state: %d", ret);
+        if (*errp != NULL) {
+            error_prepend(errp, "Failed to load device state: %d", ret);
+        }
         return ret;
     }
 
@@ -3408,6 +3436,7 @@ void qmp_xen_save_devices_state(const char *filename, bool has_live, bool live,
 
 void qmp_xen_load_devices_state(const char *filename, Error **errp)
 {
+    ERRP_GUARD();
     QEMUFile *f;
     QIOChannelFile *ioc;
     int ret;
@@ -3429,10 +3458,10 @@ void qmp_xen_load_devices_state(const char *filename, Error **errp)
     f = qemu_file_new_input(QIO_CHANNEL(ioc));
     object_unref(OBJECT(ioc));
 
-    ret = qemu_loadvm_state(f);
+    ret = qemu_loadvm_state(f, errp);
     qemu_fclose(f);
-    if (ret < 0) {
-        error_setg(errp, "loading Xen device state failed");
+    if (ret < 0 && *errp != NULL) {
+        error_prepend(errp, "loading Xen device state failed: ");
     }
     migration_incoming_state_destroy();
 }
@@ -3440,6 +3469,7 @@ void qmp_xen_load_devices_state(const char *filename, Error **errp)
 bool load_snapshot(const char *name, const char *vmstate,
                    bool has_devices, strList *devices, Error **errp)
 {
+    ERRP_GUARD();
     BlockDriverState *bs_vm_state;
     QEMUSnapshotInfo sn;
     QEMUFile *f;
@@ -3503,16 +3533,17 @@ bool load_snapshot(const char *name, const char *vmstate,
         ret = -EINVAL;
         goto err_drain;
     }
-    ret = qemu_loadvm_state(f);
+    ret = qemu_loadvm_state(f, errp);
     migration_incoming_state_destroy();
 
     bdrv_drain_all_end();
 
     if (ret < 0) {
-        error_setg(errp, "Error %d while loading VM state", ret);
+        if (*errp != NULL) {
+            error_prepend(errp, "Error %d while loading VM state: ", ret);
+        }
         return false;
     }
-
     return true;
 
 err_drain:
diff --git a/migration/savevm.h b/migration/savevm.h
index 2d5e9c716686f06720325e82fe90c75335ced1de..c337e3e3d111a7f28a57b90f61e8f70b71803d4e 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -64,10 +64,11 @@ void qemu_savevm_send_colo_enable(QEMUFile *f);
 void qemu_savevm_live_state(QEMUFile *f);
 int qemu_save_device_state(QEMUFile *f);
 
-int qemu_loadvm_state(QEMUFile *f);
+int qemu_loadvm_state(QEMUFile *f, Error **errp);
 void qemu_loadvm_state_cleanup(MigrationIncomingState *mis);
-int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis);
-int qemu_load_device_state(QEMUFile *f);
+int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
+                           Error **errp);
+int qemu_load_device_state(QEMUFile *f, Error **errp);
 int qemu_loadvm_approve_switchover(void);
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
         bool in_postcopy);
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index 741a588b7e18c6d37724b08a0101edc8bc74a0a5..1c5b76e1dd198030847971bc35637867c9d54fc0 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -549,7 +549,7 @@ static int get_tmp(QEMUFile *f, void *pv, size_t size,
 
     /* Writes the parent field which is at the start of the tmp */
     *(void **)tmp = pv;
-    ret = vmstate_load_state(f, vmsd, tmp, version_id);
+    ret = vmstate_load_state(f, vmsd, tmp, version_id, NULL);
     g_free(tmp);
     return ret;
 }
@@ -649,7 +649,7 @@ static int get_qtailq(QEMUFile *f, void *pv, size_t unused_size,
 
     while (qemu_get_byte(f)) {
         elm = g_malloc(size);
-        ret = vmstate_load_state(f, vmsd, elm, version_id);
+        ret = vmstate_load_state(f, vmsd, elm, version_id, NULL);
         if (ret) {
             return ret;
         }
@@ -803,7 +803,7 @@ static int get_gtree(QEMUFile *f, void *pv, size_t unused_size,
             key = (void *)(uintptr_t)qemu_get_be64(f);
         } else {
             key = g_malloc0(key_size);
-            ret = vmstate_load_state(f, key_vmsd, key, version_id);
+            ret = vmstate_load_state(f, key_vmsd, key, version_id, NULL);
             if (ret) {
                 error_report("%s : failed to load %s (%d)",
                              field->name, key_vmsd->name, ret);
@@ -811,7 +811,7 @@ static int get_gtree(QEMUFile *f, void *pv, size_t unused_size,
             }
         }
         val = g_malloc0(val_size);
-        ret = vmstate_load_state(f, val_vmsd, val, version_id);
+        ret = vmstate_load_state(f, val_vmsd, val, version_id, NULL);
         if (ret) {
             error_report("%s : failed to load %s (%d)",
                          field->name, val_vmsd->name, ret);
@@ -892,7 +892,7 @@ static int get_qlist(QEMUFile *f, void *pv, size_t unused_size,
 
     while (qemu_get_byte(f)) {
         elm = g_malloc(size);
-        ret = vmstate_load_state(f, vmsd, elm, version_id);
+        ret = vmstate_load_state(f, vmsd, elm, version_id, NULL);
         if (ret) {
             error_report("%s: failed to load %s (%d)", field->name,
                          vmsd->name, ret);
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 5feaa3244d259874f03048326b2497e7db32e47c..158dcc3fcddfe70ab268dc5ace6e4573fa1ccbab 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -25,7 +25,7 @@ static int vmstate_subsection_save(QEMUFile *f, const VMStateDescription *vmsd,
                                    void *opaque, JSONWriter *vmdesc,
                                    Error **errp);
 static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
-                                   void *opaque);
+                                   void *opaque, Error **errp);
 
 /* Whether this field should exist for either save or load the VM? */
 static bool
@@ -132,23 +132,23 @@ static void vmstate_handle_alloc(void *ptr, const VMStateField *field,
 }
 
 int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
-                       void *opaque, int version_id)
+                       void *opaque, int version_id, Error **errp)
 {
     const VMStateField *field = vmsd->fields;
     int ret = 0;
 
     trace_vmstate_load_state(vmsd->name, version_id);
     if (version_id > vmsd->version_id) {
-        error_report("%s: incoming version_id %d is too new "
-                     "for local version_id %d",
-                     vmsd->name, version_id, vmsd->version_id);
+        error_setg(errp, "%s: incoming version_id %d is too new "
+                   "for local version_id %d",
+                   vmsd->name, version_id, vmsd->version_id);
         trace_vmstate_load_state_end(vmsd->name, "too new", -EINVAL);
         return -EINVAL;
     }
     if  (version_id < vmsd->minimum_version_id) {
-        error_report("%s: incoming version_id %d is too old "
-                     "for local minimum version_id  %d",
-                     vmsd->name, version_id, vmsd->minimum_version_id);
+        error_setg(errp, "%s: incoming version_id %d is too old "
+                   "for local minimum version_id  %d",
+                   vmsd->name, version_id, vmsd->minimum_version_id);
         trace_vmstate_load_state_end(vmsd->name, "too old", -EINVAL);
         return -EINVAL;
     }
@@ -192,10 +192,12 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
 
                 if (inner_field->flags & VMS_STRUCT) {
                     ret = vmstate_load_state(f, inner_field->vmsd, curr_elem,
-                                             inner_field->vmsd->version_id);
+                                             inner_field->vmsd->version_id,
+                                             errp);
                 } else if (inner_field->flags & VMS_VSTRUCT) {
                     ret = vmstate_load_state(f, inner_field->vmsd, curr_elem,
-                                             inner_field->struct_version_id);
+                                             inner_field->struct_version_id,
+                                             errp);
                 } else {
                     ret = inner_field->info->get(f, curr_elem, size,
                                                  inner_field);
@@ -211,23 +213,27 @@ int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
                 }
                 if (ret < 0) {
                     qemu_file_set_error(f, ret);
-                    error_report("Failed to load %s:%s", vmsd->name,
-                                 field->name);
+                    error_setg(errp, "Failed to load %s:%s", vmsd->name,
+                               field->name);
                     trace_vmstate_load_field_error(field->name, ret);
                     return ret;
                 }
             }
         } else if (field->flags & VMS_MUST_EXIST) {
-            error_report("Input validation failed: %s/%s",
-                         vmsd->name, field->name);
+            error_setg(errp, "Input validation failed: %s/%s",
+                       vmsd->name, field->name);
             return -1;
         }
         field++;
     }
     assert(field->flags == VMS_END);
-    ret = vmstate_subsection_load(f, vmsd, opaque);
+    ret = vmstate_subsection_load(f, vmsd, opaque, errp);
     if (ret != 0) {
         qemu_file_set_error(f, ret);
+        if (*errp == NULL) {
+            error_setg(errp, "Load of field %s/%s failed",
+                       vmsd->name, field->name);
+        }
         return ret;
     }
     if (vmsd->post_load) {
@@ -566,7 +572,7 @@ vmstate_get_subsection(const VMStateDescription * const *sub,
 }
 
 static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
-                                   void *opaque)
+                                   void *opaque, Error **errp)
 {
     trace_vmstate_subsection_load(vmsd->name);
 
@@ -605,7 +611,7 @@ static int vmstate_subsection_load(QEMUFile *f, const VMStateDescription *vmsd,
         qemu_file_skip(f, len); /* idstr */
         version_id = qemu_get_be32(f);
 
-        ret = vmstate_load_state(f, sub_vmsd, opaque, version_id);
+        ret = vmstate_load_state(f, sub_vmsd, opaque, version_id, errp);
         if (ret) {
             trace_vmstate_subsection_load_bad(vmsd->name, idstr, "(child)");
             return ret;
diff --git a/tests/unit/test-vmstate.c b/tests/unit/test-vmstate.c
index 63f28f26f45691a70936d33e7341d16477a3471f..ca5e0ba1e3e5e2bb0a1ce39143a292f2c6f9420a 100644
--- a/tests/unit/test-vmstate.c
+++ b/tests/unit/test-vmstate.c
@@ -114,7 +114,7 @@ static int load_vmstate_one(const VMStateDescription *desc, void *obj,
     qemu_fclose(f);
 
     f = open_test_file(false);
-    ret = vmstate_load_state(f, desc, obj, version);
+    ret = vmstate_load_state(f, desc, obj, version, NULL);
     if (ret) {
         g_assert(qemu_file_get_error(f));
     } else{
@@ -365,7 +365,7 @@ static void test_load_v1(void)
 
     QEMUFile *loading = open_test_file(false);
     TestStruct obj = { .b = 200, .e = 500, .f = 600 };
-    vmstate_load_state(loading, &vmstate_versioned, &obj, 1);
+    vmstate_load_state(loading, &vmstate_versioned, &obj, 1, NULL);
     g_assert(!qemu_file_get_error(loading));
     g_assert_cmpint(obj.a, ==, 10);
     g_assert_cmpint(obj.b, ==, 200);
@@ -391,7 +391,7 @@ static void test_load_v2(void)
 
     QEMUFile *loading = open_test_file(false);
     TestStruct obj;
-    vmstate_load_state(loading, &vmstate_versioned, &obj, 2);
+    vmstate_load_state(loading, &vmstate_versioned, &obj, 2, NULL);
     g_assert_cmpint(obj.a, ==, 10);
     g_assert_cmpint(obj.b, ==, 20);
     g_assert_cmpint(obj.c, ==, 30);
@@ -480,7 +480,7 @@ static void test_load_noskip(void)
 
     QEMUFile *loading = open_test_file(false);
     TestStruct obj = { .skip_c_e = false };
-    vmstate_load_state(loading, &vmstate_skipping, &obj, 2);
+    vmstate_load_state(loading, &vmstate_skipping, &obj, 2, NULL);
     g_assert(!qemu_file_get_error(loading));
     g_assert_cmpint(obj.a, ==, 10);
     g_assert_cmpint(obj.b, ==, 20);
@@ -504,7 +504,7 @@ static void test_load_skip(void)
 
     QEMUFile *loading = open_test_file(false);
     TestStruct obj = { .skip_c_e = true, .c = 300, .e = 500 };
-    vmstate_load_state(loading, &vmstate_skipping, &obj, 2);
+    vmstate_load_state(loading, &vmstate_skipping, &obj, 2, NULL);
     g_assert(!qemu_file_get_error(loading));
     g_assert_cmpint(obj.a, ==, 10);
     g_assert_cmpint(obj.b, ==, 20);
@@ -773,7 +773,7 @@ static void test_load_q(void)
     TestQtailq tgt;
 
     QTAILQ_INIT(&tgt.q);
-    vmstate_load_state(fload, &vmstate_q, &tgt, 1);
+    vmstate_load_state(fload, &vmstate_q, &tgt, 1, NULL);
     char eof = qemu_get_byte(fload);
     g_assert(!qemu_file_get_error(fload));
     g_assert_cmpint(tgt.i16, ==, obj_q.i16);
@@ -1127,7 +1127,7 @@ static void test_gtree_load_domain(void)
 
     fload = open_test_file(false);
 
-    vmstate_load_state(fload, &vmstate_domain, dest_domain, 1);
+    vmstate_load_state(fload, &vmstate_domain, dest_domain, 1, NULL);
     eof = qemu_get_byte(fload);
     g_assert(!qemu_file_get_error(fload));
     g_assert_cmpint(orig_domain->id, ==, dest_domain->id);
@@ -1241,7 +1241,7 @@ static void test_gtree_load_iommu(void)
     qemu_fclose(fsave);
 
     fload = open_test_file(false);
-    vmstate_load_state(fload, &vmstate_iommu, dest_iommu, 1);
+    vmstate_load_state(fload, &vmstate_iommu, dest_iommu, 1, NULL);
     eof = qemu_get_byte(fload);
     g_assert(!qemu_file_get_error(fload));
     g_assert_cmpint(orig_iommu->id, ==, dest_iommu->id);
@@ -1376,7 +1376,7 @@ static void test_load_qlist(void)
     qemu_fclose(fsave);
 
     fload = open_test_file(false);
-    vmstate_load_state(fload, &vmstate_container, dest_container, 1);
+    vmstate_load_state(fload, &vmstate_container, dest_container, 1, NULL);
     eof = qemu_get_byte(fload);
     g_assert(!qemu_file_get_error(fload));
     g_assert_cmpint(eof, ==, QEMU_VM_EOF);

-- 
2.49.0

Re: [PATCH v3 1/3] migration: Pass Error object errp into vm state loading functions

Posted by Markus Armbruster 7 months, 1 week ago

Arun Menon <armenon@redhat.com> writes:

> - This is an incremental step in converting vmstate loading
>   code to report errors.
> - Minimal changes to the signature and body of the following
>   functions are done,
>   - vmstate_load()
>   - vmstate_load_state()
>   - vmstate_subsection_load()
>   - qemu_load_device_state()
>   - qemu_loadvm_state()
>   - qemu_loadvm_section_start_full()
>   - qemu_loadvm_section_part_end()
>   - qemu_loadvm_state_header()
>   - qemu_loadvm_state_main()
> - error_report() has been replaced with error_setg();
>   and in places where error has been already set,
>   error_prepend() is used to not lose information.
>
> Signed-off-by: Arun Menon <armenon@redhat.com>

[...]

> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 1ff7bd9ac425ba67cd5ca7ad97bcf570f9e19abe..056781b1c21e737583f081594d9f88b32adfd674 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -1196,7 +1196,7 @@ extern const VMStateInfo vmstate_info_qlist;
>      }
>  
>  int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
> -                       void *opaque, int version_id);
> +                       void *opaque, int version_id, Error **errp);
>  int vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
>                         void *opaque, JSONWriter *vmdesc);
>  int vmstate_save_state_with_err(QEMUFile *f, const VMStateDescription *vmsd,
> diff --git a/migration/colo.c b/migration/colo.c
> index e0f713c837f5da25d67afbd02ceb6c54024ca3af..808c980b4af3199968771cdc6cca3c2451a2f4a6 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -659,6 +659,7 @@ void migrate_start_colo_process(MigrationState *s)
>  static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
>                        QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
>  {
> +    ERRP_GUARD();
>      uint64_t total_size;
>      uint64_t value;
>      Error *local_err = NULL;
       int ret;

       bql_lock();
       vm_stop_force_state(RUN_STATE_COLO);
       bql_unlock();
       trace_colo_vm_state_change("run", "stop");

       /* FIXME: This is unnecessary for periodic checkpoint mode */
       colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
                    &local_err);
       if (local_err) {
           error_propagate(errp, local_err);
           return;
       }

Avoid error_propagate() when you have ERRP_GUARD():

       colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
                    errp);
       if (*errp) {
           return;
       }

See the big comment in qapi/error.h for additional guidance.

       colo_receive_check_message(mis->from_src_file,
                          COLO_MESSAGE_VMSTATE_SEND, &local_err);
       if (local_err) {
           error_propagate(errp, local_err);
           return;
       }

Likewise.  More of the same below, not flagging it again.

> @@ -686,11 +687,13 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
>  
>      bql_lock();
>      cpu_synchronize_all_states();
> -    ret = qemu_loadvm_state_main(mis->from_src_file, mis);
> +    ret = qemu_loadvm_state_main(mis->from_src_file, mis, &local_err);
>      bql_unlock();
>  
>      if (ret < 0) {
> -        error_setg(errp, "Load VM's live state (ram) error");
> +        if (local_err != NULL) {

How can @local_err be null here?

> +            error_prepend(errp, "Load VM's live state (ram) error");

Since nothing has set @errp so far, it should still point to null.
error_prepend() crashes when passed a pointer to null.

> +        }
>          return;

Returns without setting an error, and leaks @local_err.

Can you pass @errp to qemu_loadvm_state_main() and call it a day?

>      }
>  
> @@ -729,9 +732,11 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
>      bql_lock();
>      vmstate_loading = true;
>      colo_flush_ram_cache();
> -    ret = qemu_load_device_state(fb);
> +    ret = qemu_load_device_state(fb, &local_err);
>      if (ret < 0) {
> -        error_setg(errp, "COLO: load device state failed");
> +        if (*errp != NULL) {
> +            error_prepend(errp, "COLO: load device state failed");
> +        }

Since nothing has set @errp so far, it should still point to null, so
this will never prepend anything.

>          vmstate_loading = false;
>          bql_unlock();
>          return;

Returns without setting an error, and leaks @local_err.

Can you pass @errp to qemu_load_device_state()?

> diff --git a/migration/cpr.c b/migration/cpr.c
> index a50a57edca754b50e68fa9c294b3c89791e62ba8..0fb9fadac905c83689eed2b1193b282da679b6ef 100644
> --- a/migration/cpr.c
> +++ b/migration/cpr.c
> @@ -235,7 +235,7 @@ int cpr_state_load(MigrationChannel *channel, Error **errp)
>          return -ENOTSUP;
>      }
>  
> -    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
> +    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1, errp);

If vmstate_load_state() fails, it should set @errp.

>      if (ret) {
>          error_setg(errp, "vmstate_load_state error %d", ret);

Setting it again will trip error_setv()'s assertion.

>          qemu_fclose(f);

I doubt you tested your changes to the error paths.  This is dangerous.

I did not look at the remainder of this patch.

[...]

Re: [PATCH v3 1/3] migration: Pass Error object errp into vm state loading functions

Posted by Arun Menon 7 months, 1 week ago

Hi Markus,

Thanks for the review.

On Wed, Jul 02, 2025 at 03:01:51PM +0200, Markus Armbruster wrote:
> Arun Menon <armenon@redhat.com> writes:
> 
> > - This is an incremental step in converting vmstate loading
> >   code to report errors.
> > - Minimal changes to the signature and body of the following
> >   functions are done,
> >   - vmstate_load()
> >   - vmstate_load_state()
> >   - vmstate_subsection_load()
> >   - qemu_load_device_state()
> >   - qemu_loadvm_state()
> >   - qemu_loadvm_section_start_full()
> >   - qemu_loadvm_section_part_end()
> >   - qemu_loadvm_state_header()
> >   - qemu_loadvm_state_main()
> > - error_report() has been replaced with error_setg();
> >   and in places where error has been already set,
> >   error_prepend() is used to not lose information.
> >
> > Signed-off-by: Arun Menon <armenon@redhat.com>
> 
> [...]
> 
> > diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> > index 1ff7bd9ac425ba67cd5ca7ad97bcf570f9e19abe..056781b1c21e737583f081594d9f88b32adfd674 100644
> > --- a/include/migration/vmstate.h
> > +++ b/include/migration/vmstate.h
> > @@ -1196,7 +1196,7 @@ extern const VMStateInfo vmstate_info_qlist;
> >      }
> >  
> >  int vmstate_load_state(QEMUFile *f, const VMStateDescription *vmsd,
> > -                       void *opaque, int version_id);
> > +                       void *opaque, int version_id, Error **errp);
> >  int vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
> >                         void *opaque, JSONWriter *vmdesc);
> >  int vmstate_save_state_with_err(QEMUFile *f, const VMStateDescription *vmsd,
> > diff --git a/migration/colo.c b/migration/colo.c
> > index e0f713c837f5da25d67afbd02ceb6c54024ca3af..808c980b4af3199968771cdc6cca3c2451a2f4a6 100644
> > --- a/migration/colo.c
> > +++ b/migration/colo.c
> > @@ -659,6 +659,7 @@ void migrate_start_colo_process(MigrationState *s)
> >  static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
> >                        QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
> >  {
> > +    ERRP_GUARD();
> >      uint64_t total_size;
> >      uint64_t value;
> >      Error *local_err = NULL;
>        int ret;
> 
>        bql_lock();
>        vm_stop_force_state(RUN_STATE_COLO);
>        bql_unlock();
>        trace_colo_vm_state_change("run", "stop");
> 
>        /* FIXME: This is unnecessary for periodic checkpoint mode */
>        colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
>                     &local_err);
>        if (local_err) {
>            error_propagate(errp, local_err);
>            return;
>        }
> 
> Avoid error_propagate() when you have ERRP_GUARD():

Something I missed; added ERRP_GUARD for using error_prepend() below.
I should have changed the preceeding error_propagate() calls.

> 
>        colo_send_message(mis->to_src_file, COLO_MESSAGE_CHECKPOINT_REPLY,
>                     errp);
>        if (*errp) {
>            return;
>        }
> 
> See the big comment in qapi/error.h for additional guidance.
I see.
> 
>        colo_receive_check_message(mis->from_src_file,
>                           COLO_MESSAGE_VMSTATE_SEND, &local_err);
>        if (local_err) {
>            error_propagate(errp, local_err);
>            return;
>        }
> 
> Likewise.  More of the same below, not flagging it again.
> 
> > @@ -686,11 +687,13 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
> >  
> >      bql_lock();
> >      cpu_synchronize_all_states();
> > -    ret = qemu_loadvm_state_main(mis->from_src_file, mis);
> > +    ret = qemu_loadvm_state_main(mis->from_src_file, mis, &local_err);
> >      bql_unlock();
> >  
> >      if (ret < 0) {
> > -        error_setg(errp, "Load VM's live state (ram) error");
> > +        if (local_err != NULL) {
> 
> How can @local_err be null here?
> 
> > +            error_prepend(errp, "Load VM's live state (ram) error");
> 
> Since nothing has set @errp so far, it should still point to null.
> error_prepend() crashes when passed a pointer to null.
> 
> > +        }
> >          return;
> 
> Returns without setting an error, and leaks @local_err.
> 
> Can you pass @errp to qemu_loadvm_state_main() and call it a day?

Yes, I am going to do that in the next version. Making sure errp is set
in all paths.
> 
> >      }
> >  
> > @@ -729,9 +732,11 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
> >      bql_lock();
> >      vmstate_loading = true;
> >      colo_flush_ram_cache();
> > -    ret = qemu_load_device_state(fb);
> > +    ret = qemu_load_device_state(fb, &local_err);
> >      if (ret < 0) {
> > -        error_setg(errp, "COLO: load device state failed");
> > +        if (*errp != NULL) {
> > +            error_prepend(errp, "COLO: load device state failed");
> > +        }
> 
> Since nothing has set @errp so far, it should still point to null, so
> this will never prepend anything.

I might have mixed up local_err and errp, sorry about that.
I am going to revise this.
> 
> >          vmstate_loading = false;
> >          bql_unlock();
> >          return;
> 
> Returns without setting an error, and leaks @local_err.
> 
> Can you pass @errp to qemu_load_device_state()?

Yes. I am going to do that.

> 
> > diff --git a/migration/cpr.c b/migration/cpr.c
> > index a50a57edca754b50e68fa9c294b3c89791e62ba8..0fb9fadac905c83689eed2b1193b282da679b6ef 100644
> > --- a/migration/cpr.c
> > +++ b/migration/cpr.c
> > @@ -235,7 +235,7 @@ int cpr_state_load(MigrationChannel *channel, Error **errp)
> >          return -ENOTSUP;
> >      }
> >  
> > -    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1);
> > +    ret = vmstate_load_state(f, &vmstate_cpr_state, &cpr_state, 1, errp);
> 
> If vmstate_load_state() fails, it should set @errp.
> 
> >      if (ret) {
> >          error_setg(errp, "vmstate_load_state error %d", ret);
> 
> Setting it again will trip error_setv()'s assertion.

Yes, I see. Should be changed.
> 
> >          qemu_fclose(f);
> 
> I doubt you tested your changes to the error paths.  This is dangerous.
> 
> I did not look at the remainder of this patch.
> 
> [...]
> 

I am going to revise the changes and send a new one. Thanks for the review.


Regards,
Arun.

Re: [PATCH v3 1/3] migration: Pass Error object errp into vm state loading functions

Posted by Daniel P. Berrangé 7 months, 1 week ago

On Wed, Jul 02, 2025 at 05:06:50PM +0530, Arun Menon wrote:
> - This is an incremental step in converting vmstate loading
>   code to report errors.
> - Minimal changes to the signature and body of the following
>   functions are done,
>   - vmstate_load()
>   - vmstate_load_state()
>   - vmstate_subsection_load()
>   - qemu_load_device_state()
>   - qemu_loadvm_state()
>   - qemu_loadvm_section_start_full()
>   - qemu_loadvm_section_part_end()
>   - qemu_loadvm_state_header()
>   - qemu_loadvm_state_main()
> - error_report() has been replaced with error_setg();
>   and in places where error has been already set,
>   error_prepend() is used to not lose information.
> 
> Signed-off-by: Arun Menon <armenon@redhat.com>
> ---
>  hw/display/virtio-gpu.c     |   2 +-
>  hw/pci/pci.c                |   2 +-
>  hw/s390x/virtio-ccw.c       |   2 +-
>  hw/scsi/spapr_vscsi.c       |   2 +-
>  hw/vfio/pci.c               |   2 +-
>  hw/virtio/virtio-mmio.c     |   2 +-
>  hw/virtio/virtio-pci.c      |   2 +-
>  hw/virtio/virtio.c          |   4 +-
>  include/migration/vmstate.h |   2 +-
>  migration/colo.c            |  13 +++--
>  migration/cpr.c             |   2 +-
>  migration/migration.c       |  19 ++++--
>  migration/savevm.c          | 137 +++++++++++++++++++++++++++-----------------
>  migration/savevm.h          |   7 ++-
>  migration/vmstate-types.c   |  10 ++--
>  migration/vmstate.c         |  40 +++++++------
>  tests/unit/test-vmstate.c   |  18 +++---
>  17 files changed, 158 insertions(+), 108 deletions(-)
> 

> diff --git a/migration/colo.c b/migration/colo.c
> index e0f713c837f5da25d67afbd02ceb6c54024ca3af..808c980b4af3199968771cdc6cca3c2451a2f4a6 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -659,6 +659,7 @@ void migrate_start_colo_process(MigrationState *s)
>  static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
>                        QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
>  {
> +    ERRP_GUARD();
>      uint64_t total_size;
>      uint64_t value;
>      Error *local_err = NULL;
> @@ -686,11 +687,13 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
>  
>      bql_lock();
>      cpu_synchronize_all_states();
> -    ret = qemu_loadvm_state_main(mis->from_src_file, mis);
> +    ret = qemu_loadvm_state_main(mis->from_src_file, mis, &local_err);
>      bql_unlock();
>  
>      if (ret < 0) {
> -        error_setg(errp, "Load VM's live state (ram) error");
> +        if (local_err != NULL) {
> +            error_prepend(errp, "Load VM's live state (ram) error");
> +        }

This doesn't look right to me.

The old code would unconditionally report an error, but this code
now only appends an error if 'qemu_loadvm_state_main' already
reported an error - if 'qemu_loadvm_state_main' was silent this
method reports nothing too.

IMHO this is a bad design for qemu_loadvm_state_main - when we
add an 'errp' to that method we *MUST* ensure it fills that
in *all* possible error code paths.

This code should then unconditionally use error_prepend().

>          return;
>      }
>  
> @@ -729,9 +732,11 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
>      bql_lock();
>      vmstate_loading = true;
>      colo_flush_ram_cache();
> -    ret = qemu_load_device_state(fb);
> +    ret = qemu_load_device_state(fb, &local_err);
>      if (ret < 0) {
> -        error_setg(errp, "COLO: load device state failed");
> +        if (*errp != NULL) {
> +            error_prepend(errp, "COLO: load device state failed");
> +        }

Same flawed design here - qemu_load_device_state must guarantee it always
fills its 'errp' parameter on failure code paths.

There are more instances of this general error reporting problem through
this patch

IMHO this patch is changing too many methods at once to be confident when
reviewing it.

This would be better changing 1 single method and its *immediate* callers
only.

IOW, if we have a sequence a() -> b() -> c() -> d() all of which
currently use 'error_report', don't try to change the whole call
chain at once.

First add an "errp' to 'd()', and make 'c()' use 'error_report_err'.
Then add an "errp' to 'c'()', and make 'b()' use 'error_report_err',
...repeat..

This is what I tried todo previously to address this problem in
migration code

  https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg01727.html

Though those patches are horribly outdated now, so can't be used as
is, IMHO they show the right kind of incremental conversion

With regards,
Daniel
-- 
|: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org         -o-            https://fstop138.berrange.com :|
|: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Re: [PATCH v3 1/3] migration: Pass Error object errp into vm state loading functions

Posted by Arun Menon 7 months, 1 week ago

Hi Daniel,

Thanks for the review.

On Wed, Jul 02, 2025 at 01:08:51PM +0100, Daniel P. Berrangé wrote:
> On Wed, Jul 02, 2025 at 05:06:50PM +0530, Arun Menon wrote:
> > - This is an incremental step in converting vmstate loading
> >   code to report errors.
> > - Minimal changes to the signature and body of the following
> >   functions are done,
> >   - vmstate_load()
> >   - vmstate_load_state()
> >   - vmstate_subsection_load()
> >   - qemu_load_device_state()
> >   - qemu_loadvm_state()
> >   - qemu_loadvm_section_start_full()
> >   - qemu_loadvm_section_part_end()
> >   - qemu_loadvm_state_header()
> >   - qemu_loadvm_state_main()
> > - error_report() has been replaced with error_setg();
> >   and in places where error has been already set,
> >   error_prepend() is used to not lose information.
> > 
> > Signed-off-by: Arun Menon <armenon@redhat.com>
> > ---
> >  hw/display/virtio-gpu.c     |   2 +-
> >  hw/pci/pci.c                |   2 +-
> >  hw/s390x/virtio-ccw.c       |   2 +-
> >  hw/scsi/spapr_vscsi.c       |   2 +-
> >  hw/vfio/pci.c               |   2 +-
> >  hw/virtio/virtio-mmio.c     |   2 +-
> >  hw/virtio/virtio-pci.c      |   2 +-
> >  hw/virtio/virtio.c          |   4 +-
> >  include/migration/vmstate.h |   2 +-
> >  migration/colo.c            |  13 +++--
> >  migration/cpr.c             |   2 +-
> >  migration/migration.c       |  19 ++++--
> >  migration/savevm.c          | 137 +++++++++++++++++++++++++++-----------------
> >  migration/savevm.h          |   7 ++-
> >  migration/vmstate-types.c   |  10 ++--
> >  migration/vmstate.c         |  40 +++++++------
> >  tests/unit/test-vmstate.c   |  18 +++---
> >  17 files changed, 158 insertions(+), 108 deletions(-)
> > 
> 
> > diff --git a/migration/colo.c b/migration/colo.c
> > index e0f713c837f5da25d67afbd02ceb6c54024ca3af..808c980b4af3199968771cdc6cca3c2451a2f4a6 100644
> > --- a/migration/colo.c
> > +++ b/migration/colo.c
> > @@ -659,6 +659,7 @@ void migrate_start_colo_process(MigrationState *s)
> >  static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
> >                        QEMUFile *fb, QIOChannelBuffer *bioc, Error **errp)
> >  {
> > +    ERRP_GUARD();
> >      uint64_t total_size;
> >      uint64_t value;
> >      Error *local_err = NULL;
> > @@ -686,11 +687,13 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
> >  
> >      bql_lock();
> >      cpu_synchronize_all_states();
> > -    ret = qemu_loadvm_state_main(mis->from_src_file, mis);
> > +    ret = qemu_loadvm_state_main(mis->from_src_file, mis, &local_err);
> >      bql_unlock();
> >  
> >      if (ret < 0) {
> > -        error_setg(errp, "Load VM's live state (ram) error");
> > +        if (local_err != NULL) {
> > +            error_prepend(errp, "Load VM's live state (ram) error");
> > +        }
> 
> This doesn't look right to me.
> 
> The old code would unconditionally report an error, but this code
> now only appends an error if 'qemu_loadvm_state_main' already
> reported an error - if 'qemu_loadvm_state_main' was silent this
> method reports nothing too.
> 
> IMHO this is a bad design for qemu_loadvm_state_main - when we
> add an 'errp' to that method we *MUST* ensure it fills that
> in *all* possible error code paths.

I understand.
In one of my previous patches, I had both the cases covered 
(in a different file though, migration.c).
Something like, https://lore.kernel.org/qemu-devel/20250624-propagate_tpm_error-v1-2-2171487a593d@redhat.com/

        if (local_err) {
            error_prepend(&local_err, "load of migration failed: %s: ",
                          strerror(-ret));
        } else {
            error_setg(&local_err, "load of migration failed: %s",
                       strerror(-ret));
        }

So in either case, the error will be set.
But now I see. We must to set errp, if it is passed in a method.
That way we can unconditionally use error_prepend().
Also, way we avoid the extra NULL checks.

> 
> This code should then unconditionally use error_prepend().
> 
> >          return;
> >      }
> >  
> > @@ -729,9 +732,11 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
> >      bql_lock();
> >      vmstate_loading = true;
> >      colo_flush_ram_cache();
> > -    ret = qemu_load_device_state(fb);
> > +    ret = qemu_load_device_state(fb, &local_err);
> >      if (ret < 0) {
> > -        error_setg(errp, "COLO: load device state failed");
> > +        if (*errp != NULL) {
> > +            error_prepend(errp, "COLO: load device state failed");
> > +        }
> 
> Same flawed design here - qemu_load_device_state must guarantee it always
> fills its 'errp' parameter on failure code paths.
> 
> There are more instances of this general error reporting problem through
> this patch
> 
> IMHO this patch is changing too many methods at once to be confident when
> reviewing it.
> 
> This would be better changing 1 single method and its *immediate* callers
> only.
> 
> IOW, if we have a sequence a() -> b() -> c() -> d() all of which
> currently use 'error_report', don't try to change the whole call
> chain at once.
> 
> First add an "errp' to 'd()', and make 'c()' use 'error_report_err'.
> Then add an "errp' to 'c'()', and make 'b()' use 'error_report_err',
> ...repeat..

I understand.

> 
> This is what I tried todo previously to address this problem in
> migration code
> 
>   https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg01727.html
> 
> Though those patches are horribly outdated now, so can't be used as
> is, IMHO they show the right kind of incremental conversion
> 
> With regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
> 

I see your point. Thank you.
I am going to try that way of having a sequence of changes instead 
of changing the whole call chain at once. 
I think I will have to modify more methods in doing so.


Regards,
Arun

[PATCH v3 1/3] migration: Pass Error object errp into vm state loading functions
[PATCH v3 2/3] migration: Introduce a post_load_with_error hook
[PATCH v3 3/3] backends/tpm: Propagate vTPM error on migration failure