[PATCH v5 30/36] vfio/migration: Multifd device state transfer support - send side

Maciej S. Szmigiero posted 36 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH v5 30/36] vfio/migration: Multifd device state transfer support - send side
Posted by Maciej S. Szmigiero 1 month, 1 week ago
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>

Implement the multifd device state transfer via additional per-device
thread inside save_live_complete_precopy_thread handler.

Switch between doing the data transfer in the new handler and doing it
in the old save_state handler depending on the
x-migration-multifd-transfer device property value.

Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
 hw/vfio/migration-multifd.c   | 139 ++++++++++++++++++++++++++++++++++
 hw/vfio/migration-multifd.h   |   5 ++
 hw/vfio/migration.c           |  26 +++++--
 hw/vfio/trace-events          |   2 +
 include/hw/vfio/vfio-common.h |   8 ++
 5 files changed, 174 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 7200f6f1c2a2..0cfa9d31732a 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
     return true;
 }
 
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
+{
+    assert(vfio_multifd_transfer_enabled(vbasedev));
+
+    /*
+     * Emit dummy NOP data on the main migration channel since the actual
+     * device state transfer is done via multifd channels.
+     */
+    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+}
+
+static bool
+vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
+                                               char *idstr,
+                                               uint32_t instance_id,
+                                               uint32_t idx,
+                                               Error **errp)
+{
+    g_autoptr(QIOChannelBuffer) bioc = NULL;
+    g_autoptr(QEMUFile) f = NULL;
+    int ret;
+    g_autofree VFIODeviceStatePacket *packet = NULL;
+    size_t packet_len;
+
+    bioc = qio_channel_buffer_new(0);
+    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
+
+    f = qemu_file_new_output(QIO_CHANNEL(bioc));
+
+    if (vfio_save_device_config_state(f, vbasedev, errp)) {
+        return false;
+    }
+
+    ret = qemu_fflush(f);
+    if (ret) {
+        error_setg(errp, "save config state flush failed: %d", ret);
+        return false;
+    }
+
+    packet_len = sizeof(*packet) + bioc->usage;
+    packet = g_malloc0(packet_len);
+    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+    packet->idx = idx;
+    packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
+    memcpy(&packet->data, bioc->data, bioc->usage);
+
+    if (!multifd_queue_device_state(idstr, instance_id,
+                                    (char *)packet, packet_len)) {
+        error_setg(errp, "multifd config data queuing failed");
+        return false;
+    }
+
+    vfio_add_bytes_transferred(packet_len);
+
+    return true;
+}
+
+/*
+ * This thread is spawned by the migration core directly via
+ * .save_live_complete_precopy_thread SaveVMHandler.
+ *
+ * It exits after either:
+ * * completing saving the remaining device state and device config, OR:
+ * * encountering some error while doing the above, OR:
+ * * being forcefully aborted by the migration core by
+ *   multifd_device_state_save_thread_should_exit() returning true.
+ */
+bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+                                       Error **errp)
+{
+    VFIODevice *vbasedev = d->handler_opaque;
+    VFIOMigration *migration = vbasedev->migration;
+    bool ret;
+    g_autofree VFIODeviceStatePacket *packet = NULL;
+    uint32_t idx;
+
+    if (!vfio_multifd_transfer_enabled(vbasedev)) {
+        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
+        return true;
+    }
+
+    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
+                                                  d->idstr, d->instance_id);
+
+    /* We reach here with device state STOP or STOP_COPY only */
+    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+                                 VFIO_DEVICE_STATE_STOP, errp)) {
+        ret = false;
+        goto ret_finish;
+    }
+
+    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
+    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+
+    for (idx = 0; ; idx++) {
+        ssize_t data_size;
+        size_t packet_size;
+
+        if (multifd_device_state_save_thread_should_exit()) {
+            error_setg(errp, "operation cancelled");
+            ret = false;
+            goto ret_finish;
+        }
+
+        data_size = read(migration->data_fd, &packet->data,
+                         migration->data_buffer_size);
+        if (data_size < 0) {
+            error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
+                       idx, errno);
+            ret = false;
+            goto ret_finish;
+        } else if (data_size == 0) {
+            break;
+        }
+
+        packet->idx = idx;
+        packet_size = sizeof(*packet) + data_size;
+
+        if (!multifd_queue_device_state(d->idstr, d->instance_id,
+                                        (char *)packet, packet_size)) {
+            error_setg(errp, "multifd data queuing failed");
+            ret = false;
+            goto ret_finish;
+        }
+
+        vfio_add_bytes_transferred(packet_size);
+    }
+
+    ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
+                                                         d->idstr,
+                                                         d->instance_id,
+                                                         idx, errp);
+
+ret_finish:
+    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
+
+    return ret;
+}
+
 int vfio_multifd_switchover_start(VFIODevice *vbasedev)
 {
     VFIOMigration *migration = vbasedev->migration;
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
index 09cbb437d9d1..79780d7b5392 100644
--- a/hw/vfio/migration-multifd.h
+++ b/hw/vfio/migration-multifd.h
@@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
 bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
                             Error **errp);
 
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
+
+bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+                                       Error **errp);
+
 int vfio_multifd_switchover_start(VFIODevice *vbasedev);
 
 #endif
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index b962309f7c27..69dcf2dac2fa 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
     vfio_migration_send_event(vbasedev);
 }
 
-static int vfio_migration_set_state(VFIODevice *vbasedev,
-                                    enum vfio_device_mig_state new_state,
-                                    enum vfio_device_mig_state recover_state,
-                                    Error **errp)
+int vfio_migration_set_state(VFIODevice *vbasedev,
+                             enum vfio_device_mig_state new_state,
+                             enum vfio_device_mig_state recover_state,
+                             Error **errp)
 {
     VFIOMigration *migration = vbasedev->migration;
     uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
     return ret;
 }
 
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
-                                         Error **errp)
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
 {
     VFIODevice *vbasedev = opaque;
     int ret;
@@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
     uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
     int ret;
 
+    if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
+        return -EINVAL;
+    }
+
     qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
 
     vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
@@ -631,6 +634,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
     int ret;
     Error *local_err = NULL;
 
+    if (vfio_multifd_transfer_enabled(vbasedev)) {
+        vfio_multifd_emit_dummy_eos(vbasedev, f);
+        return 0;
+    }
+
     trace_vfio_save_complete_precopy_start(vbasedev->name);
 
     /* We reach here with device state STOP or STOP_COPY only */
@@ -662,6 +670,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
     Error *local_err = NULL;
     int ret;
 
+    if (vfio_multifd_transfer_enabled(vbasedev)) {
+        vfio_multifd_emit_dummy_eos(vbasedev, f);
+        return;
+    }
+
     ret = vfio_save_device_config_state(f, opaque, &local_err);
     if (ret) {
         error_prepend(&local_err,
@@ -819,6 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
     .is_active_iterate = vfio_is_active_iterate,
     .save_live_iterate = vfio_save_iterate,
     .save_live_complete_precopy = vfio_save_complete_precopy,
+    .save_live_complete_precopy_thread = vfio_save_complete_precopy_thread,
     .save_state = vfio_save_state,
     .load_setup = vfio_load_setup,
     .load_cleanup = vfio_load_cleanup,
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 418b378ebd29..039979bdd98f 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -168,6 +168,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
 vfio_save_cleanup(const char *name) " (%s)"
 vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
 vfio_save_complete_precopy_start(const char *name) " (%s)"
+vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
+vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
 vfio_save_device_config_state(const char *name) " (%s)"
 vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
 vfio_save_iterate_start(const char *name) " (%s)"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index ce2bdea8a2c2..ba851917f9fc 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
 bool vfio_device_state_is_running(VFIODevice *vbasedev);
 bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
 
+#ifdef CONFIG_LINUX
+int vfio_migration_set_state(VFIODevice *vbasedev,
+                             enum vfio_device_mig_state new_state,
+                             enum vfio_device_mig_state recover_state,
+                             Error **errp);
+#endif
+
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
 int vfio_load_device_config_state(QEMUFile *f, void *opaque);
 
 #ifdef CONFIG_LINUX
Re: [PATCH v5 30/36] vfio/migration: Multifd device state transfer support - send side
Posted by Avihai Horon 1 month ago
On 19/02/2025 22:34, Maciej S. Szmigiero wrote:
> External email: Use caution opening links or attachments
>
>
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>
> Implement the multifd device state transfer via additional per-device
> thread inside save_live_complete_precopy_thread handler.
>
> Switch between doing the data transfer in the new handler and doing it
> in the old save_state handler depending on the
> x-migration-multifd-transfer device property value.

x-migration-multifd-transfer is not yet introduced. Maybe rephrase to:

... depending if VFIO multifd transfer is enabled or not.

>
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
>   hw/vfio/migration-multifd.c   | 139 ++++++++++++++++++++++++++++++++++
>   hw/vfio/migration-multifd.h   |   5 ++
>   hw/vfio/migration.c           |  26 +++++--
>   hw/vfio/trace-events          |   2 +
>   include/hw/vfio/vfio-common.h |   8 ++
>   5 files changed, 174 insertions(+), 6 deletions(-)
>
> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
> index 7200f6f1c2a2..0cfa9d31732a 100644
> --- a/hw/vfio/migration-multifd.c
> +++ b/hw/vfio/migration-multifd.c
> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
>       return true;
>   }
>
> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
> +{
> +    assert(vfio_multifd_transfer_enabled(vbasedev));
> +
> +    /*
> +     * Emit dummy NOP data on the main migration channel since the actual
> +     * device state transfer is done via multifd channels.
> +     */
> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> +}
> +
> +static bool
> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
> +                                               char *idstr,
> +                                               uint32_t instance_id,
> +                                               uint32_t idx,
> +                                               Error **errp)
> +{
> +    g_autoptr(QIOChannelBuffer) bioc = NULL;
> +    g_autoptr(QEMUFile) f = NULL;
> +    int ret;
> +    g_autofree VFIODeviceStatePacket *packet = NULL;
> +    size_t packet_len;
> +
> +    bioc = qio_channel_buffer_new(0);
> +    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
> +
> +    f = qemu_file_new_output(QIO_CHANNEL(bioc));
> +
> +    if (vfio_save_device_config_state(f, vbasedev, errp)) {
> +        return false;
> +    }
> +
> +    ret = qemu_fflush(f);
> +    if (ret) {
> +        error_setg(errp, "save config state flush failed: %d", ret);

Let's add vbasedev->name to the error message so we know which device 
caused the error.

> +        return false;
> +    }
> +
> +    packet_len = sizeof(*packet) + bioc->usage;
> +    packet = g_malloc0(packet_len);
> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
> +    packet->idx = idx;
> +    packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
> +    memcpy(&packet->data, bioc->data, bioc->usage);
> +
> +    if (!multifd_queue_device_state(idstr, instance_id,
> +                                    (char *)packet, packet_len)) {
> +        error_setg(errp, "multifd config data queuing failed");

Ditto.

> +        return false;
> +    }
> +
> +    vfio_add_bytes_transferred(packet_len);
> +
> +    return true;
> +}
> +
> +/*
> + * This thread is spawned by the migration core directly via
> + * .save_live_complete_precopy_thread SaveVMHandler.
> + *
> + * It exits after either:
> + * * completing saving the remaining device state and device config, OR:
> + * * encountering some error while doing the above, OR:
> + * * being forcefully aborted by the migration core by
> + *   multifd_device_state_save_thread_should_exit() returning true.
> + */
> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
> +                                       Error **errp)
> +{
> +    VFIODevice *vbasedev = d->handler_opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +    bool ret;
> +    g_autofree VFIODeviceStatePacket *packet = NULL;
> +    uint32_t idx;
> +
> +    if (!vfio_multifd_transfer_enabled(vbasedev)) {
> +        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
> +        return true;
> +    }
> +
> +    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
> +                                                  d->idstr, d->instance_id);
> +
> +    /* We reach here with device state STOP or STOP_COPY only */
> +    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
> +                                 VFIO_DEVICE_STATE_STOP, errp)) {
> +        ret = false;
> +        goto ret_finish;
> +    }
> +
> +    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
> +
> +    for (idx = 0; ; idx++) {
> +        ssize_t data_size;
> +        size_t packet_size;
> +
> +        if (multifd_device_state_save_thread_should_exit()) {
> +            error_setg(errp, "operation cancelled");

Same comment as in patch #27:

IIUC, if multifd_device_state_save_thread_should_exit() returns true, it 
means that some other code part already failed and set migration error, no?
If so, shouldn't we return true here? After all, 
vfio_save_complete_precopy_thread didn't really fail, it just got signal 
to terminate itself

> +            ret = false;
> +            goto ret_finish;
> +        }
> +
> +        data_size = read(migration->data_fd, &packet->data,
> +                         migration->data_buffer_size);
> +        if (data_size < 0) {
> +            error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
> +                       idx, errno);

Let's add vbasedev->name to the error message so we know which device 
caused the error.

> +            ret = false;
> +            goto ret_finish;
> +        } else if (data_size == 0) {
> +            break;
> +        }
> +
> +        packet->idx = idx;
> +        packet_size = sizeof(*packet) + data_size;
> +
> +        if (!multifd_queue_device_state(d->idstr, d->instance_id,
> +                                        (char *)packet, packet_size)) {
> +            error_setg(errp, "multifd data queuing failed");

Ditto.

Thanks.

> +            ret = false;
> +            goto ret_finish;
> +        }
> +
> +        vfio_add_bytes_transferred(packet_size);
> +    }
> +
> +    ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
> +                                                         d->idstr,
> +                                                         d->instance_id,
> +                                                         idx, errp);
> +
> +ret_finish:
> +    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
> +
> +    return ret;
> +}
> +
>   int vfio_multifd_switchover_start(VFIODevice *vbasedev)
>   {
>       VFIOMigration *migration = vbasedev->migration;
> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
> index 09cbb437d9d1..79780d7b5392 100644
> --- a/hw/vfio/migration-multifd.h
> +++ b/hw/vfio/migration-multifd.h
> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
>   bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
>                               Error **errp);
>
> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
> +
> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
> +                                       Error **errp);
> +
>   int vfio_multifd_switchover_start(VFIODevice *vbasedev);
>
>   #endif
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index b962309f7c27..69dcf2dac2fa 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
>       vfio_migration_send_event(vbasedev);
>   }
>
> -static int vfio_migration_set_state(VFIODevice *vbasedev,
> -                                    enum vfio_device_mig_state new_state,
> -                                    enum vfio_device_mig_state recover_state,
> -                                    Error **errp)
> +int vfio_migration_set_state(VFIODevice *vbasedev,
> +                             enum vfio_device_mig_state new_state,
> +                             enum vfio_device_mig_state recover_state,
> +                             Error **errp)
>   {
>       VFIOMigration *migration = vbasedev->migration;
>       uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
>       return ret;
>   }
>
> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
> -                                         Error **errp)
> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
>   {
>       VFIODevice *vbasedev = opaque;
>       int ret;
> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>       uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
>       int ret;
>
> +    if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
> +        return -EINVAL;
> +    }
> +
>       qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>
>       vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
> @@ -631,6 +634,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>       int ret;
>       Error *local_err = NULL;
>
> +    if (vfio_multifd_transfer_enabled(vbasedev)) {
> +        vfio_multifd_emit_dummy_eos(vbasedev, f);
> +        return 0;
> +    }
> +
>       trace_vfio_save_complete_precopy_start(vbasedev->name);
>
>       /* We reach here with device state STOP or STOP_COPY only */
> @@ -662,6 +670,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
>       Error *local_err = NULL;
>       int ret;
>
> +    if (vfio_multifd_transfer_enabled(vbasedev)) {
> +        vfio_multifd_emit_dummy_eos(vbasedev, f);
> +        return;
> +    }
> +
>       ret = vfio_save_device_config_state(f, opaque, &local_err);
>       if (ret) {
>           error_prepend(&local_err,
> @@ -819,6 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
>       .is_active_iterate = vfio_is_active_iterate,
>       .save_live_iterate = vfio_save_iterate,
>       .save_live_complete_precopy = vfio_save_complete_precopy,
> +    .save_live_complete_precopy_thread = vfio_save_complete_precopy_thread,
>       .save_state = vfio_save_state,
>       .load_setup = vfio_load_setup,
>       .load_cleanup = vfio_load_cleanup,
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 418b378ebd29..039979bdd98f 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -168,6 +168,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
>   vfio_save_cleanup(const char *name) " (%s)"
>   vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
>   vfio_save_complete_precopy_start(const char *name) " (%s)"
> +vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
> +vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
>   vfio_save_device_config_state(const char *name) " (%s)"
>   vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
>   vfio_save_iterate_start(const char *name) " (%s)"
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index ce2bdea8a2c2..ba851917f9fc 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
>   bool vfio_device_state_is_running(VFIODevice *vbasedev);
>   bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
>
> +#ifdef CONFIG_LINUX
> +int vfio_migration_set_state(VFIODevice *vbasedev,
> +                             enum vfio_device_mig_state new_state,
> +                             enum vfio_device_mig_state recover_state,
> +                             Error **errp);
> +#endif
> +
> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
>   int vfio_load_device_config_state(QEMUFile *f, void *opaque);
>
>   #ifdef CONFIG_LINUX
Re: [PATCH v5 30/36] vfio/migration: Multifd device state transfer support - send side
Posted by Maciej S. Szmigiero 1 month ago
On 2.03.2025 15:41, Avihai Horon wrote:
> 
> On 19/02/2025 22:34, Maciej S. Szmigiero wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> Implement the multifd device state transfer via additional per-device
>> thread inside save_live_complete_precopy_thread handler.
>>
>> Switch between doing the data transfer in the new handler and doing it
>> in the old save_state handler depending on the
>> x-migration-multifd-transfer device property value.
> 
> x-migration-multifd-transfer is not yet introduced. Maybe rephrase to:
> 
> ... depending if VFIO multifd transfer is enabled or not.

Changed accordingly.

>>
>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>> ---
>>   hw/vfio/migration-multifd.c   | 139 ++++++++++++++++++++++++++++++++++
>>   hw/vfio/migration-multifd.h   |   5 ++
>>   hw/vfio/migration.c           |  26 +++++--
>>   hw/vfio/trace-events          |   2 +
>>   include/hw/vfio/vfio-common.h |   8 ++
>>   5 files changed, 174 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
>> index 7200f6f1c2a2..0cfa9d31732a 100644
>> --- a/hw/vfio/migration-multifd.c
>> +++ b/hw/vfio/migration-multifd.c
>> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
>>       return true;
>>   }
>>
>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
>> +{
>> +    assert(vfio_multifd_transfer_enabled(vbasedev));
>> +
>> +    /*
>> +     * Emit dummy NOP data on the main migration channel since the actual
>> +     * device state transfer is done via multifd channels.
>> +     */
>> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>> +}
>> +
>> +static bool
>> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
>> +                                               char *idstr,
>> +                                               uint32_t instance_id,
>> +                                               uint32_t idx,
>> +                                               Error **errp)
>> +{
>> +    g_autoptr(QIOChannelBuffer) bioc = NULL;
>> +    g_autoptr(QEMUFile) f = NULL;
>> +    int ret;
>> +    g_autofree VFIODeviceStatePacket *packet = NULL;
>> +    size_t packet_len;
>> +
>> +    bioc = qio_channel_buffer_new(0);
>> +    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
>> +
>> +    f = qemu_file_new_output(QIO_CHANNEL(bioc));
>> +
>> +    if (vfio_save_device_config_state(f, vbasedev, errp)) {
>> +        return false;
>> +    }
>> +
>> +    ret = qemu_fflush(f);
>> +    if (ret) {
>> +        error_setg(errp, "save config state flush failed: %d", ret);
> 
> Let's add vbasedev->name to the error message so we know which device caused the error.

Done.

>> +        return false;
>> +    }
>> +
>> +    packet_len = sizeof(*packet) + bioc->usage;
>> +    packet = g_malloc0(packet_len);
>> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>> +    packet->idx = idx;
>> +    packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
>> +    memcpy(&packet->data, bioc->data, bioc->usage);
>> +
>> +    if (!multifd_queue_device_state(idstr, instance_id,
>> +                                    (char *)packet, packet_len)) {
>> +        error_setg(errp, "multifd config data queuing failed");
> 
> Ditto.

Done.

>> +        return false;
>> +    }
>> +
>> +    vfio_add_bytes_transferred(packet_len);
>> +
>> +    return true;
>> +}
>> +
>> +/*
>> + * This thread is spawned by the migration core directly via
>> + * .save_live_complete_precopy_thread SaveVMHandler.
>> + *
>> + * It exits after either:
>> + * * completing saving the remaining device state and device config, OR:
>> + * * encountering some error while doing the above, OR:
>> + * * being forcefully aborted by the migration core by
>> + *   multifd_device_state_save_thread_should_exit() returning true.
>> + */
>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>> +                                       Error **errp)
>> +{
>> +    VFIODevice *vbasedev = d->handler_opaque;
>> +    VFIOMigration *migration = vbasedev->migration;
>> +    bool ret;
>> +    g_autofree VFIODeviceStatePacket *packet = NULL;
>> +    uint32_t idx;
>> +
>> +    if (!vfio_multifd_transfer_enabled(vbasedev)) {
>> +        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
>> +        return true;
>> +    }
>> +
>> +    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
>> +                                                  d->idstr, d->instance_id);
>> +
>> +    /* We reach here with device state STOP or STOP_COPY only */
>> +    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>> +                                 VFIO_DEVICE_STATE_STOP, errp)) {
>> +        ret = false;
>> +        goto ret_finish;
>> +    }
>> +
>> +    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
>> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>> +
>> +    for (idx = 0; ; idx++) {
>> +        ssize_t data_size;
>> +        size_t packet_size;
>> +
>> +        if (multifd_device_state_save_thread_should_exit()) {
>> +            error_setg(errp, "operation cancelled");
> 
> Same comment as in patch #27:
> 
> IIUC, if multifd_device_state_save_thread_should_exit() returns true, it means that some other code part already failed and set migration error, no?
> If so, shouldn't we return true here? After all, vfio_save_complete_precopy_thread didn't really fail, it just got signal to terminate itself

Same as in the "load thread" case - the thread didn't succeed with saving all the data either,
but got cancelled.

> 
>> +            ret = false;
>> +            goto ret_finish;
>> +        }
>> +
>> +        data_size = read(migration->data_fd, &packet->data,
>> +                         migration->data_buffer_size);
>> +        if (data_size < 0) {
>> +            error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
>> +                       idx, errno);
> 
> Let's add vbasedev->name to the error message so we know which device caused the error.

Done.

>> +            ret = false;
>> +            goto ret_finish;
>> +        } else if (data_size == 0) {
>> +            break;
>> +        }
>> +
>> +        packet->idx = idx;
>> +        packet_size = sizeof(*packet) + data_size;
>> +
>> +        if (!multifd_queue_device_state(d->idstr, d->instance_id,
>> +                                        (char *)packet, packet_size)) {
>> +            error_setg(errp, "multifd data queuing failed");
> 
> Ditto.

Done.

> Thanks.
> 

Thanks,
Maciej


Re: [PATCH v5 30/36] vfio/migration: Multifd device state transfer support - send side
Posted by Cédric Le Goater 1 month ago
On 2/19/25 21:34, Maciej S. Szmigiero wrote:
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
> 
> Implement the multifd device state transfer via additional per-device
> thread inside save_live_complete_precopy_thread handler.
> 
> Switch between doing the data transfer in the new handler and doing it
> in the old save_state handler depending on the
> x-migration-multifd-transfer device property value.
> 
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
>   hw/vfio/migration-multifd.c   | 139 ++++++++++++++++++++++++++++++++++
>   hw/vfio/migration-multifd.h   |   5 ++
>   hw/vfio/migration.c           |  26 +++++--
>   hw/vfio/trace-events          |   2 +
>   include/hw/vfio/vfio-common.h |   8 ++
>   5 files changed, 174 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
> index 7200f6f1c2a2..0cfa9d31732a 100644
> --- a/hw/vfio/migration-multifd.c
> +++ b/hw/vfio/migration-multifd.c
> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
>       return true;
>   }
>   
> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
> +{
> +    assert(vfio_multifd_transfer_enabled(vbasedev));
> +
> +    /*
> +     * Emit dummy NOP data on the main migration channel since the actual
> +     * device state transfer is done via multifd channels.
> +     */
> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> +}
> +
> +static bool
> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
> +                                               char *idstr,
> +                                               uint32_t instance_id,
> +                                               uint32_t idx,
> +                                               Error **errp)
> +{
> +    g_autoptr(QIOChannelBuffer) bioc = NULL;
> +    g_autoptr(QEMUFile) f = NULL;
> +    int ret;
> +    g_autofree VFIODeviceStatePacket *packet = NULL;
> +    size_t packet_len;
> +
> +    bioc = qio_channel_buffer_new(0);
> +    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
> +
> +    f = qemu_file_new_output(QIO_CHANNEL(bioc));
> +
> +    if (vfio_save_device_config_state(f, vbasedev, errp)) {
> +        return false;
> +    }
> +
> +    ret = qemu_fflush(f);
> +    if (ret) {
> +        error_setg(errp, "save config state flush failed: %d", ret);
> +        return false;
> +    }
> +
> +    packet_len = sizeof(*packet) + bioc->usage;
> +    packet = g_malloc0(packet_len);
> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
> +    packet->idx = idx;
> +    packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
> +    memcpy(&packet->data, bioc->data, bioc->usage);
> +
> +    if (!multifd_queue_device_state(idstr, instance_id,
> +                                    (char *)packet, packet_len)) {
> +        error_setg(errp, "multifd config data queuing failed");
> +        return false;
> +    }
> +
> +    vfio_add_bytes_transferred(packet_len);
> +
> +    return true;
> +}
> +
> +/*
> + * This thread is spawned by the migration core directly via
> + * .save_live_complete_precopy_thread SaveVMHandler.
> + *
> + * It exits after either:
> + * * completing saving the remaining device state and device config, OR:
> + * * encountering some error while doing the above, OR:
> + * * being forcefully aborted by the migration core by
> + *   multifd_device_state_save_thread_should_exit() returning true.
> + */
> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
> +                                       Error **errp)

In qemu_savevm_state_complete_precopy_iterable(), this handler is
called :

     ....
     if (multifd_device_state) {
         QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
             SaveLiveCompletePrecopyThreadHandler hdlr;

             if (!se->ops || (in_postcopy && se->ops->has_postcopy &&
                              se->ops->has_postcopy(se->opaque)) ||
                 !se->ops->save_live_complete_precopy_thread) {
                 continue;
             }

             hdlr = se->ops->save_live_complete_precopy_thread;
             multifd_spawn_device_state_save_thread(hdlr,
                                                    se->idstr, se->instance_id,
                                                    se->opaque);
         }
     }


I suggest naming it : vfio_multifd_save_complete_precopy_thread()

> +{
> +    VFIODevice *vbasedev = d->handler_opaque;
> +    VFIOMigration *migration = vbasedev->migration;
> +    bool ret;
> +    g_autofree VFIODeviceStatePacket *packet = NULL;
> +    uint32_t idx;
> +
> +    if (!vfio_multifd_transfer_enabled(vbasedev)) {
> +        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
> +        return true;
> +    }
> +
> +    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
> +                                                  d->idstr, d->instance_id);
> +
> +    /* We reach here with device state STOP or STOP_COPY only */
> +    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
> +                                 VFIO_DEVICE_STATE_STOP, errp)) {
> +        ret = false;

These "ret = false" can be avoided if the variable is set at the
top of the function.

> +        goto ret_finish;


goto thread_exit ?
> +    }
> +
> +    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
> +
> +    for (idx = 0; ; idx++) {
> +        ssize_t data_size;
> +        size_t packet_size;
> +
> +        if (multifd_device_state_save_thread_should_exit()) {
> +            error_setg(errp, "operation cancelled");
> +            ret = false;
> +            goto ret_finish;
> +        }> +
> +        data_size = read(migration->data_fd, &packet->data,
> +                         migration->data_buffer_size);
> +        if (data_size < 0) {
> +            error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
> +                       idx, errno);
> +            ret = false;
> +            goto ret_finish;
> +        } else if (data_size == 0) {
> +            break;
> +        }
> +
> +        packet->idx = idx;
> +        packet_size = sizeof(*packet) + data_size;
> +
> +        if (!multifd_queue_device_state(d->idstr, d->instance_id,
> +                                        (char *)packet, packet_size)) {
> +            error_setg(errp, "multifd data queuing failed");
> +            ret = false;
> +            goto ret_finish;
> +        }
> +
> +        vfio_add_bytes_transferred(packet_size);
> +    }
> +
> +    ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
> +                                                         d->idstr,
> +                                                         d->instance_id,
> +                                                         idx, errp);
> +
> +ret_finish:
> +    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
> +
> +    return ret;
> +}
> +
>   int vfio_multifd_switchover_start(VFIODevice *vbasedev)
>   {
>       VFIOMigration *migration = vbasedev->migration;
> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
> index 09cbb437d9d1..79780d7b5392 100644
> --- a/hw/vfio/migration-multifd.h
> +++ b/hw/vfio/migration-multifd.h
> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
>   bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
>                               Error **errp);
>   
> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
> +
> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
> +                                       Error **errp);
> +
>   int vfio_multifd_switchover_start(VFIODevice *vbasedev);
>   
>   #endif
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index b962309f7c27..69dcf2dac2fa 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
>       vfio_migration_send_event(vbasedev);
>   }
>   
> -static int vfio_migration_set_state(VFIODevice *vbasedev,
> -                                    enum vfio_device_mig_state new_state,
> -                                    enum vfio_device_mig_state recover_state,
> -                                    Error **errp)
> +int vfio_migration_set_state(VFIODevice *vbasedev,
> +                             enum vfio_device_mig_state new_state,
> +                             enum vfio_device_mig_state recover_state,
> +                             Error **errp)
>   {
>       VFIOMigration *migration = vbasedev->migration;
>       uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
>       return ret;
>   }
>   
> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
> -                                         Error **errp)
> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
>   {
>       VFIODevice *vbasedev = opaque;
>       int ret;
> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>       uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
>       int ret;
>   
> +    if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
> +        return -EINVAL;
> +    }
> +

please move to another patch with the similar change of patch 25.


>       qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>   
>       vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
> @@ -631,6 +634,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
>       int ret;
>       Error *local_err = NULL;
>   
> +    if (vfio_multifd_transfer_enabled(vbasedev)) {
> +        vfio_multifd_emit_dummy_eos(vbasedev, f);
> +        return 0;
> +    }
> +
>       trace_vfio_save_complete_precopy_start(vbasedev->name);
>   
>       /* We reach here with device state STOP or STOP_COPY only */
> @@ -662,6 +670,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
>       Error *local_err = NULL;
>       int ret;
>   
> +    if (vfio_multifd_transfer_enabled(vbasedev)) {
> +        vfio_multifd_emit_dummy_eos(vbasedev, f);
> +        return;
> +    }
> +
>       ret = vfio_save_device_config_state(f, opaque, &local_err);
>       if (ret) {
>           error_prepend(&local_err,
> @@ -819,6 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
>       .is_active_iterate = vfio_is_active_iterate,
>       .save_live_iterate = vfio_save_iterate,
>       .save_live_complete_precopy = vfio_save_complete_precopy,
> +    .save_live_complete_precopy_thread = vfio_save_complete_precopy_thread,
>       .save_state = vfio_save_state,
>       .load_setup = vfio_load_setup,
>       .load_cleanup = vfio_load_cleanup,
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 418b378ebd29..039979bdd98f 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -168,6 +168,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
>   vfio_save_cleanup(const char *name) " (%s)"
>   vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
>   vfio_save_complete_precopy_start(const char *name) " (%s)"
> +vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
> +vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
>   vfio_save_device_config_state(const char *name) " (%s)"
>   vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
>   vfio_save_iterate_start(const char *name) " (%s)"
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index ce2bdea8a2c2..ba851917f9fc 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
>   bool vfio_device_state_is_running(VFIODevice *vbasedev);
>   bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
>   
> +#ifdef CONFIG_LINUX
> +int vfio_migration_set_state(VFIODevice *vbasedev,
> +                             enum vfio_device_mig_state new_state,
> +                             enum vfio_device_mig_state recover_state,
> +                             Error **errp);

please move below with the other declarations under #ifdef CONFIG_LINUX.

> +#endif
> +
> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
>   int vfio_load_device_config_state(QEMUFile *f, void *opaque);
>   
>   #ifdef CONFIG_LINUX
> 



Thanks,

C.
Re: [PATCH v5 30/36] vfio/migration: Multifd device state transfer support - send side
Posted by Maciej S. Szmigiero 1 month ago
On 26.02.2025 17:43, Cédric Le Goater wrote:
> On 2/19/25 21:34, Maciej S. Szmigiero wrote:
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> Implement the multifd device state transfer via additional per-device
>> thread inside save_live_complete_precopy_thread handler.
>>
>> Switch between doing the data transfer in the new handler and doing it
>> in the old save_state handler depending on the
>> x-migration-multifd-transfer device property value.
>>
>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>> ---
>>   hw/vfio/migration-multifd.c   | 139 ++++++++++++++++++++++++++++++++++
>>   hw/vfio/migration-multifd.h   |   5 ++
>>   hw/vfio/migration.c           |  26 +++++--
>>   hw/vfio/trace-events          |   2 +
>>   include/hw/vfio/vfio-common.h |   8 ++
>>   5 files changed, 174 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
>> index 7200f6f1c2a2..0cfa9d31732a 100644
>> --- a/hw/vfio/migration-multifd.c
>> +++ b/hw/vfio/migration-multifd.c
>> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
>>       return true;
>>   }
>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
>> +{
>> +    assert(vfio_multifd_transfer_enabled(vbasedev));
>> +
>> +    /*
>> +     * Emit dummy NOP data on the main migration channel since the actual
>> +     * device state transfer is done via multifd channels.
>> +     */
>> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>> +}
>> +
>> +static bool
>> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
>> +                                               char *idstr,
>> +                                               uint32_t instance_id,
>> +                                               uint32_t idx,
>> +                                               Error **errp)
>> +{
>> +    g_autoptr(QIOChannelBuffer) bioc = NULL;
>> +    g_autoptr(QEMUFile) f = NULL;
>> +    int ret;
>> +    g_autofree VFIODeviceStatePacket *packet = NULL;
>> +    size_t packet_len;
>> +
>> +    bioc = qio_channel_buffer_new(0);
>> +    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
>> +
>> +    f = qemu_file_new_output(QIO_CHANNEL(bioc));
>> +
>> +    if (vfio_save_device_config_state(f, vbasedev, errp)) {
>> +        return false;
>> +    }
>> +
>> +    ret = qemu_fflush(f);
>> +    if (ret) {
>> +        error_setg(errp, "save config state flush failed: %d", ret);
>> +        return false;
>> +    }
>> +
>> +    packet_len = sizeof(*packet) + bioc->usage;
>> +    packet = g_malloc0(packet_len);
>> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>> +    packet->idx = idx;
>> +    packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
>> +    memcpy(&packet->data, bioc->data, bioc->usage);
>> +
>> +    if (!multifd_queue_device_state(idstr, instance_id,
>> +                                    (char *)packet, packet_len)) {
>> +        error_setg(errp, "multifd config data queuing failed");
>> +        return false;
>> +    }
>> +
>> +    vfio_add_bytes_transferred(packet_len);
>> +
>> +    return true;
>> +}
>> +
>> +/*
>> + * This thread is spawned by the migration core directly via
>> + * .save_live_complete_precopy_thread SaveVMHandler.
>> + *
>> + * It exits after either:
>> + * * completing saving the remaining device state and device config, OR:
>> + * * encountering some error while doing the above, OR:
>> + * * being forcefully aborted by the migration core by
>> + *   multifd_device_state_save_thread_should_exit() returning true.
>> + */
>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>> +                                       Error **errp)
> 
> In qemu_savevm_state_complete_precopy_iterable(), this handler is
> called :
> 
>      ....
>      if (multifd_device_state) {
>          QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>              SaveLiveCompletePrecopyThreadHandler hdlr;
> 
>              if (!se->ops || (in_postcopy && se->ops->has_postcopy &&
>                               se->ops->has_postcopy(se->opaque)) ||
>                  !se->ops->save_live_complete_precopy_thread) {
>                  continue;
>              }
> 
>              hdlr = se->ops->save_live_complete_precopy_thread;
>              multifd_spawn_device_state_save_thread(hdlr,
>                                                     se->idstr, se->instance_id,
>                                                     se->opaque);
>          }
>      }
> 
> 
> I suggest naming it : vfio_multifd_save_complete_precopy_thread()

Renamed accordingly.

>> +{
>> +    VFIODevice *vbasedev = d->handler_opaque;
>> +    VFIOMigration *migration = vbasedev->migration;
>> +    bool ret;
>> +    g_autofree VFIODeviceStatePacket *packet = NULL;
>> +    uint32_t idx;
>> +
>> +    if (!vfio_multifd_transfer_enabled(vbasedev)) {
>> +        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
>> +        return true;
>> +    }
>> +
>> +    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
>> +                                                  d->idstr, d->instance_id);
>> +
>> +    /* We reach here with device state STOP or STOP_COPY only */
>> +    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>> +                                 VFIO_DEVICE_STATE_STOP, errp)) {
>> +        ret = false;
> 
> These "ret = false" can be avoided if the variable is set at the
> top of the function.

I inverted the "ret" logic here as in vfio_load_bufs_thread()
to make it false by default and set to true just before early
exit label.

>> +        goto ret_finish;
> 
> 
> goto thread_exit ?

As I asked in one of the previous patches,
do this comment mean that your want to rename ret_finish label to
thread_exit?

>> +    }
>> +
>> +    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
>> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>> +
>> +    for (idx = 0; ; idx++) {
>> +        ssize_t data_size;
>> +        size_t packet_size;
>> +
>> +        if (multifd_device_state_save_thread_should_exit()) {
>> +            error_setg(errp, "operation cancelled");
>> +            ret = false;
>> +            goto ret_finish;
>> +        }> +
>> +        data_size = read(migration->data_fd, &packet->data,
>> +                         migration->data_buffer_size);
>> +        if (data_size < 0) {
>> +            error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
>> +                       idx, errno);
>> +            ret = false;
>> +            goto ret_finish;
>> +        } else if (data_size == 0) {
>> +            break;
>> +        }
>> +
>> +        packet->idx = idx;
>> +        packet_size = sizeof(*packet) + data_size;
>> +
>> +        if (!multifd_queue_device_state(d->idstr, d->instance_id,
>> +                                        (char *)packet, packet_size)) {
>> +            error_setg(errp, "multifd data queuing failed");
>> +            ret = false;
>> +            goto ret_finish;
>> +        }
>> +
>> +        vfio_add_bytes_transferred(packet_size);
>> +    }
>> +
>> +    ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
>> +                                                         d->idstr,
>> +                                                         d->instance_id,
>> +                                                         idx, errp);
>> +
>> +ret_finish:
>> +    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
>> +
>> +    return ret;
>> +}
>> +
>>   int vfio_multifd_switchover_start(VFIODevice *vbasedev)
>>   {
>>       VFIOMigration *migration = vbasedev->migration;
>> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
>> index 09cbb437d9d1..79780d7b5392 100644
>> --- a/hw/vfio/migration-multifd.h
>> +++ b/hw/vfio/migration-multifd.h
>> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
>>   bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
>>                               Error **errp);
>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
>> +
>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>> +                                       Error **errp);
>> +
>>   int vfio_multifd_switchover_start(VFIODevice *vbasedev);
>>   #endif
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index b962309f7c27..69dcf2dac2fa 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
(..)
>> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
>>       return ret;
>>   }
>> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
>> -                                         Error **errp)
>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
>>   {
>>       VFIODevice *vbasedev = opaque;
>>       int ret;
>> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>>       uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
>>       int ret;
>> +    if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
>> +        return -EINVAL;
>> +    }
>> +
> 
> please move to another patch with the similar change of patch 25.
> 

This patch is about the send/save side while patch 25
is called "*receive* init/cleanup".

So adding save setup to patch called "receive init" wouldn't be
consistent with that patch subject.

>>       qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>>       vfio_query_stop_copy_size(vbasedev, &stop_copy_size);

(..)
>> index ce2bdea8a2c2..ba851917f9fc 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
>>   bool vfio_device_state_is_running(VFIODevice *vbasedev);
>>   bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
>> +#ifdef CONFIG_LINUX
>> +int vfio_migration_set_state(VFIODevice *vbasedev,
>> +                             enum vfio_device_mig_state new_state,
>> +                             enum vfio_device_mig_state recover_state,
>> +                             Error **errp);
> 
> please move below with the other declarations under #ifdef CONFIG_LINUX.
> 
>> +#endif
>> +
>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
>>   int vfio_load_device_config_state(QEMUFile *f, void *opaque);
>>   #ifdef CONFIG_LINUX
>>
> 

Done.

> 
> Thanks,
> 
> C.

Thanks,
Maciej


Re: [PATCH v5 30/36] vfio/migration: Multifd device state transfer support - send side
Posted by Cédric Le Goater 1 month ago
On 2/26/25 22:05, Maciej S. Szmigiero wrote:
> On 26.02.2025 17:43, Cédric Le Goater wrote:
>> On 2/19/25 21:34, Maciej S. Szmigiero wrote:
>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>>
>>> Implement the multifd device state transfer via additional per-device
>>> thread inside save_live_complete_precopy_thread handler.
>>>
>>> Switch between doing the data transfer in the new handler and doing it
>>> in the old save_state handler depending on the
>>> x-migration-multifd-transfer device property value.
>>>
>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>>> ---
>>>   hw/vfio/migration-multifd.c   | 139 ++++++++++++++++++++++++++++++++++
>>>   hw/vfio/migration-multifd.h   |   5 ++
>>>   hw/vfio/migration.c           |  26 +++++--
>>>   hw/vfio/trace-events          |   2 +
>>>   include/hw/vfio/vfio-common.h |   8 ++
>>>   5 files changed, 174 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
>>> index 7200f6f1c2a2..0cfa9d31732a 100644
>>> --- a/hw/vfio/migration-multifd.c
>>> +++ b/hw/vfio/migration-multifd.c
>>> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
>>>       return true;
>>>   }
>>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
>>> +{
>>> +    assert(vfio_multifd_transfer_enabled(vbasedev));
>>> +
>>> +    /*
>>> +     * Emit dummy NOP data on the main migration channel since the actual
>>> +     * device state transfer is done via multifd channels.
>>> +     */
>>> +    qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>>> +}
>>> +
>>> +static bool
>>> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
>>> +                                               char *idstr,
>>> +                                               uint32_t instance_id,
>>> +                                               uint32_t idx,
>>> +                                               Error **errp)
>>> +{
>>> +    g_autoptr(QIOChannelBuffer) bioc = NULL;
>>> +    g_autoptr(QEMUFile) f = NULL;
>>> +    int ret;
>>> +    g_autofree VFIODeviceStatePacket *packet = NULL;
>>> +    size_t packet_len;
>>> +
>>> +    bioc = qio_channel_buffer_new(0);
>>> +    qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
>>> +
>>> +    f = qemu_file_new_output(QIO_CHANNEL(bioc));
>>> +
>>> +    if (vfio_save_device_config_state(f, vbasedev, errp)) {
>>> +        return false;
>>> +    }
>>> +
>>> +    ret = qemu_fflush(f);
>>> +    if (ret) {
>>> +        error_setg(errp, "save config state flush failed: %d", ret);
>>> +        return false;
>>> +    }
>>> +
>>> +    packet_len = sizeof(*packet) + bioc->usage;
>>> +    packet = g_malloc0(packet_len);
>>> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>>> +    packet->idx = idx;
>>> +    packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
>>> +    memcpy(&packet->data, bioc->data, bioc->usage);
>>> +
>>> +    if (!multifd_queue_device_state(idstr, instance_id,
>>> +                                    (char *)packet, packet_len)) {
>>> +        error_setg(errp, "multifd config data queuing failed");
>>> +        return false;
>>> +    }
>>> +
>>> +    vfio_add_bytes_transferred(packet_len);
>>> +
>>> +    return true;
>>> +}
>>> +
>>> +/*
>>> + * This thread is spawned by the migration core directly via
>>> + * .save_live_complete_precopy_thread SaveVMHandler.
>>> + *
>>> + * It exits after either:
>>> + * * completing saving the remaining device state and device config, OR:
>>> + * * encountering some error while doing the above, OR:
>>> + * * being forcefully aborted by the migration core by
>>> + *   multifd_device_state_save_thread_should_exit() returning true.
>>> + */
>>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>>> +                                       Error **errp)
>>
>> In qemu_savevm_state_complete_precopy_iterable(), this handler is
>> called :
>>
>>      ....
>>      if (multifd_device_state) {
>>          QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>>              SaveLiveCompletePrecopyThreadHandler hdlr;
>>
>>              if (!se->ops || (in_postcopy && se->ops->has_postcopy &&
>>                               se->ops->has_postcopy(se->opaque)) ||
>>                  !se->ops->save_live_complete_precopy_thread) {
>>                  continue;
>>              }
>>
>>              hdlr = se->ops->save_live_complete_precopy_thread;
>>              multifd_spawn_device_state_save_thread(hdlr,
>>                                                     se->idstr, se->instance_id,
>>                                                     se->opaque);
>>          }
>>      }
>>
>>
>> I suggest naming it : vfio_multifd_save_complete_precopy_thread()
> 
> Renamed accordingly.
> 
>>> +{
>>> +    VFIODevice *vbasedev = d->handler_opaque;
>>> +    VFIOMigration *migration = vbasedev->migration;
>>> +    bool ret;
>>> +    g_autofree VFIODeviceStatePacket *packet = NULL;
>>> +    uint32_t idx;
>>> +
>>> +    if (!vfio_multifd_transfer_enabled(vbasedev)) {
>>> +        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
>>> +        return true;
>>> +    }
>>> +
>>> +    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
>>> +                                                  d->idstr, d->instance_id);
>>> +
>>> +    /* We reach here with device state STOP or STOP_COPY only */
>>> +    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>>> +                                 VFIO_DEVICE_STATE_STOP, errp)) {
>>> +        ret = false;
>>
>> These "ret = false" can be avoided if the variable is set at the
>> top of the function.
> 
> I inverted the "ret" logic here as in vfio_load_bufs_thread()
> to make it false by default and set to true just before early
> exit label.

ok. Let's see what it looks like in v6.

>>> +        goto ret_finish;
>>
>>
>> goto thread_exit ?
> 
> As I asked in one of the previous patches,
> do this comment mean that your want to rename ret_finish label to
> thread_exit?

Yes. I find label 'thread_exit' more meaning full. This is minor since
there is only one 'exit' label.


> 
>>> +    }
>>> +
>>> +    packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
>>> +    packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>>> +
>>> +    for (idx = 0; ; idx++) {
>>> +        ssize_t data_size;
>>> +        size_t packet_size;
>>> +
>>> +        if (multifd_device_state_save_thread_should_exit()) {
>>> +            error_setg(errp, "operation cancelled");
>>> +            ret = false;
>>> +            goto ret_finish;
>>> +        }> +
>>> +        data_size = read(migration->data_fd, &packet->data,
>>> +                         migration->data_buffer_size);
>>> +        if (data_size < 0) {
>>> +            error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
>>> +                       idx, errno);
>>> +            ret = false;
>>> +            goto ret_finish;
>>> +        } else if (data_size == 0) {
>>> +            break;
>>> +        }
>>> +
>>> +        packet->idx = idx;
>>> +        packet_size = sizeof(*packet) + data_size;
>>> +
>>> +        if (!multifd_queue_device_state(d->idstr, d->instance_id,
>>> +                                        (char *)packet, packet_size)) {
>>> +            error_setg(errp, "multifd data queuing failed");
>>> +            ret = false;
>>> +            goto ret_finish;
>>> +        }
>>> +
>>> +        vfio_add_bytes_transferred(packet_size);
>>> +    }
>>> +
>>> +    ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
>>> +                                                         d->idstr,
>>> +                                                         d->instance_id,
>>> +                                                         idx, errp);
>>> +
>>> +ret_finish:
>>> +    trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>>   int vfio_multifd_switchover_start(VFIODevice *vbasedev)
>>>   {
>>>       VFIOMigration *migration = vbasedev->migration;
>>> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
>>> index 09cbb437d9d1..79780d7b5392 100644
>>> --- a/hw/vfio/migration-multifd.h
>>> +++ b/hw/vfio/migration-multifd.h
>>> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
>>>   bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
>>>                               Error **errp);
>>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
>>> +
>>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>>> +                                       Error **errp);
>>> +
>>>   int vfio_multifd_switchover_start(VFIODevice *vbasedev);
>>>   #endif
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index b962309f7c27..69dcf2dac2fa 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
> (..)
>>> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
>>>       return ret;
>>>   }
>>> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
>>> -                                         Error **errp)
>>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
>>>   {
>>>       VFIODevice *vbasedev = opaque;
>>>       int ret;
>>> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>>>       uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
>>>       int ret;
>>> +    if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
>>> +        return -EINVAL;
>>> +    }
>>> +
>>
>> please move to another patch with the similar change of patch 25.
>>
> 
> This patch is about the send/save side while patch 25
> is called "*receive* init/cleanup".
> 
> So adding save setup to patch called "receive init" wouldn't be
> consistent with that patch subject.

In that case, could please add an extra patch checking for the consistency
of the settings ?


Thanks,

C.



> 
>>>       qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>>>       vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
> 
> (..)
>>> index ce2bdea8a2c2..ba851917f9fc 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
>>>   bool vfio_device_state_is_running(VFIODevice *vbasedev);
>>>   bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
>>> +#ifdef CONFIG_LINUX
>>> +int vfio_migration_set_state(VFIODevice *vbasedev,
>>> +                             enum vfio_device_mig_state new_state,
>>> +                             enum vfio_device_mig_state recover_state,
>>> +                             Error **errp);
>>
>> please move below with the other declarations under #ifdef CONFIG_LINUX.
>>
>>> +#endif
>>> +
>>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
>>>   int vfio_load_device_config_state(QEMUFile *f, void *opaque);
>>>   #ifdef CONFIG_LINUX
>>>
>>
> 
> Done.
> 
>>
>> Thanks,
>>
>> C.
> 
> Thanks,
> Maciej
> 


Re: [PATCH v5 30/36] vfio/migration: Multifd device state transfer support - send side
Posted by Maciej S. Szmigiero 1 month ago
On 28.02.2025 10:13, Cédric Le Goater wrote:
> On 2/26/25 22:05, Maciej S. Szmigiero wrote:
>> On 26.02.2025 17:43, Cédric Le Goater wrote:
>>> On 2/19/25 21:34, Maciej S. Szmigiero wrote:
>>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>>>
>>>> Implement the multifd device state transfer via additional per-device
>>>> thread inside save_live_complete_precopy_thread handler.
>>>>
>>>> Switch between doing the data transfer in the new handler and doing it
>>>> in the old save_state handler depending on the
>>>> x-migration-multifd-transfer device property value.
>>>>
>>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>>>> ---
>>>>   hw/vfio/migration-multifd.c   | 139 ++++++++++++++++++++++++++++++++++
>>>>   hw/vfio/migration-multifd.h   |   5 ++
>>>>   hw/vfio/migration.c           |  26 +++++--
>>>>   hw/vfio/trace-events          |   2 +
>>>>   include/hw/vfio/vfio-common.h |   8 ++
>>>>   5 files changed, 174 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
>>>> index 7200f6f1c2a2..0cfa9d31732a 100644
>>>> --- a/hw/vfio/migration-multifd.c
>>>> +++ b/hw/vfio/migration-multifd.c
(..)
>>>> +{
>>>> +    VFIODevice *vbasedev = d->handler_opaque;
>>>> +    VFIOMigration *migration = vbasedev->migration;
>>>> +    bool ret;
>>>> +    g_autofree VFIODeviceStatePacket *packet = NULL;
>>>> +    uint32_t idx;
>>>> +
>>>> +    if (!vfio_multifd_transfer_enabled(vbasedev)) {
>>>> +        /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
>>>> +        return true;
>>>> +    }
>>>> +
>>>> +    trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
>>>> +                                                  d->idstr, d->instance_id);
>>>> +
>>>> +    /* We reach here with device state STOP or STOP_COPY only */
>>>> +    if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>>>> +                                 VFIO_DEVICE_STATE_STOP, errp)) {
>>>> +        ret = false;
>>>
>>> These "ret = false" can be avoided if the variable is set at the
>>> top of the function.
>>
>> I inverted the "ret" logic here as in vfio_load_bufs_thread()
>> to make it false by default and set to true just before early
>> exit label.
> 
> ok. Let's see what it looks like in v6.
> 
>>>> +        goto ret_finish;
>>>
>>>
>>> goto thread_exit ?
>>
>> As I asked in one of the previous patches,
>> do this comment mean that your want to rename ret_finish label to
>> thread_exit?
> 
> Yes. I find label 'thread_exit' more meaning full. This is minor since
> there is only one 'exit' label.

Renamed ret_finish to thread_exit then.

> 
>>
(..)
>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>> index b962309f7c27..69dcf2dac2fa 100644
>>>> --- a/hw/vfio/migration.c
>>>> +++ b/hw/vfio/migration.c
>> (..)
>>>> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
>>>>       return ret;
>>>>   }
>>>> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
>>>> -                                         Error **errp)
>>>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
>>>>   {
>>>>       VFIODevice *vbasedev = opaque;
>>>>       int ret;
>>>> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>>>>       uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
>>>>       int ret;
>>>> +    if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
>>>> +        return -EINVAL;
>>>> +    }
>>>> +
>>>
>>> please move to another patch with the similar change of patch 25.
>>>
>>
>> This patch is about the send/save side while patch 25
>> is called "*receive* init/cleanup".
>>
>> So adding save setup to patch called "receive init" wouldn't be
>> consistent with that patch subject.
> 
> In that case, could please add an extra patch checking for the consistency
> of the settings ?

I split out wiring vfio_multifd_setup() and vfio_multifd_cleanup() into
general VFIO load/save setup and cleanup methods from this patch and
patch "Multifd device state transfer support - receive init/cleanup"
into a brand new patch/commit.

By the way, due to changes discussed over the last two days
vfio_multifd_setup() (aka vfio_multifd_transfer_setup()) not only
does consistency checking but also allocates VFIOMultifd:
https://lore.kernel.org/qemu-devel/6546c3a4-bd81-42ea-88a2-b2f88ec2fbb3@maciej.szmigiero.name/

> 
> Thanks,
> 
> C.
> 
> 
> 

Thanks,
Maciej