From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Implement the multifd device state transfer via additional per-device
thread inside save_live_complete_precopy_thread handler.
Switch between doing the data transfer in the new handler and doing it
in the old save_state handler depending on the
x-migration-multifd-transfer device property value.
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++
hw/vfio/migration-multifd.h | 5 ++
hw/vfio/migration.c | 26 +++++--
hw/vfio/trace-events | 2 +
include/hw/vfio/vfio-common.h | 8 ++
5 files changed, 174 insertions(+), 6 deletions(-)
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 7200f6f1c2a2..0cfa9d31732a 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
return true;
}
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
+{
+ assert(vfio_multifd_transfer_enabled(vbasedev));
+
+ /*
+ * Emit dummy NOP data on the main migration channel since the actual
+ * device state transfer is done via multifd channels.
+ */
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+}
+
+static bool
+vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
+ char *idstr,
+ uint32_t instance_id,
+ uint32_t idx,
+ Error **errp)
+{
+ g_autoptr(QIOChannelBuffer) bioc = NULL;
+ g_autoptr(QEMUFile) f = NULL;
+ int ret;
+ g_autofree VFIODeviceStatePacket *packet = NULL;
+ size_t packet_len;
+
+ bioc = qio_channel_buffer_new(0);
+ qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
+
+ f = qemu_file_new_output(QIO_CHANNEL(bioc));
+
+ if (vfio_save_device_config_state(f, vbasedev, errp)) {
+ return false;
+ }
+
+ ret = qemu_fflush(f);
+ if (ret) {
+ error_setg(errp, "save config state flush failed: %d", ret);
+ return false;
+ }
+
+ packet_len = sizeof(*packet) + bioc->usage;
+ packet = g_malloc0(packet_len);
+ packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+ packet->idx = idx;
+ packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
+ memcpy(&packet->data, bioc->data, bioc->usage);
+
+ if (!multifd_queue_device_state(idstr, instance_id,
+ (char *)packet, packet_len)) {
+ error_setg(errp, "multifd config data queuing failed");
+ return false;
+ }
+
+ vfio_add_bytes_transferred(packet_len);
+
+ return true;
+}
+
+/*
+ * This thread is spawned by the migration core directly via
+ * .save_live_complete_precopy_thread SaveVMHandler.
+ *
+ * It exits after either:
+ * * completing saving the remaining device state and device config, OR:
+ * * encountering some error while doing the above, OR:
+ * * being forcefully aborted by the migration core by
+ * multifd_device_state_save_thread_should_exit() returning true.
+ */
+bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+ Error **errp)
+{
+ VFIODevice *vbasedev = d->handler_opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ bool ret;
+ g_autofree VFIODeviceStatePacket *packet = NULL;
+ uint32_t idx;
+
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
+ /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
+ return true;
+ }
+
+ trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
+ d->idstr, d->instance_id);
+
+ /* We reach here with device state STOP or STOP_COPY only */
+ if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+ VFIO_DEVICE_STATE_STOP, errp)) {
+ ret = false;
+ goto ret_finish;
+ }
+
+ packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
+ packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+
+ for (idx = 0; ; idx++) {
+ ssize_t data_size;
+ size_t packet_size;
+
+ if (multifd_device_state_save_thread_should_exit()) {
+ error_setg(errp, "operation cancelled");
+ ret = false;
+ goto ret_finish;
+ }
+
+ data_size = read(migration->data_fd, &packet->data,
+ migration->data_buffer_size);
+ if (data_size < 0) {
+ error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
+ idx, errno);
+ ret = false;
+ goto ret_finish;
+ } else if (data_size == 0) {
+ break;
+ }
+
+ packet->idx = idx;
+ packet_size = sizeof(*packet) + data_size;
+
+ if (!multifd_queue_device_state(d->idstr, d->instance_id,
+ (char *)packet, packet_size)) {
+ error_setg(errp, "multifd data queuing failed");
+ ret = false;
+ goto ret_finish;
+ }
+
+ vfio_add_bytes_transferred(packet_size);
+ }
+
+ ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
+ d->idstr,
+ d->instance_id,
+ idx, errp);
+
+ret_finish:
+ trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
+
+ return ret;
+}
+
int vfio_multifd_switchover_start(VFIODevice *vbasedev)
{
VFIOMigration *migration = vbasedev->migration;
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
index 09cbb437d9d1..79780d7b5392 100644
--- a/hw/vfio/migration-multifd.h
+++ b/hw/vfio/migration-multifd.h
@@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
Error **errp);
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
+
+bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+ Error **errp);
+
int vfio_multifd_switchover_start(VFIODevice *vbasedev);
#endif
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index b962309f7c27..69dcf2dac2fa 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
vfio_migration_send_event(vbasedev);
}
-static int vfio_migration_set_state(VFIODevice *vbasedev,
- enum vfio_device_mig_state new_state,
- enum vfio_device_mig_state recover_state,
- Error **errp)
+int vfio_migration_set_state(VFIODevice *vbasedev,
+ enum vfio_device_mig_state new_state,
+ enum vfio_device_mig_state recover_state,
+ Error **errp)
{
VFIOMigration *migration = vbasedev->migration;
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
return ret;
}
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
- Error **errp)
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
{
VFIODevice *vbasedev = opaque;
int ret;
@@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
int ret;
+ if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
+ return -EINVAL;
+ }
+
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
@@ -631,6 +634,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
int ret;
Error *local_err = NULL;
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ return 0;
+ }
+
trace_vfio_save_complete_precopy_start(vbasedev->name);
/* We reach here with device state STOP or STOP_COPY only */
@@ -662,6 +670,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
Error *local_err = NULL;
int ret;
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ return;
+ }
+
ret = vfio_save_device_config_state(f, opaque, &local_err);
if (ret) {
error_prepend(&local_err,
@@ -819,6 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
.is_active_iterate = vfio_is_active_iterate,
.save_live_iterate = vfio_save_iterate,
.save_live_complete_precopy = vfio_save_complete_precopy,
+ .save_live_complete_precopy_thread = vfio_save_complete_precopy_thread,
.save_state = vfio_save_state,
.load_setup = vfio_load_setup,
.load_cleanup = vfio_load_cleanup,
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 418b378ebd29..039979bdd98f 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -168,6 +168,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
vfio_save_cleanup(const char *name) " (%s)"
vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
vfio_save_complete_precopy_start(const char *name) " (%s)"
+vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
+vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
vfio_save_device_config_state(const char *name) " (%s)"
vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
vfio_save_iterate_start(const char *name) " (%s)"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index ce2bdea8a2c2..ba851917f9fc 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
bool vfio_device_state_is_running(VFIODevice *vbasedev);
bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
+#ifdef CONFIG_LINUX
+int vfio_migration_set_state(VFIODevice *vbasedev,
+ enum vfio_device_mig_state new_state,
+ enum vfio_device_mig_state recover_state,
+ Error **errp);
+#endif
+
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
int vfio_load_device_config_state(QEMUFile *f, void *opaque);
#ifdef CONFIG_LINUX
On 19/02/2025 22:34, Maciej S. Szmigiero wrote:
> External email: Use caution opening links or attachments
>
>
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>
> Implement the multifd device state transfer via additional per-device
> thread inside save_live_complete_precopy_thread handler.
>
> Switch between doing the data transfer in the new handler and doing it
> in the old save_state handler depending on the
> x-migration-multifd-transfer device property value.
x-migration-multifd-transfer is not yet introduced. Maybe rephrase to:
... depending if VFIO multifd transfer is enabled or not.
>
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++
> hw/vfio/migration-multifd.h | 5 ++
> hw/vfio/migration.c | 26 +++++--
> hw/vfio/trace-events | 2 +
> include/hw/vfio/vfio-common.h | 8 ++
> 5 files changed, 174 insertions(+), 6 deletions(-)
>
> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
> index 7200f6f1c2a2..0cfa9d31732a 100644
> --- a/hw/vfio/migration-multifd.c
> +++ b/hw/vfio/migration-multifd.c
> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
> return true;
> }
>
> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
> +{
> + assert(vfio_multifd_transfer_enabled(vbasedev));
> +
> + /*
> + * Emit dummy NOP data on the main migration channel since the actual
> + * device state transfer is done via multifd channels.
> + */
> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> +}
> +
> +static bool
> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
> + char *idstr,
> + uint32_t instance_id,
> + uint32_t idx,
> + Error **errp)
> +{
> + g_autoptr(QIOChannelBuffer) bioc = NULL;
> + g_autoptr(QEMUFile) f = NULL;
> + int ret;
> + g_autofree VFIODeviceStatePacket *packet = NULL;
> + size_t packet_len;
> +
> + bioc = qio_channel_buffer_new(0);
> + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
> +
> + f = qemu_file_new_output(QIO_CHANNEL(bioc));
> +
> + if (vfio_save_device_config_state(f, vbasedev, errp)) {
> + return false;
> + }
> +
> + ret = qemu_fflush(f);
> + if (ret) {
> + error_setg(errp, "save config state flush failed: %d", ret);
Let's add vbasedev->name to the error message so we know which device
caused the error.
> + return false;
> + }
> +
> + packet_len = sizeof(*packet) + bioc->usage;
> + packet = g_malloc0(packet_len);
> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
> + packet->idx = idx;
> + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
> + memcpy(&packet->data, bioc->data, bioc->usage);
> +
> + if (!multifd_queue_device_state(idstr, instance_id,
> + (char *)packet, packet_len)) {
> + error_setg(errp, "multifd config data queuing failed");
Ditto.
> + return false;
> + }
> +
> + vfio_add_bytes_transferred(packet_len);
> +
> + return true;
> +}
> +
> +/*
> + * This thread is spawned by the migration core directly via
> + * .save_live_complete_precopy_thread SaveVMHandler.
> + *
> + * It exits after either:
> + * * completing saving the remaining device state and device config, OR:
> + * * encountering some error while doing the above, OR:
> + * * being forcefully aborted by the migration core by
> + * multifd_device_state_save_thread_should_exit() returning true.
> + */
> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
> + Error **errp)
> +{
> + VFIODevice *vbasedev = d->handler_opaque;
> + VFIOMigration *migration = vbasedev->migration;
> + bool ret;
> + g_autofree VFIODeviceStatePacket *packet = NULL;
> + uint32_t idx;
> +
> + if (!vfio_multifd_transfer_enabled(vbasedev)) {
> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
> + return true;
> + }
> +
> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
> + d->idstr, d->instance_id);
> +
> + /* We reach here with device state STOP or STOP_COPY only */
> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
> + VFIO_DEVICE_STATE_STOP, errp)) {
> + ret = false;
> + goto ret_finish;
> + }
> +
> + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
> +
> + for (idx = 0; ; idx++) {
> + ssize_t data_size;
> + size_t packet_size;
> +
> + if (multifd_device_state_save_thread_should_exit()) {
> + error_setg(errp, "operation cancelled");
Same comment as in patch #27:
IIUC, if multifd_device_state_save_thread_should_exit() returns true, it
means that some other code part already failed and set migration error, no?
If so, shouldn't we return true here? After all,
vfio_save_complete_precopy_thread didn't really fail, it just got signal
to terminate itself
> + ret = false;
> + goto ret_finish;
> + }
> +
> + data_size = read(migration->data_fd, &packet->data,
> + migration->data_buffer_size);
> + if (data_size < 0) {
> + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
> + idx, errno);
Let's add vbasedev->name to the error message so we know which device
caused the error.
> + ret = false;
> + goto ret_finish;
> + } else if (data_size == 0) {
> + break;
> + }
> +
> + packet->idx = idx;
> + packet_size = sizeof(*packet) + data_size;
> +
> + if (!multifd_queue_device_state(d->idstr, d->instance_id,
> + (char *)packet, packet_size)) {
> + error_setg(errp, "multifd data queuing failed");
Ditto.
Thanks.
> + ret = false;
> + goto ret_finish;
> + }
> +
> + vfio_add_bytes_transferred(packet_size);
> + }
> +
> + ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
> + d->idstr,
> + d->instance_id,
> + idx, errp);
> +
> +ret_finish:
> + trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
> +
> + return ret;
> +}
> +
> int vfio_multifd_switchover_start(VFIODevice *vbasedev)
> {
> VFIOMigration *migration = vbasedev->migration;
> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
> index 09cbb437d9d1..79780d7b5392 100644
> --- a/hw/vfio/migration-multifd.h
> +++ b/hw/vfio/migration-multifd.h
> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
> bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
> Error **errp);
>
> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
> +
> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
> + Error **errp);
> +
> int vfio_multifd_switchover_start(VFIODevice *vbasedev);
>
> #endif
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index b962309f7c27..69dcf2dac2fa 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
> vfio_migration_send_event(vbasedev);
> }
>
> -static int vfio_migration_set_state(VFIODevice *vbasedev,
> - enum vfio_device_mig_state new_state,
> - enum vfio_device_mig_state recover_state,
> - Error **errp)
> +int vfio_migration_set_state(VFIODevice *vbasedev,
> + enum vfio_device_mig_state new_state,
> + enum vfio_device_mig_state recover_state,
> + Error **errp)
> {
> VFIOMigration *migration = vbasedev->migration;
> uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
> return ret;
> }
>
> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
> - Error **errp)
> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
> {
> VFIODevice *vbasedev = opaque;
> int ret;
> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
> uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
> int ret;
>
> + if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
> + return -EINVAL;
> + }
> +
> qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>
> vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
> @@ -631,6 +634,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
> int ret;
> Error *local_err = NULL;
>
> + if (vfio_multifd_transfer_enabled(vbasedev)) {
> + vfio_multifd_emit_dummy_eos(vbasedev, f);
> + return 0;
> + }
> +
> trace_vfio_save_complete_precopy_start(vbasedev->name);
>
> /* We reach here with device state STOP or STOP_COPY only */
> @@ -662,6 +670,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
> Error *local_err = NULL;
> int ret;
>
> + if (vfio_multifd_transfer_enabled(vbasedev)) {
> + vfio_multifd_emit_dummy_eos(vbasedev, f);
> + return;
> + }
> +
> ret = vfio_save_device_config_state(f, opaque, &local_err);
> if (ret) {
> error_prepend(&local_err,
> @@ -819,6 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
> .is_active_iterate = vfio_is_active_iterate,
> .save_live_iterate = vfio_save_iterate,
> .save_live_complete_precopy = vfio_save_complete_precopy,
> + .save_live_complete_precopy_thread = vfio_save_complete_precopy_thread,
> .save_state = vfio_save_state,
> .load_setup = vfio_load_setup,
> .load_cleanup = vfio_load_cleanup,
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 418b378ebd29..039979bdd98f 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -168,6 +168,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
> vfio_save_cleanup(const char *name) " (%s)"
> vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
> vfio_save_complete_precopy_start(const char *name) " (%s)"
> +vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
> +vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
> vfio_save_device_config_state(const char *name) " (%s)"
> vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
> vfio_save_iterate_start(const char *name) " (%s)"
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index ce2bdea8a2c2..ba851917f9fc 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
> bool vfio_device_state_is_running(VFIODevice *vbasedev);
> bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
>
> +#ifdef CONFIG_LINUX
> +int vfio_migration_set_state(VFIODevice *vbasedev,
> + enum vfio_device_mig_state new_state,
> + enum vfio_device_mig_state recover_state,
> + Error **errp);
> +#endif
> +
> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
> int vfio_load_device_config_state(QEMUFile *f, void *opaque);
>
> #ifdef CONFIG_LINUX
On 2.03.2025 15:41, Avihai Horon wrote:
>
> On 19/02/2025 22:34, Maciej S. Szmigiero wrote:
>> External email: Use caution opening links or attachments
>>
>>
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> Implement the multifd device state transfer via additional per-device
>> thread inside save_live_complete_precopy_thread handler.
>>
>> Switch between doing the data transfer in the new handler and doing it
>> in the old save_state handler depending on the
>> x-migration-multifd-transfer device property value.
>
> x-migration-multifd-transfer is not yet introduced. Maybe rephrase to:
>
> ... depending if VFIO multifd transfer is enabled or not.
Changed accordingly.
>>
>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>> ---
>> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++
>> hw/vfio/migration-multifd.h | 5 ++
>> hw/vfio/migration.c | 26 +++++--
>> hw/vfio/trace-events | 2 +
>> include/hw/vfio/vfio-common.h | 8 ++
>> 5 files changed, 174 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
>> index 7200f6f1c2a2..0cfa9d31732a 100644
>> --- a/hw/vfio/migration-multifd.c
>> +++ b/hw/vfio/migration-multifd.c
>> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
>> return true;
>> }
>>
>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
>> +{
>> + assert(vfio_multifd_transfer_enabled(vbasedev));
>> +
>> + /*
>> + * Emit dummy NOP data on the main migration channel since the actual
>> + * device state transfer is done via multifd channels.
>> + */
>> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>> +}
>> +
>> +static bool
>> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
>> + char *idstr,
>> + uint32_t instance_id,
>> + uint32_t idx,
>> + Error **errp)
>> +{
>> + g_autoptr(QIOChannelBuffer) bioc = NULL;
>> + g_autoptr(QEMUFile) f = NULL;
>> + int ret;
>> + g_autofree VFIODeviceStatePacket *packet = NULL;
>> + size_t packet_len;
>> +
>> + bioc = qio_channel_buffer_new(0);
>> + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
>> +
>> + f = qemu_file_new_output(QIO_CHANNEL(bioc));
>> +
>> + if (vfio_save_device_config_state(f, vbasedev, errp)) {
>> + return false;
>> + }
>> +
>> + ret = qemu_fflush(f);
>> + if (ret) {
>> + error_setg(errp, "save config state flush failed: %d", ret);
>
> Let's add vbasedev->name to the error message so we know which device caused the error.
Done.
>> + return false;
>> + }
>> +
>> + packet_len = sizeof(*packet) + bioc->usage;
>> + packet = g_malloc0(packet_len);
>> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>> + packet->idx = idx;
>> + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
>> + memcpy(&packet->data, bioc->data, bioc->usage);
>> +
>> + if (!multifd_queue_device_state(idstr, instance_id,
>> + (char *)packet, packet_len)) {
>> + error_setg(errp, "multifd config data queuing failed");
>
> Ditto.
Done.
>> + return false;
>> + }
>> +
>> + vfio_add_bytes_transferred(packet_len);
>> +
>> + return true;
>> +}
>> +
>> +/*
>> + * This thread is spawned by the migration core directly via
>> + * .save_live_complete_precopy_thread SaveVMHandler.
>> + *
>> + * It exits after either:
>> + * * completing saving the remaining device state and device config, OR:
>> + * * encountering some error while doing the above, OR:
>> + * * being forcefully aborted by the migration core by
>> + * multifd_device_state_save_thread_should_exit() returning true.
>> + */
>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>> + Error **errp)
>> +{
>> + VFIODevice *vbasedev = d->handler_opaque;
>> + VFIOMigration *migration = vbasedev->migration;
>> + bool ret;
>> + g_autofree VFIODeviceStatePacket *packet = NULL;
>> + uint32_t idx;
>> +
>> + if (!vfio_multifd_transfer_enabled(vbasedev)) {
>> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
>> + return true;
>> + }
>> +
>> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
>> + d->idstr, d->instance_id);
>> +
>> + /* We reach here with device state STOP or STOP_COPY only */
>> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>> + VFIO_DEVICE_STATE_STOP, errp)) {
>> + ret = false;
>> + goto ret_finish;
>> + }
>> +
>> + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
>> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>> +
>> + for (idx = 0; ; idx++) {
>> + ssize_t data_size;
>> + size_t packet_size;
>> +
>> + if (multifd_device_state_save_thread_should_exit()) {
>> + error_setg(errp, "operation cancelled");
>
> Same comment as in patch #27:
>
> IIUC, if multifd_device_state_save_thread_should_exit() returns true, it means that some other code part already failed and set migration error, no?
> If so, shouldn't we return true here? After all, vfio_save_complete_precopy_thread didn't really fail, it just got signal to terminate itself
Same as in the "load thread" case - the thread didn't succeed with saving all the data either,
but got cancelled.
>
>> + ret = false;
>> + goto ret_finish;
>> + }
>> +
>> + data_size = read(migration->data_fd, &packet->data,
>> + migration->data_buffer_size);
>> + if (data_size < 0) {
>> + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
>> + idx, errno);
>
> Let's add vbasedev->name to the error message so we know which device caused the error.
Done.
>> + ret = false;
>> + goto ret_finish;
>> + } else if (data_size == 0) {
>> + break;
>> + }
>> +
>> + packet->idx = idx;
>> + packet_size = sizeof(*packet) + data_size;
>> +
>> + if (!multifd_queue_device_state(d->idstr, d->instance_id,
>> + (char *)packet, packet_size)) {
>> + error_setg(errp, "multifd data queuing failed");
>
> Ditto.
Done.
> Thanks.
>
Thanks,
Maciej
On 2/19/25 21:34, Maciej S. Szmigiero wrote:
> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>
> Implement the multifd device state transfer via additional per-device
> thread inside save_live_complete_precopy_thread handler.
>
> Switch between doing the data transfer in the new handler and doing it
> in the old save_state handler depending on the
> x-migration-multifd-transfer device property value.
>
> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
> ---
> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++
> hw/vfio/migration-multifd.h | 5 ++
> hw/vfio/migration.c | 26 +++++--
> hw/vfio/trace-events | 2 +
> include/hw/vfio/vfio-common.h | 8 ++
> 5 files changed, 174 insertions(+), 6 deletions(-)
>
> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
> index 7200f6f1c2a2..0cfa9d31732a 100644
> --- a/hw/vfio/migration-multifd.c
> +++ b/hw/vfio/migration-multifd.c
> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
> return true;
> }
>
> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
> +{
> + assert(vfio_multifd_transfer_enabled(vbasedev));
> +
> + /*
> + * Emit dummy NOP data on the main migration channel since the actual
> + * device state transfer is done via multifd channels.
> + */
> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> +}
> +
> +static bool
> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
> + char *idstr,
> + uint32_t instance_id,
> + uint32_t idx,
> + Error **errp)
> +{
> + g_autoptr(QIOChannelBuffer) bioc = NULL;
> + g_autoptr(QEMUFile) f = NULL;
> + int ret;
> + g_autofree VFIODeviceStatePacket *packet = NULL;
> + size_t packet_len;
> +
> + bioc = qio_channel_buffer_new(0);
> + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
> +
> + f = qemu_file_new_output(QIO_CHANNEL(bioc));
> +
> + if (vfio_save_device_config_state(f, vbasedev, errp)) {
> + return false;
> + }
> +
> + ret = qemu_fflush(f);
> + if (ret) {
> + error_setg(errp, "save config state flush failed: %d", ret);
> + return false;
> + }
> +
> + packet_len = sizeof(*packet) + bioc->usage;
> + packet = g_malloc0(packet_len);
> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
> + packet->idx = idx;
> + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
> + memcpy(&packet->data, bioc->data, bioc->usage);
> +
> + if (!multifd_queue_device_state(idstr, instance_id,
> + (char *)packet, packet_len)) {
> + error_setg(errp, "multifd config data queuing failed");
> + return false;
> + }
> +
> + vfio_add_bytes_transferred(packet_len);
> +
> + return true;
> +}
> +
> +/*
> + * This thread is spawned by the migration core directly via
> + * .save_live_complete_precopy_thread SaveVMHandler.
> + *
> + * It exits after either:
> + * * completing saving the remaining device state and device config, OR:
> + * * encountering some error while doing the above, OR:
> + * * being forcefully aborted by the migration core by
> + * multifd_device_state_save_thread_should_exit() returning true.
> + */
> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
> + Error **errp)
In qemu_savevm_state_complete_precopy_iterable(), this handler is
called :
....
if (multifd_device_state) {
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
SaveLiveCompletePrecopyThreadHandler hdlr;
if (!se->ops || (in_postcopy && se->ops->has_postcopy &&
se->ops->has_postcopy(se->opaque)) ||
!se->ops->save_live_complete_precopy_thread) {
continue;
}
hdlr = se->ops->save_live_complete_precopy_thread;
multifd_spawn_device_state_save_thread(hdlr,
se->idstr, se->instance_id,
se->opaque);
}
}
I suggest naming it : vfio_multifd_save_complete_precopy_thread()
> +{
> + VFIODevice *vbasedev = d->handler_opaque;
> + VFIOMigration *migration = vbasedev->migration;
> + bool ret;
> + g_autofree VFIODeviceStatePacket *packet = NULL;
> + uint32_t idx;
> +
> + if (!vfio_multifd_transfer_enabled(vbasedev)) {
> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
> + return true;
> + }
> +
> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
> + d->idstr, d->instance_id);
> +
> + /* We reach here with device state STOP or STOP_COPY only */
> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
> + VFIO_DEVICE_STATE_STOP, errp)) {
> + ret = false;
These "ret = false" can be avoided if the variable is set at the
top of the function.
> + goto ret_finish;
goto thread_exit ?
> + }
> +
> + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
> +
> + for (idx = 0; ; idx++) {
> + ssize_t data_size;
> + size_t packet_size;
> +
> + if (multifd_device_state_save_thread_should_exit()) {
> + error_setg(errp, "operation cancelled");
> + ret = false;
> + goto ret_finish;
> + }> +
> + data_size = read(migration->data_fd, &packet->data,
> + migration->data_buffer_size);
> + if (data_size < 0) {
> + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
> + idx, errno);
> + ret = false;
> + goto ret_finish;
> + } else if (data_size == 0) {
> + break;
> + }
> +
> + packet->idx = idx;
> + packet_size = sizeof(*packet) + data_size;
> +
> + if (!multifd_queue_device_state(d->idstr, d->instance_id,
> + (char *)packet, packet_size)) {
> + error_setg(errp, "multifd data queuing failed");
> + ret = false;
> + goto ret_finish;
> + }
> +
> + vfio_add_bytes_transferred(packet_size);
> + }
> +
> + ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
> + d->idstr,
> + d->instance_id,
> + idx, errp);
> +
> +ret_finish:
> + trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
> +
> + return ret;
> +}
> +
> int vfio_multifd_switchover_start(VFIODevice *vbasedev)
> {
> VFIOMigration *migration = vbasedev->migration;
> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
> index 09cbb437d9d1..79780d7b5392 100644
> --- a/hw/vfio/migration-multifd.h
> +++ b/hw/vfio/migration-multifd.h
> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
> bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
> Error **errp);
>
> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
> +
> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
> + Error **errp);
> +
> int vfio_multifd_switchover_start(VFIODevice *vbasedev);
>
> #endif
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index b962309f7c27..69dcf2dac2fa 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
> vfio_migration_send_event(vbasedev);
> }
>
> -static int vfio_migration_set_state(VFIODevice *vbasedev,
> - enum vfio_device_mig_state new_state,
> - enum vfio_device_mig_state recover_state,
> - Error **errp)
> +int vfio_migration_set_state(VFIODevice *vbasedev,
> + enum vfio_device_mig_state new_state,
> + enum vfio_device_mig_state recover_state,
> + Error **errp)
> {
> VFIOMigration *migration = vbasedev->migration;
> uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
> return ret;
> }
>
> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
> - Error **errp)
> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
> {
> VFIODevice *vbasedev = opaque;
> int ret;
> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
> uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
> int ret;
>
> + if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
> + return -EINVAL;
> + }
> +
please move to another patch with the similar change of patch 25.
> qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>
> vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
> @@ -631,6 +634,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
> int ret;
> Error *local_err = NULL;
>
> + if (vfio_multifd_transfer_enabled(vbasedev)) {
> + vfio_multifd_emit_dummy_eos(vbasedev, f);
> + return 0;
> + }
> +
> trace_vfio_save_complete_precopy_start(vbasedev->name);
>
> /* We reach here with device state STOP or STOP_COPY only */
> @@ -662,6 +670,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
> Error *local_err = NULL;
> int ret;
>
> + if (vfio_multifd_transfer_enabled(vbasedev)) {
> + vfio_multifd_emit_dummy_eos(vbasedev, f);
> + return;
> + }
> +
> ret = vfio_save_device_config_state(f, opaque, &local_err);
> if (ret) {
> error_prepend(&local_err,
> @@ -819,6 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
> .is_active_iterate = vfio_is_active_iterate,
> .save_live_iterate = vfio_save_iterate,
> .save_live_complete_precopy = vfio_save_complete_precopy,
> + .save_live_complete_precopy_thread = vfio_save_complete_precopy_thread,
> .save_state = vfio_save_state,
> .load_setup = vfio_load_setup,
> .load_cleanup = vfio_load_cleanup,
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 418b378ebd29..039979bdd98f 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -168,6 +168,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
> vfio_save_cleanup(const char *name) " (%s)"
> vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
> vfio_save_complete_precopy_start(const char *name) " (%s)"
> +vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
> +vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
> vfio_save_device_config_state(const char *name) " (%s)"
> vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
> vfio_save_iterate_start(const char *name) " (%s)"
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index ce2bdea8a2c2..ba851917f9fc 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
> bool vfio_device_state_is_running(VFIODevice *vbasedev);
> bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
>
> +#ifdef CONFIG_LINUX
> +int vfio_migration_set_state(VFIODevice *vbasedev,
> + enum vfio_device_mig_state new_state,
> + enum vfio_device_mig_state recover_state,
> + Error **errp);
please move below with the other declarations under #ifdef CONFIG_LINUX.
> +#endif
> +
> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
> int vfio_load_device_config_state(QEMUFile *f, void *opaque);
>
> #ifdef CONFIG_LINUX
>
Thanks,
C.
On 26.02.2025 17:43, Cédric Le Goater wrote:
> On 2/19/25 21:34, Maciej S. Szmigiero wrote:
>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>
>> Implement the multifd device state transfer via additional per-device
>> thread inside save_live_complete_precopy_thread handler.
>>
>> Switch between doing the data transfer in the new handler and doing it
>> in the old save_state handler depending on the
>> x-migration-multifd-transfer device property value.
>>
>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>> ---
>> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++
>> hw/vfio/migration-multifd.h | 5 ++
>> hw/vfio/migration.c | 26 +++++--
>> hw/vfio/trace-events | 2 +
>> include/hw/vfio/vfio-common.h | 8 ++
>> 5 files changed, 174 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
>> index 7200f6f1c2a2..0cfa9d31732a 100644
>> --- a/hw/vfio/migration-multifd.c
>> +++ b/hw/vfio/migration-multifd.c
>> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
>> return true;
>> }
>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
>> +{
>> + assert(vfio_multifd_transfer_enabled(vbasedev));
>> +
>> + /*
>> + * Emit dummy NOP data on the main migration channel since the actual
>> + * device state transfer is done via multifd channels.
>> + */
>> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>> +}
>> +
>> +static bool
>> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
>> + char *idstr,
>> + uint32_t instance_id,
>> + uint32_t idx,
>> + Error **errp)
>> +{
>> + g_autoptr(QIOChannelBuffer) bioc = NULL;
>> + g_autoptr(QEMUFile) f = NULL;
>> + int ret;
>> + g_autofree VFIODeviceStatePacket *packet = NULL;
>> + size_t packet_len;
>> +
>> + bioc = qio_channel_buffer_new(0);
>> + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
>> +
>> + f = qemu_file_new_output(QIO_CHANNEL(bioc));
>> +
>> + if (vfio_save_device_config_state(f, vbasedev, errp)) {
>> + return false;
>> + }
>> +
>> + ret = qemu_fflush(f);
>> + if (ret) {
>> + error_setg(errp, "save config state flush failed: %d", ret);
>> + return false;
>> + }
>> +
>> + packet_len = sizeof(*packet) + bioc->usage;
>> + packet = g_malloc0(packet_len);
>> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>> + packet->idx = idx;
>> + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
>> + memcpy(&packet->data, bioc->data, bioc->usage);
>> +
>> + if (!multifd_queue_device_state(idstr, instance_id,
>> + (char *)packet, packet_len)) {
>> + error_setg(errp, "multifd config data queuing failed");
>> + return false;
>> + }
>> +
>> + vfio_add_bytes_transferred(packet_len);
>> +
>> + return true;
>> +}
>> +
>> +/*
>> + * This thread is spawned by the migration core directly via
>> + * .save_live_complete_precopy_thread SaveVMHandler.
>> + *
>> + * It exits after either:
>> + * * completing saving the remaining device state and device config, OR:
>> + * * encountering some error while doing the above, OR:
>> + * * being forcefully aborted by the migration core by
>> + * multifd_device_state_save_thread_should_exit() returning true.
>> + */
>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>> + Error **errp)
>
> In qemu_savevm_state_complete_precopy_iterable(), this handler is
> called :
>
> ....
> if (multifd_device_state) {
> QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
> SaveLiveCompletePrecopyThreadHandler hdlr;
>
> if (!se->ops || (in_postcopy && se->ops->has_postcopy &&
> se->ops->has_postcopy(se->opaque)) ||
> !se->ops->save_live_complete_precopy_thread) {
> continue;
> }
>
> hdlr = se->ops->save_live_complete_precopy_thread;
> multifd_spawn_device_state_save_thread(hdlr,
> se->idstr, se->instance_id,
> se->opaque);
> }
> }
>
>
> I suggest naming it : vfio_multifd_save_complete_precopy_thread()
Renamed accordingly.
>> +{
>> + VFIODevice *vbasedev = d->handler_opaque;
>> + VFIOMigration *migration = vbasedev->migration;
>> + bool ret;
>> + g_autofree VFIODeviceStatePacket *packet = NULL;
>> + uint32_t idx;
>> +
>> + if (!vfio_multifd_transfer_enabled(vbasedev)) {
>> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
>> + return true;
>> + }
>> +
>> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
>> + d->idstr, d->instance_id);
>> +
>> + /* We reach here with device state STOP or STOP_COPY only */
>> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>> + VFIO_DEVICE_STATE_STOP, errp)) {
>> + ret = false;
>
> These "ret = false" can be avoided if the variable is set at the
> top of the function.
I inverted the "ret" logic here as in vfio_load_bufs_thread()
to make it false by default and set to true just before early
exit label.
>> + goto ret_finish;
>
>
> goto thread_exit ?
As I asked in one of the previous patches,
do this comment mean that your want to rename ret_finish label to
thread_exit?
>> + }
>> +
>> + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
>> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>> +
>> + for (idx = 0; ; idx++) {
>> + ssize_t data_size;
>> + size_t packet_size;
>> +
>> + if (multifd_device_state_save_thread_should_exit()) {
>> + error_setg(errp, "operation cancelled");
>> + ret = false;
>> + goto ret_finish;
>> + }> +
>> + data_size = read(migration->data_fd, &packet->data,
>> + migration->data_buffer_size);
>> + if (data_size < 0) {
>> + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
>> + idx, errno);
>> + ret = false;
>> + goto ret_finish;
>> + } else if (data_size == 0) {
>> + break;
>> + }
>> +
>> + packet->idx = idx;
>> + packet_size = sizeof(*packet) + data_size;
>> +
>> + if (!multifd_queue_device_state(d->idstr, d->instance_id,
>> + (char *)packet, packet_size)) {
>> + error_setg(errp, "multifd data queuing failed");
>> + ret = false;
>> + goto ret_finish;
>> + }
>> +
>> + vfio_add_bytes_transferred(packet_size);
>> + }
>> +
>> + ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
>> + d->idstr,
>> + d->instance_id,
>> + idx, errp);
>> +
>> +ret_finish:
>> + trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
>> +
>> + return ret;
>> +}
>> +
>> int vfio_multifd_switchover_start(VFIODevice *vbasedev)
>> {
>> VFIOMigration *migration = vbasedev->migration;
>> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
>> index 09cbb437d9d1..79780d7b5392 100644
>> --- a/hw/vfio/migration-multifd.h
>> +++ b/hw/vfio/migration-multifd.h
>> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
>> bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
>> Error **errp);
>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
>> +
>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>> + Error **errp);
>> +
>> int vfio_multifd_switchover_start(VFIODevice *vbasedev);
>> #endif
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index b962309f7c27..69dcf2dac2fa 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
(..)
>> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
>> return ret;
>> }
>> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
>> - Error **errp)
>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
>> {
>> VFIODevice *vbasedev = opaque;
>> int ret;
>> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>> uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
>> int ret;
>> + if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
>> + return -EINVAL;
>> + }
>> +
>
> please move to another patch with the similar change of patch 25.
>
This patch is about the send/save side while patch 25
is called "*receive* init/cleanup".
So adding save setup to patch called "receive init" wouldn't be
consistent with that patch subject.
>> qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>> vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
(..)
>> index ce2bdea8a2c2..ba851917f9fc 100644
>> --- a/include/hw/vfio/vfio-common.h
>> +++ b/include/hw/vfio/vfio-common.h
>> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
>> bool vfio_device_state_is_running(VFIODevice *vbasedev);
>> bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
>> +#ifdef CONFIG_LINUX
>> +int vfio_migration_set_state(VFIODevice *vbasedev,
>> + enum vfio_device_mig_state new_state,
>> + enum vfio_device_mig_state recover_state,
>> + Error **errp);
>
> please move below with the other declarations under #ifdef CONFIG_LINUX.
>
>> +#endif
>> +
>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
>> int vfio_load_device_config_state(QEMUFile *f, void *opaque);
>> #ifdef CONFIG_LINUX
>>
>
Done.
>
> Thanks,
>
> C.
Thanks,
Maciej
On 2/26/25 22:05, Maciej S. Szmigiero wrote:
> On 26.02.2025 17:43, Cédric Le Goater wrote:
>> On 2/19/25 21:34, Maciej S. Szmigiero wrote:
>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>>
>>> Implement the multifd device state transfer via additional per-device
>>> thread inside save_live_complete_precopy_thread handler.
>>>
>>> Switch between doing the data transfer in the new handler and doing it
>>> in the old save_state handler depending on the
>>> x-migration-multifd-transfer device property value.
>>>
>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>>> ---
>>> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++
>>> hw/vfio/migration-multifd.h | 5 ++
>>> hw/vfio/migration.c | 26 +++++--
>>> hw/vfio/trace-events | 2 +
>>> include/hw/vfio/vfio-common.h | 8 ++
>>> 5 files changed, 174 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
>>> index 7200f6f1c2a2..0cfa9d31732a 100644
>>> --- a/hw/vfio/migration-multifd.c
>>> +++ b/hw/vfio/migration-multifd.c
>>> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
>>> return true;
>>> }
>>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
>>> +{
>>> + assert(vfio_multifd_transfer_enabled(vbasedev));
>>> +
>>> + /*
>>> + * Emit dummy NOP data on the main migration channel since the actual
>>> + * device state transfer is done via multifd channels.
>>> + */
>>> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>>> +}
>>> +
>>> +static bool
>>> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
>>> + char *idstr,
>>> + uint32_t instance_id,
>>> + uint32_t idx,
>>> + Error **errp)
>>> +{
>>> + g_autoptr(QIOChannelBuffer) bioc = NULL;
>>> + g_autoptr(QEMUFile) f = NULL;
>>> + int ret;
>>> + g_autofree VFIODeviceStatePacket *packet = NULL;
>>> + size_t packet_len;
>>> +
>>> + bioc = qio_channel_buffer_new(0);
>>> + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
>>> +
>>> + f = qemu_file_new_output(QIO_CHANNEL(bioc));
>>> +
>>> + if (vfio_save_device_config_state(f, vbasedev, errp)) {
>>> + return false;
>>> + }
>>> +
>>> + ret = qemu_fflush(f);
>>> + if (ret) {
>>> + error_setg(errp, "save config state flush failed: %d", ret);
>>> + return false;
>>> + }
>>> +
>>> + packet_len = sizeof(*packet) + bioc->usage;
>>> + packet = g_malloc0(packet_len);
>>> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>>> + packet->idx = idx;
>>> + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
>>> + memcpy(&packet->data, bioc->data, bioc->usage);
>>> +
>>> + if (!multifd_queue_device_state(idstr, instance_id,
>>> + (char *)packet, packet_len)) {
>>> + error_setg(errp, "multifd config data queuing failed");
>>> + return false;
>>> + }
>>> +
>>> + vfio_add_bytes_transferred(packet_len);
>>> +
>>> + return true;
>>> +}
>>> +
>>> +/*
>>> + * This thread is spawned by the migration core directly via
>>> + * .save_live_complete_precopy_thread SaveVMHandler.
>>> + *
>>> + * It exits after either:
>>> + * * completing saving the remaining device state and device config, OR:
>>> + * * encountering some error while doing the above, OR:
>>> + * * being forcefully aborted by the migration core by
>>> + * multifd_device_state_save_thread_should_exit() returning true.
>>> + */
>>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>>> + Error **errp)
>>
>> In qemu_savevm_state_complete_precopy_iterable(), this handler is
>> called :
>>
>> ....
>> if (multifd_device_state) {
>> QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>> SaveLiveCompletePrecopyThreadHandler hdlr;
>>
>> if (!se->ops || (in_postcopy && se->ops->has_postcopy &&
>> se->ops->has_postcopy(se->opaque)) ||
>> !se->ops->save_live_complete_precopy_thread) {
>> continue;
>> }
>>
>> hdlr = se->ops->save_live_complete_precopy_thread;
>> multifd_spawn_device_state_save_thread(hdlr,
>> se->idstr, se->instance_id,
>> se->opaque);
>> }
>> }
>>
>>
>> I suggest naming it : vfio_multifd_save_complete_precopy_thread()
>
> Renamed accordingly.
>
>>> +{
>>> + VFIODevice *vbasedev = d->handler_opaque;
>>> + VFIOMigration *migration = vbasedev->migration;
>>> + bool ret;
>>> + g_autofree VFIODeviceStatePacket *packet = NULL;
>>> + uint32_t idx;
>>> +
>>> + if (!vfio_multifd_transfer_enabled(vbasedev)) {
>>> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
>>> + return true;
>>> + }
>>> +
>>> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
>>> + d->idstr, d->instance_id);
>>> +
>>> + /* We reach here with device state STOP or STOP_COPY only */
>>> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>>> + VFIO_DEVICE_STATE_STOP, errp)) {
>>> + ret = false;
>>
>> These "ret = false" can be avoided if the variable is set at the
>> top of the function.
>
> I inverted the "ret" logic here as in vfio_load_bufs_thread()
> to make it false by default and set to true just before early
> exit label.
ok. Let's see what it looks like in v6.
>>> + goto ret_finish;
>>
>>
>> goto thread_exit ?
>
> As I asked in one of the previous patches,
> do this comment mean that your want to rename ret_finish label to
> thread_exit?
Yes. I find label 'thread_exit' more meaning full. This is minor since
there is only one 'exit' label.
>
>>> + }
>>> +
>>> + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
>>> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
>>> +
>>> + for (idx = 0; ; idx++) {
>>> + ssize_t data_size;
>>> + size_t packet_size;
>>> +
>>> + if (multifd_device_state_save_thread_should_exit()) {
>>> + error_setg(errp, "operation cancelled");
>>> + ret = false;
>>> + goto ret_finish;
>>> + }> +
>>> + data_size = read(migration->data_fd, &packet->data,
>>> + migration->data_buffer_size);
>>> + if (data_size < 0) {
>>> + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
>>> + idx, errno);
>>> + ret = false;
>>> + goto ret_finish;
>>> + } else if (data_size == 0) {
>>> + break;
>>> + }
>>> +
>>> + packet->idx = idx;
>>> + packet_size = sizeof(*packet) + data_size;
>>> +
>>> + if (!multifd_queue_device_state(d->idstr, d->instance_id,
>>> + (char *)packet, packet_size)) {
>>> + error_setg(errp, "multifd data queuing failed");
>>> + ret = false;
>>> + goto ret_finish;
>>> + }
>>> +
>>> + vfio_add_bytes_transferred(packet_size);
>>> + }
>>> +
>>> + ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
>>> + d->idstr,
>>> + d->instance_id,
>>> + idx, errp);
>>> +
>>> +ret_finish:
>>> + trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
>>> +
>>> + return ret;
>>> +}
>>> +
>>> int vfio_multifd_switchover_start(VFIODevice *vbasedev)
>>> {
>>> VFIOMigration *migration = vbasedev->migration;
>>> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
>>> index 09cbb437d9d1..79780d7b5392 100644
>>> --- a/hw/vfio/migration-multifd.h
>>> +++ b/hw/vfio/migration-multifd.h
>>> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
>>> bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
>>> Error **errp);
>>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
>>> +
>>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
>>> + Error **errp);
>>> +
>>> int vfio_multifd_switchover_start(VFIODevice *vbasedev);
>>> #endif
>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>> index b962309f7c27..69dcf2dac2fa 100644
>>> --- a/hw/vfio/migration.c
>>> +++ b/hw/vfio/migration.c
> (..)
>>> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
>>> return ret;
>>> }
>>> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
>>> - Error **errp)
>>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
>>> {
>>> VFIODevice *vbasedev = opaque;
>>> int ret;
>>> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>>> uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
>>> int ret;
>>> + if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
>>> + return -EINVAL;
>>> + }
>>> +
>>
>> please move to another patch with the similar change of patch 25.
>>
>
> This patch is about the send/save side while patch 25
> is called "*receive* init/cleanup".
>
> So adding save setup to patch called "receive init" wouldn't be
> consistent with that patch subject.
In that case, could please add an extra patch checking for the consistency
of the settings ?
Thanks,
C.
>
>>> qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>>> vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
>
> (..)
>>> index ce2bdea8a2c2..ba851917f9fc 100644
>>> --- a/include/hw/vfio/vfio-common.h
>>> +++ b/include/hw/vfio/vfio-common.h
>>> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
>>> bool vfio_device_state_is_running(VFIODevice *vbasedev);
>>> bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
>>> +#ifdef CONFIG_LINUX
>>> +int vfio_migration_set_state(VFIODevice *vbasedev,
>>> + enum vfio_device_mig_state new_state,
>>> + enum vfio_device_mig_state recover_state,
>>> + Error **errp);
>>
>> please move below with the other declarations under #ifdef CONFIG_LINUX.
>>
>>> +#endif
>>> +
>>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
>>> int vfio_load_device_config_state(QEMUFile *f, void *opaque);
>>> #ifdef CONFIG_LINUX
>>>
>>
>
> Done.
>
>>
>> Thanks,
>>
>> C.
>
> Thanks,
> Maciej
>
On 28.02.2025 10:13, Cédric Le Goater wrote:
> On 2/26/25 22:05, Maciej S. Szmigiero wrote:
>> On 26.02.2025 17:43, Cédric Le Goater wrote:
>>> On 2/19/25 21:34, Maciej S. Szmigiero wrote:
>>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
>>>>
>>>> Implement the multifd device state transfer via additional per-device
>>>> thread inside save_live_complete_precopy_thread handler.
>>>>
>>>> Switch between doing the data transfer in the new handler and doing it
>>>> in the old save_state handler depending on the
>>>> x-migration-multifd-transfer device property value.
>>>>
>>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
>>>> ---
>>>> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++
>>>> hw/vfio/migration-multifd.h | 5 ++
>>>> hw/vfio/migration.c | 26 +++++--
>>>> hw/vfio/trace-events | 2 +
>>>> include/hw/vfio/vfio-common.h | 8 ++
>>>> 5 files changed, 174 insertions(+), 6 deletions(-)
>>>>
>>>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
>>>> index 7200f6f1c2a2..0cfa9d31732a 100644
>>>> --- a/hw/vfio/migration-multifd.c
>>>> +++ b/hw/vfio/migration-multifd.c
(..)
>>>> +{
>>>> + VFIODevice *vbasedev = d->handler_opaque;
>>>> + VFIOMigration *migration = vbasedev->migration;
>>>> + bool ret;
>>>> + g_autofree VFIODeviceStatePacket *packet = NULL;
>>>> + uint32_t idx;
>>>> +
>>>> + if (!vfio_multifd_transfer_enabled(vbasedev)) {
>>>> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
>>>> + return true;
>>>> + }
>>>> +
>>>> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
>>>> + d->idstr, d->instance_id);
>>>> +
>>>> + /* We reach here with device state STOP or STOP_COPY only */
>>>> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
>>>> + VFIO_DEVICE_STATE_STOP, errp)) {
>>>> + ret = false;
>>>
>>> These "ret = false" can be avoided if the variable is set at the
>>> top of the function.
>>
>> I inverted the "ret" logic here as in vfio_load_bufs_thread()
>> to make it false by default and set to true just before early
>> exit label.
>
> ok. Let's see what it looks like in v6.
>
>>>> + goto ret_finish;
>>>
>>>
>>> goto thread_exit ?
>>
>> As I asked in one of the previous patches,
>> do this comment mean that your want to rename ret_finish label to
>> thread_exit?
>
> Yes. I find label 'thread_exit' more meaning full. This is minor since
> there is only one 'exit' label.
Renamed ret_finish to thread_exit then.
>
>>
(..)
>>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>>>> index b962309f7c27..69dcf2dac2fa 100644
>>>> --- a/hw/vfio/migration.c
>>>> +++ b/hw/vfio/migration.c
>> (..)
>>>> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
>>>> return ret;
>>>> }
>>>> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
>>>> - Error **errp)
>>>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
>>>> {
>>>> VFIODevice *vbasedev = opaque;
>>>> int ret;
>>>> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
>>>> uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
>>>> int ret;
>>>> + if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
>>>> + return -EINVAL;
>>>> + }
>>>> +
>>>
>>> please move to another patch with the similar change of patch 25.
>>>
>>
>> This patch is about the send/save side while patch 25
>> is called "*receive* init/cleanup".
>>
>> So adding save setup to patch called "receive init" wouldn't be
>> consistent with that patch subject.
>
> In that case, could please add an extra patch checking for the consistency
> of the settings ?
I split out wiring vfio_multifd_setup() and vfio_multifd_cleanup() into
general VFIO load/save setup and cleanup methods from this patch and
patch "Multifd device state transfer support - receive init/cleanup"
into a brand new patch/commit.
By the way, due to changes discussed over the last two days
vfio_multifd_setup() (aka vfio_multifd_transfer_setup()) not only
does consistency checking but also allocates VFIOMultifd:
https://lore.kernel.org/qemu-devel/6546c3a4-bd81-42ea-88a2-b2f88ec2fbb3@maciej.szmigiero.name/
>
> Thanks,
>
> C.
>
>
>
Thanks,
Maciej
© 2016 - 2026 Red Hat, Inc.