From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Implement the multifd device state transfer via additional per-device
thread inside save_live_complete_precopy_thread handler.
Switch between doing the data transfer in the new handler and doing it
in the old save_state handler depending on the
x-migration-multifd-transfer device property value.
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++
hw/vfio/migration-multifd.h | 5 ++
hw/vfio/migration.c | 26 +++++--
hw/vfio/trace-events | 2 +
include/hw/vfio/vfio-common.h | 8 ++
5 files changed, 174 insertions(+), 6 deletions(-)
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index 7200f6f1c2a2..0cfa9d31732a 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp)
return true;
}
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f)
+{
+ assert(vfio_multifd_transfer_enabled(vbasedev));
+
+ /*
+ * Emit dummy NOP data on the main migration channel since the actual
+ * device state transfer is done via multifd channels.
+ */
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+}
+
+static bool
+vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev,
+ char *idstr,
+ uint32_t instance_id,
+ uint32_t idx,
+ Error **errp)
+{
+ g_autoptr(QIOChannelBuffer) bioc = NULL;
+ g_autoptr(QEMUFile) f = NULL;
+ int ret;
+ g_autofree VFIODeviceStatePacket *packet = NULL;
+ size_t packet_len;
+
+ bioc = qio_channel_buffer_new(0);
+ qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save");
+
+ f = qemu_file_new_output(QIO_CHANNEL(bioc));
+
+ if (vfio_save_device_config_state(f, vbasedev, errp)) {
+ return false;
+ }
+
+ ret = qemu_fflush(f);
+ if (ret) {
+ error_setg(errp, "save config state flush failed: %d", ret);
+ return false;
+ }
+
+ packet_len = sizeof(*packet) + bioc->usage;
+ packet = g_malloc0(packet_len);
+ packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+ packet->idx = idx;
+ packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE;
+ memcpy(&packet->data, bioc->data, bioc->usage);
+
+ if (!multifd_queue_device_state(idstr, instance_id,
+ (char *)packet, packet_len)) {
+ error_setg(errp, "multifd config data queuing failed");
+ return false;
+ }
+
+ vfio_add_bytes_transferred(packet_len);
+
+ return true;
+}
+
+/*
+ * This thread is spawned by the migration core directly via
+ * .save_live_complete_precopy_thread SaveVMHandler.
+ *
+ * It exits after either:
+ * * completing saving the remaining device state and device config, OR:
+ * * encountering some error while doing the above, OR:
+ * * being forcefully aborted by the migration core by
+ * multifd_device_state_save_thread_should_exit() returning true.
+ */
+bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+ Error **errp)
+{
+ VFIODevice *vbasedev = d->handler_opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ bool ret;
+ g_autofree VFIODeviceStatePacket *packet = NULL;
+ uint32_t idx;
+
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
+ /* Nothing to do, vfio_save_complete_precopy() does the transfer. */
+ return true;
+ }
+
+ trace_vfio_save_complete_precopy_thread_start(vbasedev->name,
+ d->idstr, d->instance_id);
+
+ /* We reach here with device state STOP or STOP_COPY only */
+ if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY,
+ VFIO_DEVICE_STATE_STOP, errp)) {
+ ret = false;
+ goto ret_finish;
+ }
+
+ packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size);
+ packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT;
+
+ for (idx = 0; ; idx++) {
+ ssize_t data_size;
+ size_t packet_size;
+
+ if (multifd_device_state_save_thread_should_exit()) {
+ error_setg(errp, "operation cancelled");
+ ret = false;
+ goto ret_finish;
+ }
+
+ data_size = read(migration->data_fd, &packet->data,
+ migration->data_buffer_size);
+ if (data_size < 0) {
+ error_setg(errp, "reading state buffer %" PRIu32 " failed: %d",
+ idx, errno);
+ ret = false;
+ goto ret_finish;
+ } else if (data_size == 0) {
+ break;
+ }
+
+ packet->idx = idx;
+ packet_size = sizeof(*packet) + data_size;
+
+ if (!multifd_queue_device_state(d->idstr, d->instance_id,
+ (char *)packet, packet_size)) {
+ error_setg(errp, "multifd data queuing failed");
+ ret = false;
+ goto ret_finish;
+ }
+
+ vfio_add_bytes_transferred(packet_size);
+ }
+
+ ret = vfio_save_complete_precopy_thread_config_state(vbasedev,
+ d->idstr,
+ d->instance_id,
+ idx, errp);
+
+ret_finish:
+ trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret);
+
+ return ret;
+}
+
int vfio_multifd_switchover_start(VFIODevice *vbasedev)
{
VFIOMigration *migration = vbasedev->migration;
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
index 09cbb437d9d1..79780d7b5392 100644
--- a/hw/vfio/migration-multifd.h
+++ b/hw/vfio/migration-multifd.h
@@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
Error **errp);
+void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f);
+
+bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d,
+ Error **errp);
+
int vfio_multifd_switchover_start(VFIODevice *vbasedev);
#endif
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index b962309f7c27..69dcf2dac2fa 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev,
vfio_migration_send_event(vbasedev);
}
-static int vfio_migration_set_state(VFIODevice *vbasedev,
- enum vfio_device_mig_state new_state,
- enum vfio_device_mig_state recover_state,
- Error **errp)
+int vfio_migration_set_state(VFIODevice *vbasedev,
+ enum vfio_device_mig_state new_state,
+ enum vfio_device_mig_state recover_state,
+ Error **errp)
{
VFIOMigration *migration = vbasedev->migration;
uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) +
@@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
return ret;
}
-static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
- Error **errp)
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp)
{
VFIODevice *vbasedev = opaque;
int ret;
@@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp)
uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE;
int ret;
+ if (!vfio_multifd_transfer_setup(vbasedev, errp)) {
+ return -EINVAL;
+ }
+
qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
vfio_query_stop_copy_size(vbasedev, &stop_copy_size);
@@ -631,6 +634,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
int ret;
Error *local_err = NULL;
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ return 0;
+ }
+
trace_vfio_save_complete_precopy_start(vbasedev->name);
/* We reach here with device state STOP or STOP_COPY only */
@@ -662,6 +670,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque)
Error *local_err = NULL;
int ret;
+ if (vfio_multifd_transfer_enabled(vbasedev)) {
+ vfio_multifd_emit_dummy_eos(vbasedev, f);
+ return;
+ }
+
ret = vfio_save_device_config_state(f, opaque, &local_err);
if (ret) {
error_prepend(&local_err,
@@ -819,6 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
.is_active_iterate = vfio_is_active_iterate,
.save_live_iterate = vfio_save_iterate,
.save_live_complete_precopy = vfio_save_complete_precopy,
+ .save_live_complete_precopy_thread = vfio_save_complete_precopy_thread,
.save_state = vfio_save_state,
.load_setup = vfio_load_setup,
.load_cleanup = vfio_load_cleanup,
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 418b378ebd29..039979bdd98f 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -168,6 +168,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)"
vfio_save_cleanup(const char *name) " (%s)"
vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d"
vfio_save_complete_precopy_start(const char *name) " (%s)"
+vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32
+vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d"
vfio_save_device_config_state(const char *name) " (%s)"
vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64
vfio_save_iterate_start(const char *name) " (%s)"
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index ce2bdea8a2c2..ba851917f9fc 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val);
bool vfio_device_state_is_running(VFIODevice *vbasedev);
bool vfio_device_state_is_precopy(VFIODevice *vbasedev);
+#ifdef CONFIG_LINUX
+int vfio_migration_set_state(VFIODevice *vbasedev,
+ enum vfio_device_mig_state new_state,
+ enum vfio_device_mig_state recover_state,
+ Error **errp);
+#endif
+
+int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp);
int vfio_load_device_config_state(QEMUFile *f, void *opaque);
#ifdef CONFIG_LINUX
On 19/02/2025 22:34, Maciej S. Szmigiero wrote: > External email: Use caution opening links or attachments > > > From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> > > Implement the multifd device state transfer via additional per-device > thread inside save_live_complete_precopy_thread handler. > > Switch between doing the data transfer in the new handler and doing it > in the old save_state handler depending on the > x-migration-multifd-transfer device property value. x-migration-multifd-transfer is not yet introduced. Maybe rephrase to: ... depending if VFIO multifd transfer is enabled or not. > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> > --- > hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++ > hw/vfio/migration-multifd.h | 5 ++ > hw/vfio/migration.c | 26 +++++-- > hw/vfio/trace-events | 2 + > include/hw/vfio/vfio-common.h | 8 ++ > 5 files changed, 174 insertions(+), 6 deletions(-) > > diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c > index 7200f6f1c2a2..0cfa9d31732a 100644 > --- a/hw/vfio/migration-multifd.c > +++ b/hw/vfio/migration-multifd.c > @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp) > return true; > } > > +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f) > +{ > + assert(vfio_multifd_transfer_enabled(vbasedev)); > + > + /* > + * Emit dummy NOP data on the main migration channel since the actual > + * device state transfer is done via multifd channels. > + */ > + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); > +} > + > +static bool > +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, > + char *idstr, > + uint32_t instance_id, > + uint32_t idx, > + Error **errp) > +{ > + g_autoptr(QIOChannelBuffer) bioc = NULL; > + g_autoptr(QEMUFile) f = NULL; > + int ret; > + g_autofree VFIODeviceStatePacket *packet = NULL; > + size_t packet_len; > + > + bioc = qio_channel_buffer_new(0); > + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save"); > + > + f = qemu_file_new_output(QIO_CHANNEL(bioc)); > + > + if (vfio_save_device_config_state(f, vbasedev, errp)) { > + return false; > + } > + > + ret = qemu_fflush(f); > + if (ret) { > + error_setg(errp, "save config state flush failed: %d", ret); Let's add vbasedev->name to the error message so we know which device caused the error. > + return false; > + } > + > + packet_len = sizeof(*packet) + bioc->usage; > + packet = g_malloc0(packet_len); > + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; > + packet->idx = idx; > + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE; > + memcpy(&packet->data, bioc->data, bioc->usage); > + > + if (!multifd_queue_device_state(idstr, instance_id, > + (char *)packet, packet_len)) { > + error_setg(errp, "multifd config data queuing failed"); Ditto. > + return false; > + } > + > + vfio_add_bytes_transferred(packet_len); > + > + return true; > +} > + > +/* > + * This thread is spawned by the migration core directly via > + * .save_live_complete_precopy_thread SaveVMHandler. > + * > + * It exits after either: > + * * completing saving the remaining device state and device config, OR: > + * * encountering some error while doing the above, OR: > + * * being forcefully aborted by the migration core by > + * multifd_device_state_save_thread_should_exit() returning true. > + */ > +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, > + Error **errp) > +{ > + VFIODevice *vbasedev = d->handler_opaque; > + VFIOMigration *migration = vbasedev->migration; > + bool ret; > + g_autofree VFIODeviceStatePacket *packet = NULL; > + uint32_t idx; > + > + if (!vfio_multifd_transfer_enabled(vbasedev)) { > + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */ > + return true; > + } > + > + trace_vfio_save_complete_precopy_thread_start(vbasedev->name, > + d->idstr, d->instance_id); > + > + /* We reach here with device state STOP or STOP_COPY only */ > + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, > + VFIO_DEVICE_STATE_STOP, errp)) { > + ret = false; > + goto ret_finish; > + } > + > + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); > + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; > + > + for (idx = 0; ; idx++) { > + ssize_t data_size; > + size_t packet_size; > + > + if (multifd_device_state_save_thread_should_exit()) { > + error_setg(errp, "operation cancelled"); Same comment as in patch #27: IIUC, if multifd_device_state_save_thread_should_exit() returns true, it means that some other code part already failed and set migration error, no? If so, shouldn't we return true here? After all, vfio_save_complete_precopy_thread didn't really fail, it just got signal to terminate itself > + ret = false; > + goto ret_finish; > + } > + > + data_size = read(migration->data_fd, &packet->data, > + migration->data_buffer_size); > + if (data_size < 0) { > + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d", > + idx, errno); Let's add vbasedev->name to the error message so we know which device caused the error. > + ret = false; > + goto ret_finish; > + } else if (data_size == 0) { > + break; > + } > + > + packet->idx = idx; > + packet_size = sizeof(*packet) + data_size; > + > + if (!multifd_queue_device_state(d->idstr, d->instance_id, > + (char *)packet, packet_size)) { > + error_setg(errp, "multifd data queuing failed"); Ditto. Thanks. > + ret = false; > + goto ret_finish; > + } > + > + vfio_add_bytes_transferred(packet_size); > + } > + > + ret = vfio_save_complete_precopy_thread_config_state(vbasedev, > + d->idstr, > + d->instance_id, > + idx, errp); > + > +ret_finish: > + trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret); > + > + return ret; > +} > + > int vfio_multifd_switchover_start(VFIODevice *vbasedev) > { > VFIOMigration *migration = vbasedev->migration; > diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h > index 09cbb437d9d1..79780d7b5392 100644 > --- a/hw/vfio/migration-multifd.h > +++ b/hw/vfio/migration-multifd.h > @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp); > bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, > Error **errp); > > +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f); > + > +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, > + Error **errp); > + > int vfio_multifd_switchover_start(VFIODevice *vbasedev); > > #endif > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index b962309f7c27..69dcf2dac2fa 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev, > vfio_migration_send_event(vbasedev); > } > > -static int vfio_migration_set_state(VFIODevice *vbasedev, > - enum vfio_device_mig_state new_state, > - enum vfio_device_mig_state recover_state, > - Error **errp) > +int vfio_migration_set_state(VFIODevice *vbasedev, > + enum vfio_device_mig_state new_state, > + enum vfio_device_mig_state recover_state, > + Error **errp) > { > VFIOMigration *migration = vbasedev->migration; > uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + > @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, > return ret; > } > > -static int vfio_save_device_config_state(QEMUFile *f, void *opaque, > - Error **errp) > +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp) > { > VFIODevice *vbasedev = opaque; > int ret; > @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) > uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; > int ret; > > + if (!vfio_multifd_transfer_setup(vbasedev, errp)) { > + return -EINVAL; > + } > + > qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > > vfio_query_stop_copy_size(vbasedev, &stop_copy_size); > @@ -631,6 +634,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) > int ret; > Error *local_err = NULL; > > + if (vfio_multifd_transfer_enabled(vbasedev)) { > + vfio_multifd_emit_dummy_eos(vbasedev, f); > + return 0; > + } > + > trace_vfio_save_complete_precopy_start(vbasedev->name); > > /* We reach here with device state STOP or STOP_COPY only */ > @@ -662,6 +670,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque) > Error *local_err = NULL; > int ret; > > + if (vfio_multifd_transfer_enabled(vbasedev)) { > + vfio_multifd_emit_dummy_eos(vbasedev, f); > + return; > + } > + > ret = vfio_save_device_config_state(f, opaque, &local_err); > if (ret) { > error_prepend(&local_err, > @@ -819,6 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { > .is_active_iterate = vfio_is_active_iterate, > .save_live_iterate = vfio_save_iterate, > .save_live_complete_precopy = vfio_save_complete_precopy, > + .save_live_complete_precopy_thread = vfio_save_complete_precopy_thread, > .save_state = vfio_save_state, > .load_setup = vfio_load_setup, > .load_cleanup = vfio_load_cleanup, > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 418b378ebd29..039979bdd98f 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -168,6 +168,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)" > vfio_save_cleanup(const char *name) " (%s)" > vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" > vfio_save_complete_precopy_start(const char *name) " (%s)" > +vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32 > +vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d" > vfio_save_device_config_state(const char *name) " (%s)" > vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64 > vfio_save_iterate_start(const char *name) " (%s)" > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index ce2bdea8a2c2..ba851917f9fc 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val); > bool vfio_device_state_is_running(VFIODevice *vbasedev); > bool vfio_device_state_is_precopy(VFIODevice *vbasedev); > > +#ifdef CONFIG_LINUX > +int vfio_migration_set_state(VFIODevice *vbasedev, > + enum vfio_device_mig_state new_state, > + enum vfio_device_mig_state recover_state, > + Error **errp); > +#endif > + > +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp); > int vfio_load_device_config_state(QEMUFile *f, void *opaque); > > #ifdef CONFIG_LINUX
On 2.03.2025 15:41, Avihai Horon wrote: > > On 19/02/2025 22:34, Maciej S. Szmigiero wrote: >> External email: Use caution opening links or attachments >> >> >> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >> >> Implement the multifd device state transfer via additional per-device >> thread inside save_live_complete_precopy_thread handler. >> >> Switch between doing the data transfer in the new handler and doing it >> in the old save_state handler depending on the >> x-migration-multifd-transfer device property value. > > x-migration-multifd-transfer is not yet introduced. Maybe rephrase to: > > ... depending if VFIO multifd transfer is enabled or not. Changed accordingly. >> >> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >> --- >> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++ >> hw/vfio/migration-multifd.h | 5 ++ >> hw/vfio/migration.c | 26 +++++-- >> hw/vfio/trace-events | 2 + >> include/hw/vfio/vfio-common.h | 8 ++ >> 5 files changed, 174 insertions(+), 6 deletions(-) >> >> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c >> index 7200f6f1c2a2..0cfa9d31732a 100644 >> --- a/hw/vfio/migration-multifd.c >> +++ b/hw/vfio/migration-multifd.c >> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp) >> return true; >> } >> >> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f) >> +{ >> + assert(vfio_multifd_transfer_enabled(vbasedev)); >> + >> + /* >> + * Emit dummy NOP data on the main migration channel since the actual >> + * device state transfer is done via multifd channels. >> + */ >> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); >> +} >> + >> +static bool >> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, >> + char *idstr, >> + uint32_t instance_id, >> + uint32_t idx, >> + Error **errp) >> +{ >> + g_autoptr(QIOChannelBuffer) bioc = NULL; >> + g_autoptr(QEMUFile) f = NULL; >> + int ret; >> + g_autofree VFIODeviceStatePacket *packet = NULL; >> + size_t packet_len; >> + >> + bioc = qio_channel_buffer_new(0); >> + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save"); >> + >> + f = qemu_file_new_output(QIO_CHANNEL(bioc)); >> + >> + if (vfio_save_device_config_state(f, vbasedev, errp)) { >> + return false; >> + } >> + >> + ret = qemu_fflush(f); >> + if (ret) { >> + error_setg(errp, "save config state flush failed: %d", ret); > > Let's add vbasedev->name to the error message so we know which device caused the error. Done. >> + return false; >> + } >> + >> + packet_len = sizeof(*packet) + bioc->usage; >> + packet = g_malloc0(packet_len); >> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; >> + packet->idx = idx; >> + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE; >> + memcpy(&packet->data, bioc->data, bioc->usage); >> + >> + if (!multifd_queue_device_state(idstr, instance_id, >> + (char *)packet, packet_len)) { >> + error_setg(errp, "multifd config data queuing failed"); > > Ditto. Done. >> + return false; >> + } >> + >> + vfio_add_bytes_transferred(packet_len); >> + >> + return true; >> +} >> + >> +/* >> + * This thread is spawned by the migration core directly via >> + * .save_live_complete_precopy_thread SaveVMHandler. >> + * >> + * It exits after either: >> + * * completing saving the remaining device state and device config, OR: >> + * * encountering some error while doing the above, OR: >> + * * being forcefully aborted by the migration core by >> + * multifd_device_state_save_thread_should_exit() returning true. >> + */ >> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, >> + Error **errp) >> +{ >> + VFIODevice *vbasedev = d->handler_opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + bool ret; >> + g_autofree VFIODeviceStatePacket *packet = NULL; >> + uint32_t idx; >> + >> + if (!vfio_multifd_transfer_enabled(vbasedev)) { >> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */ >> + return true; >> + } >> + >> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name, >> + d->idstr, d->instance_id); >> + >> + /* We reach here with device state STOP or STOP_COPY only */ >> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, >> + VFIO_DEVICE_STATE_STOP, errp)) { >> + ret = false; >> + goto ret_finish; >> + } >> + >> + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); >> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; >> + >> + for (idx = 0; ; idx++) { >> + ssize_t data_size; >> + size_t packet_size; >> + >> + if (multifd_device_state_save_thread_should_exit()) { >> + error_setg(errp, "operation cancelled"); > > Same comment as in patch #27: > > IIUC, if multifd_device_state_save_thread_should_exit() returns true, it means that some other code part already failed and set migration error, no? > If so, shouldn't we return true here? After all, vfio_save_complete_precopy_thread didn't really fail, it just got signal to terminate itself Same as in the "load thread" case - the thread didn't succeed with saving all the data either, but got cancelled. > >> + ret = false; >> + goto ret_finish; >> + } >> + >> + data_size = read(migration->data_fd, &packet->data, >> + migration->data_buffer_size); >> + if (data_size < 0) { >> + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d", >> + idx, errno); > > Let's add vbasedev->name to the error message so we know which device caused the error. Done. >> + ret = false; >> + goto ret_finish; >> + } else if (data_size == 0) { >> + break; >> + } >> + >> + packet->idx = idx; >> + packet_size = sizeof(*packet) + data_size; >> + >> + if (!multifd_queue_device_state(d->idstr, d->instance_id, >> + (char *)packet, packet_size)) { >> + error_setg(errp, "multifd data queuing failed"); > > Ditto. Done. > Thanks. > Thanks, Maciej
On 2/19/25 21:34, Maciej S. Szmigiero wrote: > From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> > > Implement the multifd device state transfer via additional per-device > thread inside save_live_complete_precopy_thread handler. > > Switch between doing the data transfer in the new handler and doing it > in the old save_state handler depending on the > x-migration-multifd-transfer device property value. > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> > --- > hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++ > hw/vfio/migration-multifd.h | 5 ++ > hw/vfio/migration.c | 26 +++++-- > hw/vfio/trace-events | 2 + > include/hw/vfio/vfio-common.h | 8 ++ > 5 files changed, 174 insertions(+), 6 deletions(-) > > diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c > index 7200f6f1c2a2..0cfa9d31732a 100644 > --- a/hw/vfio/migration-multifd.c > +++ b/hw/vfio/migration-multifd.c > @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp) > return true; > } > > +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f) > +{ > + assert(vfio_multifd_transfer_enabled(vbasedev)); > + > + /* > + * Emit dummy NOP data on the main migration channel since the actual > + * device state transfer is done via multifd channels. > + */ > + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); > +} > + > +static bool > +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, > + char *idstr, > + uint32_t instance_id, > + uint32_t idx, > + Error **errp) > +{ > + g_autoptr(QIOChannelBuffer) bioc = NULL; > + g_autoptr(QEMUFile) f = NULL; > + int ret; > + g_autofree VFIODeviceStatePacket *packet = NULL; > + size_t packet_len; > + > + bioc = qio_channel_buffer_new(0); > + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save"); > + > + f = qemu_file_new_output(QIO_CHANNEL(bioc)); > + > + if (vfio_save_device_config_state(f, vbasedev, errp)) { > + return false; > + } > + > + ret = qemu_fflush(f); > + if (ret) { > + error_setg(errp, "save config state flush failed: %d", ret); > + return false; > + } > + > + packet_len = sizeof(*packet) + bioc->usage; > + packet = g_malloc0(packet_len); > + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; > + packet->idx = idx; > + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE; > + memcpy(&packet->data, bioc->data, bioc->usage); > + > + if (!multifd_queue_device_state(idstr, instance_id, > + (char *)packet, packet_len)) { > + error_setg(errp, "multifd config data queuing failed"); > + return false; > + } > + > + vfio_add_bytes_transferred(packet_len); > + > + return true; > +} > + > +/* > + * This thread is spawned by the migration core directly via > + * .save_live_complete_precopy_thread SaveVMHandler. > + * > + * It exits after either: > + * * completing saving the remaining device state and device config, OR: > + * * encountering some error while doing the above, OR: > + * * being forcefully aborted by the migration core by > + * multifd_device_state_save_thread_should_exit() returning true. > + */ > +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, > + Error **errp) In qemu_savevm_state_complete_precopy_iterable(), this handler is called : .... if (multifd_device_state) { QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { SaveLiveCompletePrecopyThreadHandler hdlr; if (!se->ops || (in_postcopy && se->ops->has_postcopy && se->ops->has_postcopy(se->opaque)) || !se->ops->save_live_complete_precopy_thread) { continue; } hdlr = se->ops->save_live_complete_precopy_thread; multifd_spawn_device_state_save_thread(hdlr, se->idstr, se->instance_id, se->opaque); } } I suggest naming it : vfio_multifd_save_complete_precopy_thread() > +{ > + VFIODevice *vbasedev = d->handler_opaque; > + VFIOMigration *migration = vbasedev->migration; > + bool ret; > + g_autofree VFIODeviceStatePacket *packet = NULL; > + uint32_t idx; > + > + if (!vfio_multifd_transfer_enabled(vbasedev)) { > + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */ > + return true; > + } > + > + trace_vfio_save_complete_precopy_thread_start(vbasedev->name, > + d->idstr, d->instance_id); > + > + /* We reach here with device state STOP or STOP_COPY only */ > + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, > + VFIO_DEVICE_STATE_STOP, errp)) { > + ret = false; These "ret = false" can be avoided if the variable is set at the top of the function. > + goto ret_finish; goto thread_exit ? > + } > + > + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); > + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; > + > + for (idx = 0; ; idx++) { > + ssize_t data_size; > + size_t packet_size; > + > + if (multifd_device_state_save_thread_should_exit()) { > + error_setg(errp, "operation cancelled"); > + ret = false; > + goto ret_finish; > + }> + > + data_size = read(migration->data_fd, &packet->data, > + migration->data_buffer_size); > + if (data_size < 0) { > + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d", > + idx, errno); > + ret = false; > + goto ret_finish; > + } else if (data_size == 0) { > + break; > + } > + > + packet->idx = idx; > + packet_size = sizeof(*packet) + data_size; > + > + if (!multifd_queue_device_state(d->idstr, d->instance_id, > + (char *)packet, packet_size)) { > + error_setg(errp, "multifd data queuing failed"); > + ret = false; > + goto ret_finish; > + } > + > + vfio_add_bytes_transferred(packet_size); > + } > + > + ret = vfio_save_complete_precopy_thread_config_state(vbasedev, > + d->idstr, > + d->instance_id, > + idx, errp); > + > +ret_finish: > + trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret); > + > + return ret; > +} > + > int vfio_multifd_switchover_start(VFIODevice *vbasedev) > { > VFIOMigration *migration = vbasedev->migration; > diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h > index 09cbb437d9d1..79780d7b5392 100644 > --- a/hw/vfio/migration-multifd.h > +++ b/hw/vfio/migration-multifd.h > @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp); > bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, > Error **errp); > > +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f); > + > +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, > + Error **errp); > + > int vfio_multifd_switchover_start(VFIODevice *vbasedev); > > #endif > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index b962309f7c27..69dcf2dac2fa 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -120,10 +120,10 @@ static void vfio_migration_set_device_state(VFIODevice *vbasedev, > vfio_migration_send_event(vbasedev); > } > > -static int vfio_migration_set_state(VFIODevice *vbasedev, > - enum vfio_device_mig_state new_state, > - enum vfio_device_mig_state recover_state, > - Error **errp) > +int vfio_migration_set_state(VFIODevice *vbasedev, > + enum vfio_device_mig_state new_state, > + enum vfio_device_mig_state recover_state, > + Error **errp) > { > VFIOMigration *migration = vbasedev->migration; > uint64_t buf[DIV_ROUND_UP(sizeof(struct vfio_device_feature) + > @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, > return ret; > } > > -static int vfio_save_device_config_state(QEMUFile *f, void *opaque, > - Error **errp) > +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp) > { > VFIODevice *vbasedev = opaque; > int ret; > @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) > uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; > int ret; > > + if (!vfio_multifd_transfer_setup(vbasedev, errp)) { > + return -EINVAL; > + } > + please move to another patch with the similar change of patch 25. > qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); > > vfio_query_stop_copy_size(vbasedev, &stop_copy_size); > @@ -631,6 +634,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) > int ret; > Error *local_err = NULL; > > + if (vfio_multifd_transfer_enabled(vbasedev)) { > + vfio_multifd_emit_dummy_eos(vbasedev, f); > + return 0; > + } > + > trace_vfio_save_complete_precopy_start(vbasedev->name); > > /* We reach here with device state STOP or STOP_COPY only */ > @@ -662,6 +670,11 @@ static void vfio_save_state(QEMUFile *f, void *opaque) > Error *local_err = NULL; > int ret; > > + if (vfio_multifd_transfer_enabled(vbasedev)) { > + vfio_multifd_emit_dummy_eos(vbasedev, f); > + return; > + } > + > ret = vfio_save_device_config_state(f, opaque, &local_err); > if (ret) { > error_prepend(&local_err, > @@ -819,6 +832,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { > .is_active_iterate = vfio_is_active_iterate, > .save_live_iterate = vfio_save_iterate, > .save_live_complete_precopy = vfio_save_complete_precopy, > + .save_live_complete_precopy_thread = vfio_save_complete_precopy_thread, > .save_state = vfio_save_state, > .load_setup = vfio_load_setup, > .load_cleanup = vfio_load_cleanup, > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 418b378ebd29..039979bdd98f 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -168,6 +168,8 @@ vfio_save_block_precopy_empty_hit(const char *name) " (%s)" > vfio_save_cleanup(const char *name) " (%s)" > vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" > vfio_save_complete_precopy_start(const char *name) " (%s)" > +vfio_save_complete_precopy_thread_start(const char *name, const char *idstr, uint32_t instance_id) " (%s) idstr %s instance %"PRIu32 > +vfio_save_complete_precopy_thread_end(const char *name, int ret) " (%s) ret %d" > vfio_save_device_config_state(const char *name) " (%s)" > vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size %"PRIu64" precopy dirty size %"PRIu64 > vfio_save_iterate_start(const char *name) " (%s)" > diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h > index ce2bdea8a2c2..ba851917f9fc 100644 > --- a/include/hw/vfio/vfio-common.h > +++ b/include/hw/vfio/vfio-common.h > @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val); > bool vfio_device_state_is_running(VFIODevice *vbasedev); > bool vfio_device_state_is_precopy(VFIODevice *vbasedev); > > +#ifdef CONFIG_LINUX > +int vfio_migration_set_state(VFIODevice *vbasedev, > + enum vfio_device_mig_state new_state, > + enum vfio_device_mig_state recover_state, > + Error **errp); please move below with the other declarations under #ifdef CONFIG_LINUX. > +#endif > + > +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp); > int vfio_load_device_config_state(QEMUFile *f, void *opaque); > > #ifdef CONFIG_LINUX > Thanks, C.
On 26.02.2025 17:43, Cédric Le Goater wrote: > On 2/19/25 21:34, Maciej S. Szmigiero wrote: >> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >> >> Implement the multifd device state transfer via additional per-device >> thread inside save_live_complete_precopy_thread handler. >> >> Switch between doing the data transfer in the new handler and doing it >> in the old save_state handler depending on the >> x-migration-multifd-transfer device property value. >> >> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >> --- >> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++ >> hw/vfio/migration-multifd.h | 5 ++ >> hw/vfio/migration.c | 26 +++++-- >> hw/vfio/trace-events | 2 + >> include/hw/vfio/vfio-common.h | 8 ++ >> 5 files changed, 174 insertions(+), 6 deletions(-) >> >> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c >> index 7200f6f1c2a2..0cfa9d31732a 100644 >> --- a/hw/vfio/migration-multifd.c >> +++ b/hw/vfio/migration-multifd.c >> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp) >> return true; >> } >> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f) >> +{ >> + assert(vfio_multifd_transfer_enabled(vbasedev)); >> + >> + /* >> + * Emit dummy NOP data on the main migration channel since the actual >> + * device state transfer is done via multifd channels. >> + */ >> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); >> +} >> + >> +static bool >> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, >> + char *idstr, >> + uint32_t instance_id, >> + uint32_t idx, >> + Error **errp) >> +{ >> + g_autoptr(QIOChannelBuffer) bioc = NULL; >> + g_autoptr(QEMUFile) f = NULL; >> + int ret; >> + g_autofree VFIODeviceStatePacket *packet = NULL; >> + size_t packet_len; >> + >> + bioc = qio_channel_buffer_new(0); >> + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save"); >> + >> + f = qemu_file_new_output(QIO_CHANNEL(bioc)); >> + >> + if (vfio_save_device_config_state(f, vbasedev, errp)) { >> + return false; >> + } >> + >> + ret = qemu_fflush(f); >> + if (ret) { >> + error_setg(errp, "save config state flush failed: %d", ret); >> + return false; >> + } >> + >> + packet_len = sizeof(*packet) + bioc->usage; >> + packet = g_malloc0(packet_len); >> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; >> + packet->idx = idx; >> + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE; >> + memcpy(&packet->data, bioc->data, bioc->usage); >> + >> + if (!multifd_queue_device_state(idstr, instance_id, >> + (char *)packet, packet_len)) { >> + error_setg(errp, "multifd config data queuing failed"); >> + return false; >> + } >> + >> + vfio_add_bytes_transferred(packet_len); >> + >> + return true; >> +} >> + >> +/* >> + * This thread is spawned by the migration core directly via >> + * .save_live_complete_precopy_thread SaveVMHandler. >> + * >> + * It exits after either: >> + * * completing saving the remaining device state and device config, OR: >> + * * encountering some error while doing the above, OR: >> + * * being forcefully aborted by the migration core by >> + * multifd_device_state_save_thread_should_exit() returning true. >> + */ >> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, >> + Error **errp) > > In qemu_savevm_state_complete_precopy_iterable(), this handler is > called : > > .... > if (multifd_device_state) { > QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { > SaveLiveCompletePrecopyThreadHandler hdlr; > > if (!se->ops || (in_postcopy && se->ops->has_postcopy && > se->ops->has_postcopy(se->opaque)) || > !se->ops->save_live_complete_precopy_thread) { > continue; > } > > hdlr = se->ops->save_live_complete_precopy_thread; > multifd_spawn_device_state_save_thread(hdlr, > se->idstr, se->instance_id, > se->opaque); > } > } > > > I suggest naming it : vfio_multifd_save_complete_precopy_thread() Renamed accordingly. >> +{ >> + VFIODevice *vbasedev = d->handler_opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + bool ret; >> + g_autofree VFIODeviceStatePacket *packet = NULL; >> + uint32_t idx; >> + >> + if (!vfio_multifd_transfer_enabled(vbasedev)) { >> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */ >> + return true; >> + } >> + >> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name, >> + d->idstr, d->instance_id); >> + >> + /* We reach here with device state STOP or STOP_COPY only */ >> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, >> + VFIO_DEVICE_STATE_STOP, errp)) { >> + ret = false; > > These "ret = false" can be avoided if the variable is set at the > top of the function. I inverted the "ret" logic here as in vfio_load_bufs_thread() to make it false by default and set to true just before early exit label. >> + goto ret_finish; > > > goto thread_exit ? As I asked in one of the previous patches, do this comment mean that your want to rename ret_finish label to thread_exit? >> + } >> + >> + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); >> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; >> + >> + for (idx = 0; ; idx++) { >> + ssize_t data_size; >> + size_t packet_size; >> + >> + if (multifd_device_state_save_thread_should_exit()) { >> + error_setg(errp, "operation cancelled"); >> + ret = false; >> + goto ret_finish; >> + }> + >> + data_size = read(migration->data_fd, &packet->data, >> + migration->data_buffer_size); >> + if (data_size < 0) { >> + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d", >> + idx, errno); >> + ret = false; >> + goto ret_finish; >> + } else if (data_size == 0) { >> + break; >> + } >> + >> + packet->idx = idx; >> + packet_size = sizeof(*packet) + data_size; >> + >> + if (!multifd_queue_device_state(d->idstr, d->instance_id, >> + (char *)packet, packet_size)) { >> + error_setg(errp, "multifd data queuing failed"); >> + ret = false; >> + goto ret_finish; >> + } >> + >> + vfio_add_bytes_transferred(packet_size); >> + } >> + >> + ret = vfio_save_complete_precopy_thread_config_state(vbasedev, >> + d->idstr, >> + d->instance_id, >> + idx, errp); >> + >> +ret_finish: >> + trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret); >> + >> + return ret; >> +} >> + >> int vfio_multifd_switchover_start(VFIODevice *vbasedev) >> { >> VFIOMigration *migration = vbasedev->migration; >> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h >> index 09cbb437d9d1..79780d7b5392 100644 >> --- a/hw/vfio/migration-multifd.h >> +++ b/hw/vfio/migration-multifd.h >> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp); >> bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, >> Error **errp); >> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f); >> + >> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, >> + Error **errp); >> + >> int vfio_multifd_switchover_start(VFIODevice *vbasedev); >> #endif >> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >> index b962309f7c27..69dcf2dac2fa 100644 >> --- a/hw/vfio/migration.c >> +++ b/hw/vfio/migration.c (..) >> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, >> return ret; >> } >> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque, >> - Error **errp) >> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp) >> { >> VFIODevice *vbasedev = opaque; >> int ret; >> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) >> uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; >> int ret; >> + if (!vfio_multifd_transfer_setup(vbasedev, errp)) { >> + return -EINVAL; >> + } >> + > > please move to another patch with the similar change of patch 25. > This patch is about the send/save side while patch 25 is called "*receive* init/cleanup". So adding save setup to patch called "receive init" wouldn't be consistent with that patch subject. >> qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); >> vfio_query_stop_copy_size(vbasedev, &stop_copy_size); (..) >> index ce2bdea8a2c2..ba851917f9fc 100644 >> --- a/include/hw/vfio/vfio-common.h >> +++ b/include/hw/vfio/vfio-common.h >> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val); >> bool vfio_device_state_is_running(VFIODevice *vbasedev); >> bool vfio_device_state_is_precopy(VFIODevice *vbasedev); >> +#ifdef CONFIG_LINUX >> +int vfio_migration_set_state(VFIODevice *vbasedev, >> + enum vfio_device_mig_state new_state, >> + enum vfio_device_mig_state recover_state, >> + Error **errp); > > please move below with the other declarations under #ifdef CONFIG_LINUX. > >> +#endif >> + >> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp); >> int vfio_load_device_config_state(QEMUFile *f, void *opaque); >> #ifdef CONFIG_LINUX >> > Done. > > Thanks, > > C. Thanks, Maciej
On 2/26/25 22:05, Maciej S. Szmigiero wrote: > On 26.02.2025 17:43, Cédric Le Goater wrote: >> On 2/19/25 21:34, Maciej S. Szmigiero wrote: >>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >>> >>> Implement the multifd device state transfer via additional per-device >>> thread inside save_live_complete_precopy_thread handler. >>> >>> Switch between doing the data transfer in the new handler and doing it >>> in the old save_state handler depending on the >>> x-migration-multifd-transfer device property value. >>> >>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >>> --- >>> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++ >>> hw/vfio/migration-multifd.h | 5 ++ >>> hw/vfio/migration.c | 26 +++++-- >>> hw/vfio/trace-events | 2 + >>> include/hw/vfio/vfio-common.h | 8 ++ >>> 5 files changed, 174 insertions(+), 6 deletions(-) >>> >>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c >>> index 7200f6f1c2a2..0cfa9d31732a 100644 >>> --- a/hw/vfio/migration-multifd.c >>> +++ b/hw/vfio/migration-multifd.c >>> @@ -476,6 +476,145 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp) >>> return true; >>> } >>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f) >>> +{ >>> + assert(vfio_multifd_transfer_enabled(vbasedev)); >>> + >>> + /* >>> + * Emit dummy NOP data on the main migration channel since the actual >>> + * device state transfer is done via multifd channels. >>> + */ >>> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); >>> +} >>> + >>> +static bool >>> +vfio_save_complete_precopy_thread_config_state(VFIODevice *vbasedev, >>> + char *idstr, >>> + uint32_t instance_id, >>> + uint32_t idx, >>> + Error **errp) >>> +{ >>> + g_autoptr(QIOChannelBuffer) bioc = NULL; >>> + g_autoptr(QEMUFile) f = NULL; >>> + int ret; >>> + g_autofree VFIODeviceStatePacket *packet = NULL; >>> + size_t packet_len; >>> + >>> + bioc = qio_channel_buffer_new(0); >>> + qio_channel_set_name(QIO_CHANNEL(bioc), "vfio-device-config-save"); >>> + >>> + f = qemu_file_new_output(QIO_CHANNEL(bioc)); >>> + >>> + if (vfio_save_device_config_state(f, vbasedev, errp)) { >>> + return false; >>> + } >>> + >>> + ret = qemu_fflush(f); >>> + if (ret) { >>> + error_setg(errp, "save config state flush failed: %d", ret); >>> + return false; >>> + } >>> + >>> + packet_len = sizeof(*packet) + bioc->usage; >>> + packet = g_malloc0(packet_len); >>> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; >>> + packet->idx = idx; >>> + packet->flags = VFIO_DEVICE_STATE_CONFIG_STATE; >>> + memcpy(&packet->data, bioc->data, bioc->usage); >>> + >>> + if (!multifd_queue_device_state(idstr, instance_id, >>> + (char *)packet, packet_len)) { >>> + error_setg(errp, "multifd config data queuing failed"); >>> + return false; >>> + } >>> + >>> + vfio_add_bytes_transferred(packet_len); >>> + >>> + return true; >>> +} >>> + >>> +/* >>> + * This thread is spawned by the migration core directly via >>> + * .save_live_complete_precopy_thread SaveVMHandler. >>> + * >>> + * It exits after either: >>> + * * completing saving the remaining device state and device config, OR: >>> + * * encountering some error while doing the above, OR: >>> + * * being forcefully aborted by the migration core by >>> + * multifd_device_state_save_thread_should_exit() returning true. >>> + */ >>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, >>> + Error **errp) >> >> In qemu_savevm_state_complete_precopy_iterable(), this handler is >> called : >> >> .... >> if (multifd_device_state) { >> QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { >> SaveLiveCompletePrecopyThreadHandler hdlr; >> >> if (!se->ops || (in_postcopy && se->ops->has_postcopy && >> se->ops->has_postcopy(se->opaque)) || >> !se->ops->save_live_complete_precopy_thread) { >> continue; >> } >> >> hdlr = se->ops->save_live_complete_precopy_thread; >> multifd_spawn_device_state_save_thread(hdlr, >> se->idstr, se->instance_id, >> se->opaque); >> } >> } >> >> >> I suggest naming it : vfio_multifd_save_complete_precopy_thread() > > Renamed accordingly. > >>> +{ >>> + VFIODevice *vbasedev = d->handler_opaque; >>> + VFIOMigration *migration = vbasedev->migration; >>> + bool ret; >>> + g_autofree VFIODeviceStatePacket *packet = NULL; >>> + uint32_t idx; >>> + >>> + if (!vfio_multifd_transfer_enabled(vbasedev)) { >>> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */ >>> + return true; >>> + } >>> + >>> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name, >>> + d->idstr, d->instance_id); >>> + >>> + /* We reach here with device state STOP or STOP_COPY only */ >>> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, >>> + VFIO_DEVICE_STATE_STOP, errp)) { >>> + ret = false; >> >> These "ret = false" can be avoided if the variable is set at the >> top of the function. > > I inverted the "ret" logic here as in vfio_load_bufs_thread() > to make it false by default and set to true just before early > exit label. ok. Let's see what it looks like in v6. >>> + goto ret_finish; >> >> >> goto thread_exit ? > > As I asked in one of the previous patches, > do this comment mean that your want to rename ret_finish label to > thread_exit? Yes. I find label 'thread_exit' more meaning full. This is minor since there is only one 'exit' label. > >>> + } >>> + >>> + packet = g_malloc0(sizeof(*packet) + migration->data_buffer_size); >>> + packet->version = VFIO_DEVICE_STATE_PACKET_VER_CURRENT; >>> + >>> + for (idx = 0; ; idx++) { >>> + ssize_t data_size; >>> + size_t packet_size; >>> + >>> + if (multifd_device_state_save_thread_should_exit()) { >>> + error_setg(errp, "operation cancelled"); >>> + ret = false; >>> + goto ret_finish; >>> + }> + >>> + data_size = read(migration->data_fd, &packet->data, >>> + migration->data_buffer_size); >>> + if (data_size < 0) { >>> + error_setg(errp, "reading state buffer %" PRIu32 " failed: %d", >>> + idx, errno); >>> + ret = false; >>> + goto ret_finish; >>> + } else if (data_size == 0) { >>> + break; >>> + } >>> + >>> + packet->idx = idx; >>> + packet_size = sizeof(*packet) + data_size; >>> + >>> + if (!multifd_queue_device_state(d->idstr, d->instance_id, >>> + (char *)packet, packet_size)) { >>> + error_setg(errp, "multifd data queuing failed"); >>> + ret = false; >>> + goto ret_finish; >>> + } >>> + >>> + vfio_add_bytes_transferred(packet_size); >>> + } >>> + >>> + ret = vfio_save_complete_precopy_thread_config_state(vbasedev, >>> + d->idstr, >>> + d->instance_id, >>> + idx, errp); >>> + >>> +ret_finish: >>> + trace_vfio_save_complete_precopy_thread_end(vbasedev->name, ret); >>> + >>> + return ret; >>> +} >>> + >>> int vfio_multifd_switchover_start(VFIODevice *vbasedev) >>> { >>> VFIOMigration *migration = vbasedev->migration; >>> diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h >>> index 09cbb437d9d1..79780d7b5392 100644 >>> --- a/hw/vfio/migration-multifd.h >>> +++ b/hw/vfio/migration-multifd.h >>> @@ -25,6 +25,11 @@ bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp); >>> bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, >>> Error **errp); >>> +void vfio_multifd_emit_dummy_eos(VFIODevice *vbasedev, QEMUFile *f); >>> + >>> +bool vfio_save_complete_precopy_thread(SaveLiveCompletePrecopyThreadData *d, >>> + Error **errp); >>> + >>> int vfio_multifd_switchover_start(VFIODevice *vbasedev); >>> #endif >>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >>> index b962309f7c27..69dcf2dac2fa 100644 >>> --- a/hw/vfio/migration.c >>> +++ b/hw/vfio/migration.c > (..) >>> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, >>> return ret; >>> } >>> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque, >>> - Error **errp) >>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp) >>> { >>> VFIODevice *vbasedev = opaque; >>> int ret; >>> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) >>> uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; >>> int ret; >>> + if (!vfio_multifd_transfer_setup(vbasedev, errp)) { >>> + return -EINVAL; >>> + } >>> + >> >> please move to another patch with the similar change of patch 25. >> > > This patch is about the send/save side while patch 25 > is called "*receive* init/cleanup". > > So adding save setup to patch called "receive init" wouldn't be > consistent with that patch subject. In that case, could please add an extra patch checking for the consistency of the settings ? Thanks, C. > >>> qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE); >>> vfio_query_stop_copy_size(vbasedev, &stop_copy_size); > > (..) >>> index ce2bdea8a2c2..ba851917f9fc 100644 >>> --- a/include/hw/vfio/vfio-common.h >>> +++ b/include/hw/vfio/vfio-common.h >>> @@ -298,6 +298,14 @@ void vfio_add_bytes_transferred(unsigned long val); >>> bool vfio_device_state_is_running(VFIODevice *vbasedev); >>> bool vfio_device_state_is_precopy(VFIODevice *vbasedev); >>> +#ifdef CONFIG_LINUX >>> +int vfio_migration_set_state(VFIODevice *vbasedev, >>> + enum vfio_device_mig_state new_state, >>> + enum vfio_device_mig_state recover_state, >>> + Error **errp); >> >> please move below with the other declarations under #ifdef CONFIG_LINUX. >> >>> +#endif >>> + >>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp); >>> int vfio_load_device_config_state(QEMUFile *f, void *opaque); >>> #ifdef CONFIG_LINUX >>> >> > > Done. > >> >> Thanks, >> >> C. > > Thanks, > Maciej >
On 28.02.2025 10:13, Cédric Le Goater wrote: > On 2/26/25 22:05, Maciej S. Szmigiero wrote: >> On 26.02.2025 17:43, Cédric Le Goater wrote: >>> On 2/19/25 21:34, Maciej S. Szmigiero wrote: >>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >>>> >>>> Implement the multifd device state transfer via additional per-device >>>> thread inside save_live_complete_precopy_thread handler. >>>> >>>> Switch between doing the data transfer in the new handler and doing it >>>> in the old save_state handler depending on the >>>> x-migration-multifd-transfer device property value. >>>> >>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >>>> --- >>>> hw/vfio/migration-multifd.c | 139 ++++++++++++++++++++++++++++++++++ >>>> hw/vfio/migration-multifd.h | 5 ++ >>>> hw/vfio/migration.c | 26 +++++-- >>>> hw/vfio/trace-events | 2 + >>>> include/hw/vfio/vfio-common.h | 8 ++ >>>> 5 files changed, 174 insertions(+), 6 deletions(-) >>>> >>>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c >>>> index 7200f6f1c2a2..0cfa9d31732a 100644 >>>> --- a/hw/vfio/migration-multifd.c >>>> +++ b/hw/vfio/migration-multifd.c (..) >>>> +{ >>>> + VFIODevice *vbasedev = d->handler_opaque; >>>> + VFIOMigration *migration = vbasedev->migration; >>>> + bool ret; >>>> + g_autofree VFIODeviceStatePacket *packet = NULL; >>>> + uint32_t idx; >>>> + >>>> + if (!vfio_multifd_transfer_enabled(vbasedev)) { >>>> + /* Nothing to do, vfio_save_complete_precopy() does the transfer. */ >>>> + return true; >>>> + } >>>> + >>>> + trace_vfio_save_complete_precopy_thread_start(vbasedev->name, >>>> + d->idstr, d->instance_id); >>>> + >>>> + /* We reach here with device state STOP or STOP_COPY only */ >>>> + if (vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, >>>> + VFIO_DEVICE_STATE_STOP, errp)) { >>>> + ret = false; >>> >>> These "ret = false" can be avoided if the variable is set at the >>> top of the function. >> >> I inverted the "ret" logic here as in vfio_load_bufs_thread() >> to make it false by default and set to true just before early >> exit label. > > ok. Let's see what it looks like in v6. > >>>> + goto ret_finish; >>> >>> >>> goto thread_exit ? >> >> As I asked in one of the previous patches, >> do this comment mean that your want to rename ret_finish label to >> thread_exit? > > Yes. I find label 'thread_exit' more meaning full. This is minor since > there is only one 'exit' label. Renamed ret_finish to thread_exit then. > >> (..) >>>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c >>>> index b962309f7c27..69dcf2dac2fa 100644 >>>> --- a/hw/vfio/migration.c >>>> +++ b/hw/vfio/migration.c >> (..) >>>> @@ -238,8 +238,7 @@ static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev, >>>> return ret; >>>> } >>>> -static int vfio_save_device_config_state(QEMUFile *f, void *opaque, >>>> - Error **errp) >>>> +int vfio_save_device_config_state(QEMUFile *f, void *opaque, Error **errp) >>>> { >>>> VFIODevice *vbasedev = opaque; >>>> int ret; >>>> @@ -453,6 +452,10 @@ static int vfio_save_setup(QEMUFile *f, void *opaque, Error **errp) >>>> uint64_t stop_copy_size = VFIO_MIG_DEFAULT_DATA_BUFFER_SIZE; >>>> int ret; >>>> + if (!vfio_multifd_transfer_setup(vbasedev, errp)) { >>>> + return -EINVAL; >>>> + } >>>> + >>> >>> please move to another patch with the similar change of patch 25. >>> >> >> This patch is about the send/save side while patch 25 >> is called "*receive* init/cleanup". >> >> So adding save setup to patch called "receive init" wouldn't be >> consistent with that patch subject. > > In that case, could please add an extra patch checking for the consistency > of the settings ? I split out wiring vfio_multifd_setup() and vfio_multifd_cleanup() into general VFIO load/save setup and cleanup methods from this patch and patch "Multifd device state transfer support - receive init/cleanup" into a brand new patch/commit. By the way, due to changes discussed over the last two days vfio_multifd_setup() (aka vfio_multifd_transfer_setup()) not only does consistency checking but also allocates VFIOMultifd: https://lore.kernel.org/qemu-devel/6546c3a4-bd81-42ea-88a2-b2f88ec2fbb3@maciej.szmigiero.name/ > > Thanks, > > C. > > > Thanks, Maciej
© 2016 - 2025 Red Hat, Inc.