From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
The multifd received data needs to be reassembled since device state
packets sent via different multifd channels can arrive out-of-order.
Therefore, each VFIO device state packet carries a header indicating its
position in the stream.
The raw device state data is saved into a VFIOStateBuffer for later
in-order loading into the device.
The last such VFIO device state packet should have
VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state.
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
hw/vfio/migration-multifd.c | 103 ++++++++++++++++++++++++++++++++++++
hw/vfio/migration-multifd.h | 3 ++
hw/vfio/migration.c | 1 +
hw/vfio/trace-events | 1 +
4 files changed, 108 insertions(+)
diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c
index c2defc0efef0..5d5ee1393674 100644
--- a/hw/vfio/migration-multifd.c
+++ b/hw/vfio/migration-multifd.c
@@ -42,6 +42,11 @@ typedef struct VFIOStateBuffer {
} VFIOStateBuffer;
typedef struct VFIOMultifd {
+ VFIOStateBuffers load_bufs;
+ QemuCond load_bufs_buffer_ready_cond;
+ QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
+ uint32_t load_buf_idx;
+ uint32_t load_buf_idx_last;
} VFIOMultifd;
static void vfio_state_buffer_clear(gpointer data)
@@ -87,15 +92,113 @@ static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx)
return &g_array_index(bufs->array, VFIOStateBuffer, idx);
}
+static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev,
+ VFIODeviceStatePacket *packet,
+ size_t packet_total_size,
+ Error **errp)
+{
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOMultifd *multifd = migration->multifd;
+ VFIOStateBuffer *lb;
+
+ vfio_state_buffers_assert_init(&multifd->load_bufs);
+ if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) {
+ vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1);
+ }
+
+ lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx);
+ if (lb->is_present) {
+ error_setg(errp, "state buffer %" PRIu32 " already filled",
+ packet->idx);
+ return false;
+ }
+
+ assert(packet->idx >= multifd->load_buf_idx);
+
+ lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet));
+ lb->len = packet_total_size - sizeof(*packet);
+ lb->is_present = true;
+
+ return true;
+}
+
+bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
+ Error **errp)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOMultifd *multifd = migration->multifd;
+ VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data;
+
+ /*
+ * Holding BQL here would violate the lock order and can cause
+ * a deadlock once we attempt to lock load_bufs_mutex below.
+ */
+ assert(!bql_locked());
+
+ if (!vfio_multifd_transfer_enabled(vbasedev)) {
+ error_setg(errp,
+ "got device state packet but not doing multifd transfer");
+ return false;
+ }
+
+ assert(multifd);
+
+ if (data_size < sizeof(*packet)) {
+ error_setg(errp, "packet too short at %zu (min is %zu)",
+ data_size, sizeof(*packet));
+ return false;
+ }
+
+ if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) {
+ error_setg(errp, "packet has unknown version %" PRIu32,
+ packet->version);
+ return false;
+ }
+
+ if (packet->idx == UINT32_MAX) {
+ error_setg(errp, "packet has too high idx");
+ return false;
+ }
+
+ trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx);
+
+ QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);
+
+ /* config state packet should be the last one in the stream */
+ if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) {
+ multifd->load_buf_idx_last = packet->idx;
+ }
+
+ if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size, errp)) {
+ return false;
+ }
+
+ qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond);
+
+ return true;
+}
+
VFIOMultifd *vfio_multifd_new(void)
{
VFIOMultifd *multifd = g_new(VFIOMultifd, 1);
+ vfio_state_buffers_init(&multifd->load_bufs);
+
+ qemu_mutex_init(&multifd->load_bufs_mutex);
+
+ multifd->load_buf_idx = 0;
+ multifd->load_buf_idx_last = UINT32_MAX;
+ qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
+
return multifd;
}
void vfio_multifd_free(VFIOMultifd *multifd)
{
+ qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
+ qemu_mutex_destroy(&multifd->load_bufs_mutex);
+
g_free(multifd);
}
diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h
index 1eefba3b2eed..d5ab7d6f85f5 100644
--- a/hw/vfio/migration-multifd.h
+++ b/hw/vfio/migration-multifd.h
@@ -22,4 +22,7 @@ bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev);
bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp);
+bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size,
+ Error **errp);
+
#endif
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 4311de763885..abaf4d08d4a9 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -806,6 +806,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
.load_setup = vfio_load_setup,
.load_cleanup = vfio_load_cleanup,
.load_state = vfio_load_state,
+ .load_state_buffer = vfio_load_state_buffer,
.switchover_ack_needed = vfio_switchover_ack_needed,
};
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 1bebe9877d88..042a3dc54a33 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -153,6 +153,7 @@ vfio_load_device_config_state_start(const char *name) " (%s)"
vfio_load_device_config_state_end(const char *name) " (%s)"
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d"
+vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32
vfio_migration_realize(const char *name) " (%s)"
vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s"
vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s"
On 19/02/2025 22:34, Maciej S. Szmigiero wrote: > External email: Use caution opening links or attachments > > > From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> > > The multifd received data needs to be reassembled since device state > packets sent via different multifd channels can arrive out-of-order. > > Therefore, each VFIO device state packet carries a header indicating its > position in the stream. > The raw device state data is saved into a VFIOStateBuffer for later > in-order loading into the device. > > The last such VFIO device state packet should have > VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state. > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> > --- > hw/vfio/migration-multifd.c | 103 ++++++++++++++++++++++++++++++++++++ > hw/vfio/migration-multifd.h | 3 ++ > hw/vfio/migration.c | 1 + > hw/vfio/trace-events | 1 + > 4 files changed, 108 insertions(+) > > diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c > index c2defc0efef0..5d5ee1393674 100644 > --- a/hw/vfio/migration-multifd.c > +++ b/hw/vfio/migration-multifd.c > @@ -42,6 +42,11 @@ typedef struct VFIOStateBuffer { > } VFIOStateBuffer; > > typedef struct VFIOMultifd { > + VFIOStateBuffers load_bufs; > + QemuCond load_bufs_buffer_ready_cond; > + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ > + uint32_t load_buf_idx; > + uint32_t load_buf_idx_last; > } VFIOMultifd; > > static void vfio_state_buffer_clear(gpointer data) > @@ -87,15 +92,113 @@ static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx) > return &g_array_index(bufs->array, VFIOStateBuffer, idx); > } > > +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, > + VFIODeviceStatePacket *packet, > + size_t packet_total_size, > + Error **errp) > +{ > + VFIOMigration *migration = vbasedev->migration; > + VFIOMultifd *multifd = migration->multifd; > + VFIOStateBuffer *lb; > + > + vfio_state_buffers_assert_init(&multifd->load_bufs); > + if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { > + vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1); > + } > + > + lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx); > + if (lb->is_present) { > + error_setg(errp, "state buffer %" PRIu32 " already filled", > + packet->idx); Let's add vbasedev->name to the error message so we know which device caused the error. > + return false; > + } > + > + assert(packet->idx >= multifd->load_buf_idx); > + > + lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); > + lb->len = packet_total_size - sizeof(*packet); > + lb->is_present = true; > + > + return true; > +} > + > +bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, > + Error **errp) > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + VFIOMultifd *multifd = migration->multifd; > + VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data; > + > + /* > + * Holding BQL here would violate the lock order and can cause > + * a deadlock once we attempt to lock load_bufs_mutex below. > + */ > + assert(!bql_locked()); To be clearer, I'd move the assert down to be just above "QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);". > + > + if (!vfio_multifd_transfer_enabled(vbasedev)) { > + error_setg(errp, > + "got device state packet but not doing multifd transfer"); Let's add vbasedev->name to the error message so we know which device caused the error. > + return false; > + } > + > + assert(multifd); > + > + if (data_size < sizeof(*packet)) { > + error_setg(errp, "packet too short at %zu (min is %zu)", > + data_size, sizeof(*packet)); Ditto. > + return false; > + } > + > + if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { > + error_setg(errp, "packet has unknown version %" PRIu32, > + packet->version); Ditto. > + return false; > + } > + > + if (packet->idx == UINT32_MAX) { > + error_setg(errp, "packet has too high idx"); Ditto. > + return false; > + } > + > + trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx); > + > + QEMU_LOCK_GUARD(&multifd->load_bufs_mutex); > + > + /* config state packet should be the last one in the stream */ > + if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) { > + multifd->load_buf_idx_last = packet->idx; > + } > + > + if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size, errp)) { > + return false; > + } > + > + qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); > + > + return true; > +} > + > VFIOMultifd *vfio_multifd_new(void) > { > VFIOMultifd *multifd = g_new(VFIOMultifd, 1); > > + vfio_state_buffers_init(&multifd->load_bufs); > + > + qemu_mutex_init(&multifd->load_bufs_mutex); Nit: move qemu_mutex_init() just above qemu_cond_init()? Thanks. > + > + multifd->load_buf_idx = 0; > + multifd->load_buf_idx_last = UINT32_MAX; > + qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); > + > return multifd; > } > > void vfio_multifd_free(VFIOMultifd *multifd) > { > + qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); > + qemu_mutex_destroy(&multifd->load_bufs_mutex); > + > g_free(multifd); > } > > diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h > index 1eefba3b2eed..d5ab7d6f85f5 100644 > --- a/hw/vfio/migration-multifd.h > +++ b/hw/vfio/migration-multifd.h > @@ -22,4 +22,7 @@ bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); > > bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp); > > +bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, > + Error **errp); > + > #endif > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index 4311de763885..abaf4d08d4a9 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -806,6 +806,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { > .load_setup = vfio_load_setup, > .load_cleanup = vfio_load_cleanup, > .load_state = vfio_load_state, > + .load_state_buffer = vfio_load_state_buffer, > .switchover_ack_needed = vfio_switchover_ack_needed, > }; > > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 1bebe9877d88..042a3dc54a33 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -153,6 +153,7 @@ vfio_load_device_config_state_start(const char *name) " (%s)" > vfio_load_device_config_state_end(const char *name) " (%s)" > vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 > vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d" > +vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32 > vfio_migration_realize(const char *name) " (%s)" > vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s" > vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s"
On 2.03.2025 14:12, Avihai Horon wrote: > > On 19/02/2025 22:34, Maciej S. Szmigiero wrote: >> External email: Use caution opening links or attachments >> >> >> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >> >> The multifd received data needs to be reassembled since device state >> packets sent via different multifd channels can arrive out-of-order. >> >> Therefore, each VFIO device state packet carries a header indicating its >> position in the stream. >> The raw device state data is saved into a VFIOStateBuffer for later >> in-order loading into the device. >> >> The last such VFIO device state packet should have >> VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state. >> >> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >> --- >> hw/vfio/migration-multifd.c | 103 ++++++++++++++++++++++++++++++++++++ >> hw/vfio/migration-multifd.h | 3 ++ >> hw/vfio/migration.c | 1 + >> hw/vfio/trace-events | 1 + >> 4 files changed, 108 insertions(+) >> >> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c >> index c2defc0efef0..5d5ee1393674 100644 >> --- a/hw/vfio/migration-multifd.c >> +++ b/hw/vfio/migration-multifd.c >> @@ -42,6 +42,11 @@ typedef struct VFIOStateBuffer { >> } VFIOStateBuffer; >> >> typedef struct VFIOMultifd { >> + VFIOStateBuffers load_bufs; >> + QemuCond load_bufs_buffer_ready_cond; >> + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ >> + uint32_t load_buf_idx; >> + uint32_t load_buf_idx_last; >> } VFIOMultifd; >> >> static void vfio_state_buffer_clear(gpointer data) >> @@ -87,15 +92,113 @@ static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx) >> return &g_array_index(bufs->array, VFIOStateBuffer, idx); >> } >> >> +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, >> + VFIODeviceStatePacket *packet, >> + size_t packet_total_size, >> + Error **errp) >> +{ >> + VFIOMigration *migration = vbasedev->migration; >> + VFIOMultifd *multifd = migration->multifd; >> + VFIOStateBuffer *lb; >> + >> + vfio_state_buffers_assert_init(&multifd->load_bufs); >> + if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { >> + vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1); >> + } >> + >> + lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx); >> + if (lb->is_present) { >> + error_setg(errp, "state buffer %" PRIu32 " already filled", >> + packet->idx); > > Let's add vbasedev->name to the error message so we know which device caused the error. Done. >> + return false; >> + } >> + >> + assert(packet->idx >= multifd->load_buf_idx); >> + >> + lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); >> + lb->len = packet_total_size - sizeof(*packet); >> + lb->is_present = true; >> + >> + return true; >> +} >> + >> +bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, >> + Error **errp) >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + VFIOMultifd *multifd = migration->multifd; >> + VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data; >> + >> + /* >> + * Holding BQL here would violate the lock order and can cause >> + * a deadlock once we attempt to lock load_bufs_mutex below. >> + */ >> + assert(!bql_locked()); > > To be clearer, I'd move the assert down to be just above "QEMU_LOCK_GUARD(&multifd->load_bufs_mutex);". Moved there. >> + >> + if (!vfio_multifd_transfer_enabled(vbasedev)) { >> + error_setg(errp, >> + "got device state packet but not doing multifd transfer"); > > Let's add vbasedev->name to the error message so we know which device caused the error. Done. >> + return false; >> + } >> + >> + assert(multifd); >> + >> + if (data_size < sizeof(*packet)) { >> + error_setg(errp, "packet too short at %zu (min is %zu)", >> + data_size, sizeof(*packet)); > > Ditto. Done. >> + return false; >> + } >> + >> + if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { >> + error_setg(errp, "packet has unknown version %" PRIu32, >> + packet->version); > > Ditto. Done. >> + return false; >> + } >> + >> + if (packet->idx == UINT32_MAX) { >> + error_setg(errp, "packet has too high idx"); > > Ditto. Done. >> + return false; >> + } >> + >> + trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx); >> + >> + QEMU_LOCK_GUARD(&multifd->load_bufs_mutex); >> + >> + /* config state packet should be the last one in the stream */ >> + if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) { >> + multifd->load_buf_idx_last = packet->idx; >> + } >> + >> + if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size, errp)) { >> + return false; >> + } >> + >> + qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); >> + >> + return true; >> +} >> + >> VFIOMultifd *vfio_multifd_new(void) >> { >> VFIOMultifd *multifd = g_new(VFIOMultifd, 1); >> >> + vfio_state_buffers_init(&multifd->load_bufs); >> + >> + qemu_mutex_init(&multifd->load_bufs_mutex); > > Nit: move qemu_mutex_init() just above qemu_cond_init()? It's in a separate "block" because it is common for all 3 conditions in the ultimate form of code (and most of the later variables too), rather just this single condition. > Thanks. Thanks, Maciej >> + >> + multifd->load_buf_idx = 0; >> + multifd->load_buf_idx_last = UINT32_MAX; >> + qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); >> + >> return multifd; >> } >>
On 2/19/25 21:34, Maciej S. Szmigiero wrote: > From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> > > The multifd received data needs to be reassembled since device state > packets sent via different multifd channels can arrive out-of-order. > > Therefore, each VFIO device state packet carries a header indicating its > position in the stream. > The raw device state data is saved into a VFIOStateBuffer for later > in-order loading into the device. > > The last such VFIO device state packet should have > VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state. > > Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> > --- > hw/vfio/migration-multifd.c | 103 ++++++++++++++++++++++++++++++++++++ > hw/vfio/migration-multifd.h | 3 ++ > hw/vfio/migration.c | 1 + > hw/vfio/trace-events | 1 + > 4 files changed, 108 insertions(+) > > diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c > index c2defc0efef0..5d5ee1393674 100644 > --- a/hw/vfio/migration-multifd.c > +++ b/hw/vfio/migration-multifd.c > @@ -42,6 +42,11 @@ typedef struct VFIOStateBuffer { > } VFIOStateBuffer; > > typedef struct VFIOMultifd { > + VFIOStateBuffers load_bufs; > + QemuCond load_bufs_buffer_ready_cond; > + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ > + uint32_t load_buf_idx; > + uint32_t load_buf_idx_last; > } VFIOMultifd; > > static void vfio_state_buffer_clear(gpointer data) > @@ -87,15 +92,113 @@ static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx) > return &g_array_index(bufs->array, VFIOStateBuffer, idx); > } > this routine expects load_bufs_mutex to be locked ? May be say so. > +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, could you pass VFIOMultifd* instead ? > + VFIODeviceStatePacket *packet, > + size_t packet_total_size, > + Error **errp) > +{ > + VFIOMigration *migration = vbasedev->migration; > + VFIOMultifd *multifd = migration->multifd; > + VFIOStateBuffer *lb; > + > + vfio_state_buffers_assert_init(&multifd->load_bufs); > + if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { > + vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1); > + } > + > + lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx); > + if (lb->is_present) { > + error_setg(errp, "state buffer %" PRIu32 " already filled", > + packet->idx); > + return false; > + } > + > + assert(packet->idx >= multifd->load_buf_idx); > + > + lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); > + lb->len = packet_total_size - sizeof(*packet); > + lb->is_present = true; > + > + return true; > +} > + > +bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, > + Error **errp) AFAICS, the only users of the .load_state_buffer() handlers is multifd_device_state_recv(). Please rename to vfio_multifd_load_state_buffer(). > +{ > + VFIODevice *vbasedev = opaque; > + VFIOMigration *migration = vbasedev->migration; > + VFIOMultifd *multifd = migration->multifd; > + VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data; > + > + /* > + * Holding BQL here would violate the lock order and can cause > + * a deadlock once we attempt to lock load_bufs_mutex below. > + */ > + assert(!bql_locked()); > + > + if (!vfio_multifd_transfer_enabled(vbasedev)) { > + error_setg(errp, > + "got device state packet but not doing multifd transfer"); > + return false; > + } > + > + assert(multifd); > + > + if (data_size < sizeof(*packet)) { > + error_setg(errp, "packet too short at %zu (min is %zu)", > + data_size, sizeof(*packet)); > + return false; > + } > + > + if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { > + error_setg(errp, "packet has unknown version %" PRIu32, > + packet->version); > + return false; > + } > + > + if (packet->idx == UINT32_MAX) { > + error_setg(errp, "packet has too high idx"); or "packet index is invalid" ? > + return false; > + } > + > + trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx); > + > + QEMU_LOCK_GUARD(&multifd->load_bufs_mutex); Using WITH_QEMU_LOCK_GUARD() would be cleaner I think. > + > + /* config state packet should be the last one in the stream */ > + if (packet->flags & VFIO_DEVICE_STATE_CONFIG_STATE) { > + multifd->load_buf_idx_last = packet->idx; > + } > + > + if (!vfio_load_state_buffer_insert(vbasedev, packet, data_size, errp)) { > + return false; > + } > + > + qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); > + > + return true; > +} > + > VFIOMultifd *vfio_multifd_new(void) > { > VFIOMultifd *multifd = g_new(VFIOMultifd, 1); > > + vfio_state_buffers_init(&multifd->load_bufs); > + > + qemu_mutex_init(&multifd->load_bufs_mutex); > + > + multifd->load_buf_idx = 0; > + multifd->load_buf_idx_last = UINT32_MAX; > + qemu_cond_init(&multifd->load_bufs_buffer_ready_cond); > + > return multifd; > } > > void vfio_multifd_free(VFIOMultifd *multifd) > { > + qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond); > + qemu_mutex_destroy(&multifd->load_bufs_mutex); > + > g_free(multifd); > } > > diff --git a/hw/vfio/migration-multifd.h b/hw/vfio/migration-multifd.h > index 1eefba3b2eed..d5ab7d6f85f5 100644 > --- a/hw/vfio/migration-multifd.h > +++ b/hw/vfio/migration-multifd.h > @@ -22,4 +22,7 @@ bool vfio_multifd_transfer_enabled(VFIODevice *vbasedev); > > bool vfio_multifd_transfer_setup(VFIODevice *vbasedev, Error **errp); > > +bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, > + Error **errp); > + > #endif > diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c > index 4311de763885..abaf4d08d4a9 100644 > --- a/hw/vfio/migration.c > +++ b/hw/vfio/migration.c > @@ -806,6 +806,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { > .load_setup = vfio_load_setup, > .load_cleanup = vfio_load_cleanup, > .load_state = vfio_load_state, > + .load_state_buffer = vfio_load_state_buffer, > .switchover_ack_needed = vfio_switchover_ack_needed,> }; > > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index 1bebe9877d88..042a3dc54a33 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -153,6 +153,7 @@ vfio_load_device_config_state_start(const char *name) " (%s)" > vfio_load_device_config_state_end(const char *name) " (%s)" > vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 > vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size %"PRIu64" ret %d" > +vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s) idx %"PRIu32 > vfio_migration_realize(const char *name) " (%s)" > vfio_migration_set_device_state(const char *name, const char *state) " (%s) state %s" > vfio_migration_set_state(const char *name, const char *new_state, const char *recover_state) " (%s) new state %s, recover state %s" > Thanks, C.
On 26.02.2025 11:43, Cédric Le Goater wrote: > On 2/19/25 21:34, Maciej S. Szmigiero wrote: >> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >> >> The multifd received data needs to be reassembled since device state >> packets sent via different multifd channels can arrive out-of-order. >> >> Therefore, each VFIO device state packet carries a header indicating its >> position in the stream. >> The raw device state data is saved into a VFIOStateBuffer for later >> in-order loading into the device. >> >> The last such VFIO device state packet should have >> VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state. >> >> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >> --- >> hw/vfio/migration-multifd.c | 103 ++++++++++++++++++++++++++++++++++++ >> hw/vfio/migration-multifd.h | 3 ++ >> hw/vfio/migration.c | 1 + >> hw/vfio/trace-events | 1 + >> 4 files changed, 108 insertions(+) >> >> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c >> index c2defc0efef0..5d5ee1393674 100644 >> --- a/hw/vfio/migration-multifd.c >> +++ b/hw/vfio/migration-multifd.c >> @@ -42,6 +42,11 @@ typedef struct VFIOStateBuffer { >> } VFIOStateBuffer; >> typedef struct VFIOMultifd { >> + VFIOStateBuffers load_bufs; >> + QemuCond load_bufs_buffer_ready_cond; >> + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ >> + uint32_t load_buf_idx; >> + uint32_t load_buf_idx_last; >> } VFIOMultifd; >> static void vfio_state_buffer_clear(gpointer data) >> @@ -87,15 +92,113 @@ static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx) >> return &g_array_index(bufs->array, VFIOStateBuffer, idx); >> } > > this routine expects load_bufs_mutex to be locked ? May be say so. I guess the comment above pertains to the vfio_load_state_buffer_insert() below. Do you mean it should have a comment that it expects to be called under load_bufs_mutex? >> +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, > > could you pass VFIOMultifd* instead ? No, it needs vbasedev->migration_max_queued_buffers too (introduced in later patch). Also, most of VFIO routines (besides very small helpers/wrappers) take VFIODevice *. >> + VFIODeviceStatePacket *packet, >> + size_t packet_total_size, >> + Error **errp) >> +{ >> + VFIOMigration *migration = vbasedev->migration; >> + VFIOMultifd *multifd = migration->multifd; >> + VFIOStateBuffer *lb; >> + >> + vfio_state_buffers_assert_init(&multifd->load_bufs); >> + if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { >> + vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1); >> + } >> + >> + lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx); >> + if (lb->is_present) { >> + error_setg(errp, "state buffer %" PRIu32 " already filled", >> + packet->idx); >> + return false; >> + } >> + >> + assert(packet->idx >= multifd->load_buf_idx); >> + >> + lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); >> + lb->len = packet_total_size - sizeof(*packet); >> + lb->is_present = true; >> + >> + return true; >> +} >> + >> +bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, >> + Error **errp) > > > AFAICS, the only users of the .load_state_buffer() handlers is > multifd_device_state_recv(). > > Please rename to vfio_multifd_load_state_buffer(). Renamed it accordingly. >> +{ >> + VFIODevice *vbasedev = opaque; >> + VFIOMigration *migration = vbasedev->migration; >> + VFIOMultifd *multifd = migration->multifd; >> + VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data; >> + >> + /* >> + * Holding BQL here would violate the lock order and can cause >> + * a deadlock once we attempt to lock load_bufs_mutex below. >> + */ >> + assert(!bql_locked()); >> + >> + if (!vfio_multifd_transfer_enabled(vbasedev)) { >> + error_setg(errp, >> + "got device state packet but not doing multifd transfer"); >> + return false; >> + } >> + >> + assert(multifd); >> + >> + if (data_size < sizeof(*packet)) { >> + error_setg(errp, "packet too short at %zu (min is %zu)", >> + data_size, sizeof(*packet)); >> + return false; >> + } >> + >> + if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { >> + error_setg(errp, "packet has unknown version %" PRIu32, >> + packet->version); >> + return false; >> + } >> + >> + if (packet->idx == UINT32_MAX) { >> + error_setg(errp, "packet has too high idx"); > > or "packet index is invalid" ? Changed the error message. >> + return false; >> + } >> + >> + trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx); >> + >> + QEMU_LOCK_GUARD(&multifd->load_bufs_mutex); > > Using WITH_QEMU_LOCK_GUARD() would be cleaner I think. Changed into a WITH_QEMU_LOCK_GUARD() block. > > > Thanks, > > C. Thanks, Maciej
On 2/26/25 22:04, Maciej S. Szmigiero wrote: > On 26.02.2025 11:43, Cédric Le Goater wrote: >> On 2/19/25 21:34, Maciej S. Szmigiero wrote: >>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >>> >>> The multifd received data needs to be reassembled since device state >>> packets sent via different multifd channels can arrive out-of-order. >>> >>> Therefore, each VFIO device state packet carries a header indicating its >>> position in the stream. >>> The raw device state data is saved into a VFIOStateBuffer for later >>> in-order loading into the device. >>> >>> The last such VFIO device state packet should have >>> VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state. >>> >>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >>> --- >>> hw/vfio/migration-multifd.c | 103 ++++++++++++++++++++++++++++++++++++ >>> hw/vfio/migration-multifd.h | 3 ++ >>> hw/vfio/migration.c | 1 + >>> hw/vfio/trace-events | 1 + >>> 4 files changed, 108 insertions(+) >>> >>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c >>> index c2defc0efef0..5d5ee1393674 100644 >>> --- a/hw/vfio/migration-multifd.c >>> +++ b/hw/vfio/migration-multifd.c >>> @@ -42,6 +42,11 @@ typedef struct VFIOStateBuffer { >>> } VFIOStateBuffer; >>> typedef struct VFIOMultifd { >>> + VFIOStateBuffers load_bufs; >>> + QemuCond load_bufs_buffer_ready_cond; >>> + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ >>> + uint32_t load_buf_idx; >>> + uint32_t load_buf_idx_last; >>> } VFIOMultifd; >>> static void vfio_state_buffer_clear(gpointer data) >>> @@ -87,15 +92,113 @@ static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx) >>> return &g_array_index(bufs->array, VFIOStateBuffer, idx); >>> } >> >> this routine expects load_bufs_mutex to be locked ? May be say so. > > I guess the comment above pertains to the vfio_load_state_buffer_insert() > below. > > Do you mean it should have a comment that it expects to be called > under load_bufs_mutex? Just a one liner like : /* called with load_bufs_mutex locked */ ? It's good to have for the future generations. > >>> +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, >> >> could you pass VFIOMultifd* instead ? > > No, it needs vbasedev->migration_max_queued_buffers too (introduced > in later patch). > > Also, most of VFIO routines (besides very small helpers/wrappers) > take VFIODevice *. OK. It's minor but I prefer when parameters are limited to the minimum. Having 'VFIODevice *' opens doors to a lot of state. Thanks, C. > >>> + VFIODeviceStatePacket *packet, >>> + size_t packet_total_size, >>> + Error **errp) >>> +{ >>> + VFIOMigration *migration = vbasedev->migration; >>> + VFIOMultifd *multifd = migration->multifd; >>> + VFIOStateBuffer *lb; >>> + >>> + vfio_state_buffers_assert_init(&multifd->load_bufs); >>> + if (packet->idx >= vfio_state_buffers_size_get(&multifd->load_bufs)) { >>> + vfio_state_buffers_size_set(&multifd->load_bufs, packet->idx + 1); >>> + } >>> + >>> + lb = vfio_state_buffers_at(&multifd->load_bufs, packet->idx); >>> + if (lb->is_present) { >>> + error_setg(errp, "state buffer %" PRIu32 " already filled", >>> + packet->idx); >>> + return false; >>> + } >>> + >>> + assert(packet->idx >= multifd->load_buf_idx); >>> + >>> + lb->data = g_memdup2(&packet->data, packet_total_size - sizeof(*packet)); >>> + lb->len = packet_total_size - sizeof(*packet); >>> + lb->is_present = true; >>> + >>> + return true; >>> +} >>> + >>> +bool vfio_load_state_buffer(void *opaque, char *data, size_t data_size, >>> + Error **errp) >> >> >> AFAICS, the only users of the .load_state_buffer() handlers is >> multifd_device_state_recv(). >> >> Please rename to vfio_multifd_load_state_buffer(). > > Renamed it accordingly. > >>> +{ >>> + VFIODevice *vbasedev = opaque; >>> + VFIOMigration *migration = vbasedev->migration; >>> + VFIOMultifd *multifd = migration->multifd; >>> + VFIODeviceStatePacket *packet = (VFIODeviceStatePacket *)data; >>> + >>> + /* >>> + * Holding BQL here would violate the lock order and can cause >>> + * a deadlock once we attempt to lock load_bufs_mutex below. >>> + */ >>> + assert(!bql_locked()); >>> + >>> + if (!vfio_multifd_transfer_enabled(vbasedev)) { >>> + error_setg(errp, >>> + "got device state packet but not doing multifd transfer"); >>> + return false; >>> + } >>> + >>> + assert(multifd); >>> + >>> + if (data_size < sizeof(*packet)) { >>> + error_setg(errp, "packet too short at %zu (min is %zu)", >>> + data_size, sizeof(*packet)); >>> + return false; >>> + } >>> + >>> + if (packet->version != VFIO_DEVICE_STATE_PACKET_VER_CURRENT) { >>> + error_setg(errp, "packet has unknown version %" PRIu32, >>> + packet->version); >>> + return false; >>> + } >>> + >>> + if (packet->idx == UINT32_MAX) { >>> + error_setg(errp, "packet has too high idx"); >> >> or "packet index is invalid" ? > > Changed the error message. > >>> + return false; >>> + } >>> + >>> + trace_vfio_load_state_device_buffer_incoming(vbasedev->name, packet->idx); >>> + >>> + QEMU_LOCK_GUARD(&multifd->load_bufs_mutex); >> >> Using WITH_QEMU_LOCK_GUARD() would be cleaner I think. > > Changed into a WITH_QEMU_LOCK_GUARD() block. > >> >> >> Thanks, >> >> C. > > Thanks, > Maciej >
On 28.02.2025 09:09, Cédric Le Goater wrote: > On 2/26/25 22:04, Maciej S. Szmigiero wrote: >> On 26.02.2025 11:43, Cédric Le Goater wrote: >>> On 2/19/25 21:34, Maciej S. Szmigiero wrote: >>>> From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com> >>>> >>>> The multifd received data needs to be reassembled since device state >>>> packets sent via different multifd channels can arrive out-of-order. >>>> >>>> Therefore, each VFIO device state packet carries a header indicating its >>>> position in the stream. >>>> The raw device state data is saved into a VFIOStateBuffer for later >>>> in-order loading into the device. >>>> >>>> The last such VFIO device state packet should have >>>> VFIO_DEVICE_STATE_CONFIG_STATE flag set and carry the device config state. >>>> >>>> Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com> >>>> --- >>>> hw/vfio/migration-multifd.c | 103 ++++++++++++++++++++++++++++++++++++ >>>> hw/vfio/migration-multifd.h | 3 ++ >>>> hw/vfio/migration.c | 1 + >>>> hw/vfio/trace-events | 1 + >>>> 4 files changed, 108 insertions(+) >>>> >>>> diff --git a/hw/vfio/migration-multifd.c b/hw/vfio/migration-multifd.c >>>> index c2defc0efef0..5d5ee1393674 100644 >>>> --- a/hw/vfio/migration-multifd.c >>>> +++ b/hw/vfio/migration-multifd.c >>>> @@ -42,6 +42,11 @@ typedef struct VFIOStateBuffer { >>>> } VFIOStateBuffer; >>>> typedef struct VFIOMultifd { >>>> + VFIOStateBuffers load_bufs; >>>> + QemuCond load_bufs_buffer_ready_cond; >>>> + QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */ >>>> + uint32_t load_buf_idx; >>>> + uint32_t load_buf_idx_last; >>>> } VFIOMultifd; >>>> static void vfio_state_buffer_clear(gpointer data) >>>> @@ -87,15 +92,113 @@ static VFIOStateBuffer *vfio_state_buffers_at(VFIOStateBuffers *bufs, guint idx) >>>> return &g_array_index(bufs->array, VFIOStateBuffer, idx); >>>> } >>> >>> this routine expects load_bufs_mutex to be locked ? May be say so. >> >> I guess the comment above pertains to the vfio_load_state_buffer_insert() >> below. >> >> Do you mean it should have a comment that it expects to be called >> under load_bufs_mutex? > > Just a one liner like : > > /* called with load_bufs_mutex locked */ > > ? > > It's good to have for the future generations. Okay, done. >> >>>> +static bool vfio_load_state_buffer_insert(VFIODevice *vbasedev, >>> >>> could you pass VFIOMultifd* instead ? >> >> No, it needs vbasedev->migration_max_queued_buffers too (introduced >> in later patch). >> > Also, most of VFIO routines (besides very small helpers/wrappers) >> take VFIODevice *. > > OK. It's minor but I prefer when parameters are limited to the minimum. > Having 'VFIODevice *' opens doors to a lot of state. > > > Thanks, > > C. > Thanks. Maciej
© 2016 - 2025 Red Hat, Inc.