During _RESUMING device state:
- If Vendor driver defines mappable region, mmap migration region.
- Load config state.
- For each data packet, until VFIO_MIG_FLAG_END_OF_STATE is reached:
- read data_size from packet, read buffer of data_size
- read data_offset from where QEMU should write data.
if region is mmaped, write data of data_size to mmaped region.
- write data_size.
For an mmapped region, the write to data_size tells the kernel
driver that the data has been written to the staging buffer.
- if region is trapped, pwrite() data of data_size from data_offset.
- Repeat above until VFIO_MIG_FLAG_END_OF_STATE.
- Unmap migration region.
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
---
hw/vfio/migration.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 153 insertions(+)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 0a2f30872316..e4895f91761d 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -212,6 +212,22 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
return qemu_file_get_error(f);
}
+/*
+ * Load the device config state block from the migration stream.
+ *
+ * For PCI devices the config is restored through vfio_pci_load_config().
+ * The block must then be terminated by VFIO_MIG_FLAG_END_OF_STATE.
+ *
+ * @f: migration stream to read from
+ * @opaque: the VFIODevice being restored
+ *
+ * Returns -EINVAL if the end-of-state marker is missing, otherwise the
+ * value of qemu_file_get_error() for @f.
+ */
+static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    uint64_t data;
+
+    if (vbasedev->type == VFIO_DEVICE_TYPE_PCI) {
+        vfio_pci_load_config(vbasedev, f);
+    }
+
+    data = qemu_get_be64(f);
+    if (data != VFIO_MIG_FLAG_END_OF_STATE) {
+        /* Print what was actually read so a corrupt stream is diagnosable. */
+        error_report("Wrong end of block while loading device config space: "
+                     "0x%" PRIx64, data);
+        return -EINVAL;
+    }
+
+    return qemu_file_get_error(f);
+}
+
/* ---------------------------------------------------------------------- */
static int vfio_save_setup(QEMUFile *f, void *opaque)
@@ -372,12 +388,149 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
return ret;
}
+/*
+ * Prepare the device for receiving migration data.
+ *
+ * If the migration region has mmap descriptors, map it first; then move the
+ * device into VFIO_DEVICE_STATE_RESUMING.
+ *
+ * @f: migration stream (not used here)
+ * @opaque: the VFIODevice being restored
+ *
+ * Returns 0 on success, or the error returned by vfio_region_mmap() or
+ * vfio_migration_set_state().
+ */
+static int vfio_load_setup(QEMUFile *f, void *opaque)
+{
+    VFIODevice *vbasedev = opaque;
+    VFIOMigration *mig = vbasedev->migration;
+    VFIORegion *buffer = &mig->region.buffer;
+    int rc;
+
+    if (buffer->mmaps) {
+        rc = vfio_region_mmap(buffer);
+        if (rc != 0) {
+            error_report("Failed to mmap VFIO migration region %d: %s",
+                         mig->region.index, strerror(-rc));
+            return rc;
+        }
+    }
+
+    rc = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING);
+    if (rc == 0) {
+        return 0;
+    }
+
+    error_report("Failed to set state RESUMING");
+    return rc;
+}
+
+/*
+ * Tear down after an incoming migration.
+ *
+ * Shares its teardown with the save side by delegating to
+ * vfio_save_cleanup(). NOTE(review): per the patch description this is
+ * where the migration region gets unmapped -- confirm against
+ * vfio_save_cleanup().
+ *
+ * @opaque: the VFIODevice being restored
+ *
+ * Always returns 0.
+ */
+static int vfio_load_cleanup(void *opaque)
+{
+    vfio_save_cleanup(opaque);
+
+    return 0;
+}
+
+/*
+ * Find the mmapped sub-region of @region that contains @data_offset.
+ *
+ * On return *@buf points at @data_offset inside the mapping, or is NULL when
+ * no mapping covers the offset (the caller must then use trapped access).
+ *
+ * Returns 0 in both of those cases, or -EINVAL when the offset is mapped but
+ * @data_size bytes would run past the end of that mapping.
+ */
+static int vfio_load_find_mmap(VFIORegion *region, uint64_t data_offset,
+                               uint64_t data_size, void **buf)
+{
+    int i;
+
+    *buf = NULL;
+
+    if (!region->mmaps) {
+        return 0;
+    }
+
+    for (i = 0; i < region->nr_mmaps; i++) {
+        uint64_t start = region->mmaps[i].offset;
+        uint64_t len = region->mmaps[i].size;
+        uint64_t skip;
+
+        if (!region->mmaps[i].mmap || data_offset < start) {
+            continue;
+        }
+
+        skip = data_offset - start;
+        if (skip >= len) {
+            continue;
+        }
+
+        /* data_size comes from the stream: never trust it to fit. */
+        if (data_size > len - skip) {
+            error_report("Migration data size 0x%" PRIx64
+                         " exceeds mapped buffer at offset 0x%" PRIx64,
+                         data_size, data_offset);
+            return -EINVAL;
+        }
+
+        *buf = (uint8_t *)region->mmaps[i].mmap + skip;
+        return 0;
+    }
+
+    return 0;
+}
+
+/*
+ * Read the @data_size byte payload of one data packet from @f and pass it
+ * to the device through the migration region.
+ *
+ * The destination offset is read from the data_offset field of
+ * struct vfio_device_migration_info. If that offset lies in an mmapped
+ * sub-region the payload is read straight into the mapping; otherwise it is
+ * read into a temporary buffer and written with pwrite().
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int vfio_load_buffer(QEMUFile *f, VFIODevice *vbasedev,
+                            uint64_t data_size)
+{
+    VFIORegion *region = &vbasedev->migration->region.buffer;
+    uint64_t data_offset = 0;
+    void *bounce = NULL;    /* heap copy, only used for trapped access */
+    void *buf = NULL;
+    ssize_t len;
+    int ret;
+
+    len = pread(vbasedev->fd, &data_offset, sizeof(data_offset),
+                region->fd_offset +
+                offsetof(struct vfio_device_migration_info, data_offset));
+    if (len != (ssize_t)sizeof(data_offset)) {
+        error_report("Failed to get migration buffer data offset %zd", len);
+        return -EINVAL;
+    }
+
+    ret = vfio_load_find_mmap(region, data_offset, data_size, &buf);
+    if (ret) {
+        return ret;
+    }
+
+    if (!buf) {
+        /* The size is stream-controlled, so allocation failure must not abort. */
+        bounce = g_try_malloc0(data_size);
+        if (!bounce) {
+            error_report("Failed to allocate 0x%" PRIx64
+                         " bytes for migration buffer", data_size);
+            return -ENOMEM;
+        }
+        buf = bounce;
+    }
+
+    /* A truncated stream must not be committed to the device. */
+    if (qemu_get_buffer(f, buf, data_size) != data_size) {
+        error_report("Failed to read migration buffer from stream");
+        ret = qemu_file_get_error(f);
+        if (!ret) {
+            ret = -EIO;
+        }
+        goto out;
+    }
+
+    /*
+     * Order kept from the original patch: data_size is written first and,
+     * for trapped access only, the payload follows.
+     */
+    len = pwrite(vbasedev->fd, &data_size, sizeof(data_size),
+                 region->fd_offset +
+                 offsetof(struct vfio_device_migration_info, data_size));
+    if (len != (ssize_t)sizeof(data_size)) {
+        error_report("Failed to set migration buffer data size %zd", len);
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (bounce) {
+        len = pwrite(vbasedev->fd, bounce, data_size,
+                     region->fd_offset + data_offset);
+        if (len < 0 || (uint64_t)len != data_size) {
+            error_report("Failed to set migration buffer %zd", len);
+            ret = -EINVAL;
+            goto out;
+        }
+    }
+
+    ret = 0;
+
+out:
+    g_free(bounce);
+    return ret;
+}
+
+/*
+ * Load VFIO device state from the migration stream.
+ *
+ * The stream is a sequence of 64-bit tags, processed until
+ * VFIO_MIG_FLAG_END_OF_STATE is read:
+ *  - VFIO_MIG_FLAG_DEV_CONFIG_STATE: device config block
+ *  - VFIO_MIG_FLAG_DEV_SETUP_STATE: must be followed directly by
+ *    VFIO_MIG_FLAG_END_OF_STATE, which ends the load
+ *  - VFIO_MIG_FLAG_DEV_DATA_STATE: a 64-bit size followed by that many bytes
+ *    of device data (a size of 0 carries no payload)
+ * Any other tag is rejected.
+ *
+ * @f: migration stream to read from
+ * @opaque: the VFIODevice being restored
+ * @version_id: not used
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
+{
+    VFIODevice *vbasedev = opaque;
+    int ret = 0;
+    uint64_t data;
+
+    data = qemu_get_be64(f);
+    while (data != VFIO_MIG_FLAG_END_OF_STATE) {
+        switch (data) {
+        case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
+            ret = vfio_load_device_config_state(f, opaque);
+            if (ret) {
+                return ret;
+            }
+            break;
+        case VFIO_MIG_FLAG_DEV_SETUP_STATE:
+            data = qemu_get_be64(f);
+            if (data != VFIO_MIG_FLAG_END_OF_STATE) {
+                error_report("SETUP STATE: EOS not found 0x%" PRIx64, data);
+                return -EINVAL;
+            }
+            return 0;
+        case VFIO_MIG_FLAG_DEV_DATA_STATE:
+        {
+            uint64_t data_size = qemu_get_be64(f);
+
+            if (data_size == 0) {
+                break;
+            }
+
+            ret = vfio_load_buffer(f, vbasedev, data_size);
+            if (ret) {
+                return ret;
+            }
+            break;
+        }
+        default:
+            /* Prefer the stream error, if any, over a bogus-tag report. */
+            ret = qemu_file_get_error(f);
+            if (ret) {
+                return ret;
+            }
+            error_report("Unknown tag 0x%" PRIx64
+                         " in VFIO migration stream", data);
+            return -EINVAL;
+        }
+
+        ret = qemu_file_get_error(f);
+        if (ret) {
+            return ret;
+        }
+        data = qemu_get_be64(f);
+    }
+
+    return ret;
+}
+
static SaveVMHandlers savevm_vfio_handlers = {
.save_setup = vfio_save_setup,
.save_cleanup = vfio_save_cleanup,
.save_live_pending = vfio_save_pending,
.save_live_iterate = vfio_save_iterate,
.save_live_complete_precopy = vfio_save_complete_precopy,
+ .load_setup = vfio_load_setup, /* mmap the region if mappable, then set _RESUMING */
+ .load_cleanup = vfio_load_cleanup, /* delegates to vfio_save_cleanup() */
+ .load_state = vfio_load_state, /* parses config/setup/data packets until END_OF_STATE */
};
/* ---------------------------------------------------------------------- */
--
2.7.0
* Kirti Wankhede (kwankhede@nvidia.com) wrote:
> During _RESUMING device state:
> - If Vendor driver defines mappable region, mmap migration region.
> - Load config state.
> - For data packet, till VFIO_MIG_FLAG_END_OF_STATE is not reached
> - read data_size from packet, read buffer of data_size
> - read data_offset from where QEMU should write data.
> if region is mmaped, write data of data_size to mmaped region.
> - write data_size.
> In case of mmapped region, write to data_size indicates kernel
> driver that data is written in staging buffer.
> - if region is trapped, pwrite() data of data_size from data_offset.
> - Repeat above until VFIO_MIG_FLAG_END_OF_STATE.
> - Unmap migration region.
>
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
> hw/vfio/migration.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 153 insertions(+)
>
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 0a2f30872316..e4895f91761d 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -212,6 +212,22 @@ static int vfio_save_device_config_state(QEMUFile *f, void *opaque)
> return qemu_file_get_error(f);
> }
>
> +static int vfio_load_device_config_state(QEMUFile *f, void *opaque)
> +{
> + VFIODevice *vbasedev = opaque;
> +
> + if (vbasedev->type == VFIO_DEVICE_TYPE_PCI) {
> + vfio_pci_load_config(vbasedev, f);
> + }
> +
> + if (qemu_get_be64(f) != VFIO_MIG_FLAG_END_OF_STATE) {
> + error_report("Wrong end of block while loading device config space");
You might find it useful to print the value in the error.
> + return -EINVAL;
> + }
> +
> + return qemu_file_get_error(f);
> +}
> +
> /* ---------------------------------------------------------------------- */
>
> static int vfio_save_setup(QEMUFile *f, void *opaque)
> @@ -372,12 +388,149 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque)
> return ret;
> }
>
> +static int vfio_load_setup(QEMUFile *f, void *opaque)
> +{
> + VFIODevice *vbasedev = opaque;
> + VFIOMigration *migration = vbasedev->migration;
> + int ret = 0;
> +
> + if (migration->region.buffer.mmaps) {
> + ret = vfio_region_mmap(&migration->region.buffer);
> + if (ret) {
> + error_report("Failed to mmap VFIO migration region %d: %s",
> + migration->region.index, strerror(-ret));
> + return ret;
> + }
> + }
> +
> + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_RESUMING);
> + if (ret) {
> + error_report("Failed to set state RESUMING");
> + }
> + return ret;
> +}
> +
> +static int vfio_load_cleanup(void *opaque)
> +{
> + vfio_save_cleanup(opaque);
> + return 0;
> +}
> +
> +static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
> +{
> + VFIODevice *vbasedev = opaque;
> + VFIOMigration *migration = vbasedev->migration;
> + int ret = 0;
> + uint64_t data, data_size;
> +
> + data = qemu_get_be64(f);
> + while (data != VFIO_MIG_FLAG_END_OF_STATE) {
> + switch (data) {
> + case VFIO_MIG_FLAG_DEV_CONFIG_STATE:
> + {
> + ret = vfio_load_device_config_state(f, opaque);
> + if (ret) {
> + return ret;
> + }
> + break;
> + }
> + case VFIO_MIG_FLAG_DEV_SETUP_STATE:
> + {
> + data = qemu_get_be64(f);
> + if (data == VFIO_MIG_FLAG_END_OF_STATE) {
> + return ret;
> + } else {
> + error_report("SETUP STATE: EOS not found 0x%lx", data);
Please use PRIx64 for uint64_t (I know it's painful, but it's portable).
> + return -EINVAL;
> + }
> + break;
> + }
> + case VFIO_MIG_FLAG_DEV_DATA_STATE:
> + {
> + VFIORegion *region = &migration->region.buffer;
> + void *buf = NULL;
> + bool buffer_mmaped = false;
> + uint64_t data_offset = 0;
> +
> + data_size = qemu_get_be64(f);
> + if (data_size == 0) {
> + break;
> + }
> +
> + ret = pread(vbasedev->fd, &data_offset, sizeof(data_offset),
> + region->fd_offset +
> + offsetof(struct vfio_device_migration_info,
> + data_offset));
> + if (ret != sizeof(data_offset)) {
> + error_report("Failed to get migration buffer data offset %d",
> + ret);
> + return -EINVAL;
> + }
> +
> + if (region->mmaps) {
> + int i;
> +
> + for (i = 0; i < region->nr_mmaps; i++) {
> + if (region->mmaps[i].mmap &&
> + (data_offset >= region->mmaps[i].offset) &&
> + (data_offset < region->mmaps[i].offset +
> + region->mmaps[i].size)) {
> + buf = region->mmaps[i].mmap + (data_offset -
> + region->mmaps[i].offset);
What checks that the read data_size fits in the region?
[Treat the incoming stream as broken/malicious until proven otherwise]
> + buffer_mmaped = true;
> + break;
> + }
> + }
I think you had some code like this on the save side? Perhaps a
'find_region' helper would be good?
> + }
> +
> + if (!buffer_mmaped) {
> + buf = g_malloc0(data_size);
> + }
Given that data_size could be quite large, you might want to use
g_try_malloc0 and check the return value; especially since 'data_size'
is read from the stream.
> + qemu_get_buffer(f, buf, data_size);
> +
> + ret = pwrite(vbasedev->fd, &data_size, sizeof(data_size),
> + region->fd_offset +
> + offsetof(struct vfio_device_migration_info, data_size));
> + if (ret != sizeof(data_size)) {
> + error_report("Failed to set migration buffer data size %d",
> + ret);
> + return -EINVAL;
> + }
> +
> + if (!buffer_mmaped) {
> + ret = pwrite(vbasedev->fd, buf, data_size,
> + region->fd_offset + data_offset);
> + g_free(buf);
> +
> + if (ret != data_size) {
> + error_report("Failed to set migration buffer %d", ret);
> + return -EINVAL;
> + }
> + }
> + break;
> + }
> + }
> +
> + ret = qemu_file_get_error(f);
> + if (ret) {
> + return ret;
> + }
> + data = qemu_get_be64(f);
> + }
> +
> + return ret;
> +}
> +
> static SaveVMHandlers savevm_vfio_handlers = {
> .save_setup = vfio_save_setup,
> .save_cleanup = vfio_save_cleanup,
> .save_live_pending = vfio_save_pending,
> .save_live_iterate = vfio_save_iterate,
> .save_live_complete_precopy = vfio_save_complete_precopy,
> + .load_setup = vfio_load_setup,
> + .load_cleanup = vfio_load_cleanup,
> + .load_state = vfio_load_state,
> };
>
> /* ---------------------------------------------------------------------- */
> --
> 2.7.0
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
© 2016 - 2025 Red Hat, Inc.