Define flags to be used as delimiters in the migration stream for VFIO devices.
Add .save_setup and .save_cleanup functions. Map and unmap the migration
region from these functions at the source during the saving or pre-copy phase.
Set the VFIO device state depending on the VM's state. During live migration,
the VM is running when .save_setup is called, so the _SAVING | _RUNNING state
is set for the VFIO device. During save-restore, the VM is paused, so the
_SAVING state is set for the VFIO device.
Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Neo Jia <cjia@nvidia.com>
---
hw/vfio/migration.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++
hw/vfio/trace-events | 2 ++
2 files changed, 98 insertions(+)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 7c4fa0d08ea6..2e1054bf7f43 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -8,12 +8,15 @@
*/
#include "qemu/osdep.h"
+#include "qemu/main-loop.h"
+#include "qemu/cutils.h"
#include <linux/vfio.h>
#include "sysemu/runstate.h"
#include "hw/vfio/vfio-common.h"
#include "cpu.h"
#include "migration/migration.h"
+#include "migration/vmstate.h"
#include "migration/qemu-file.h"
#include "migration/register.h"
#include "migration/blocker.h"
@@ -25,6 +28,22 @@
#include "trace.h"
#include "hw/hw.h"
+/*
+ * Flags to be used as unique delimiters for VFIO devices in the migration
+ * stream. These flags are composed as:
+ * 0xffffffff => MSB 32-bit all 1s
+ * 0xef10 => Magic ID, represents emulated (virtual) function IO
+ * 0x0000 => 16-bits reserved for flags
+ *
+ * The beginning of state information is marked by _DEV_CONFIG_STATE,
+ * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
+ * certain state information is marked by _END_OF_STATE.
+ */
+#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
+#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
+#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
+#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
+
static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
off_t off, bool iswrite)
{
@@ -129,6 +148,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
return 0;
}
+/* ---------------------------------------------------------------------- */
+
+static int vfio_save_setup(QEMUFile *f, void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ int ret;
+
+ trace_vfio_save_setup(vbasedev->name);
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
+
+ if (migration->region.mmaps) {
+ /*
+ * vfio_region_mmap() called from migration thread. Memory API called
+ * from vfio_region_mmap() needs the iothread lock when called from outside
+ * the main loop thread.
+ */
+ qemu_mutex_lock_iothread();
+ ret = vfio_region_mmap(&migration->region);
+ qemu_mutex_unlock_iothread();
+ if (ret) {
+ error_report("%s: Failed to mmap VFIO migration region: %s",
+ vbasedev->name, strerror(-ret));
+ error_report("%s: Falling back to slow path", vbasedev->name);
+ }
+ }
+
+ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
+ VFIO_DEVICE_STATE_SAVING);
+ if (ret) {
+ error_report("%s: Failed to set state SAVING", vbasedev->name);
+ return ret;
+ }
+
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+
+ ret = qemu_file_get_error(f);
+ if (ret) {
+ return ret;
+ }
+
+ return 0;
+}
+
+static void vfio_save_cleanup(void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+
+ if (migration->region.mmaps) {
+ vfio_region_unmap(&migration->region);
+ }
+ trace_vfio_save_cleanup(vbasedev->name);
+}
+
+static SaveVMHandlers savevm_vfio_handlers = {
+ .save_setup = vfio_save_setup,
+ .save_cleanup = vfio_save_cleanup,
+};
+
+/* ---------------------------------------------------------------------- */
+
static void vfio_vmstate_change(void *opaque, int running, RunState state)
{
VFIODevice *vbasedev = opaque;
@@ -219,6 +301,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
int ret;
Object *obj;
VFIOMigration *migration;
+ char id[256] = "";
+ g_autofree char *path = NULL, *oid;
if (!vbasedev->ops->vfio_get_object) {
return -EINVAL;
@@ -248,6 +332,18 @@ static int vfio_migration_init(VFIODevice *vbasedev,
vbasedev->migration = migration;
migration->vbasedev = vbasedev;
+
+ oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
+ if (oid) {
+ path = g_strdup_printf("%s/vfio", oid);
+ } else {
+ path = g_strdup("vfio");
+ }
+ strpadcpy(id, sizeof(id), path, '\0');
+
+ register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
+ vbasedev);
+
migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
vbasedev);
migration->migration_state.notify = vfio_migration_state_notifier;
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 78d7d83b5ef8..f148b5e828c1 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -151,3 +151,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
+vfio_save_setup(const char *name) " (%s)"
+vfio_save_cleanup(const char *name) " (%s)"
--
2.7.0
On Thu, 22 Oct 2020 16:41:57 +0530
Kirti Wankhede <kwankhede@nvidia.com> wrote:
> Define flags to be used as delimiter in migration stream for VFIO devices.
> Added .save_setup and .save_cleanup functions. Map & unmap migration
> region from these functions at source during saving or pre-copy phase.
>
> Set VFIO device state depending on VM's state. During live migration, VM is
> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
>
> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
> Reviewed-by: Neo Jia <cjia@nvidia.com>
> ---
> hw/vfio/migration.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++
> hw/vfio/trace-events | 2 ++
> 2 files changed, 98 insertions(+)
>
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> index 7c4fa0d08ea6..2e1054bf7f43 100644
> --- a/hw/vfio/migration.c
> +++ b/hw/vfio/migration.c
> @@ -8,12 +8,15 @@
> */
>
> #include "qemu/osdep.h"
> +#include "qemu/main-loop.h"
> +#include "qemu/cutils.h"
> #include <linux/vfio.h>
>
> #include "sysemu/runstate.h"
> #include "hw/vfio/vfio-common.h"
> #include "cpu.h"
> #include "migration/migration.h"
> +#include "migration/vmstate.h"
> #include "migration/qemu-file.h"
> #include "migration/register.h"
> #include "migration/blocker.h"
> @@ -25,6 +28,22 @@
> #include "trace.h"
> #include "hw/hw.h"
>
> +/*
> + * Flags to be used as unique delimiters for VFIO devices in the migration
> + * stream. These flags are composed as:
> + * 0xffffffff => MSB 32-bit all 1s
> + * 0xef10 => Magic ID, represents emulated (virtual) function IO
> + * 0x0000 => 16-bits reserved for flags
> + *
> + * The beginning of state information is marked by _DEV_CONFIG_STATE,
> + * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
> + * certain state information is marked by _END_OF_STATE.
> + */
> +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
> +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
> +
> static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
> off_t off, bool iswrite)
> {
> @@ -129,6 +148,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
> return 0;
> }
>
> +/* ---------------------------------------------------------------------- */
> +
> +static int vfio_save_setup(QEMUFile *f, void *opaque)
> +{
> + VFIODevice *vbasedev = opaque;
> + VFIOMigration *migration = vbasedev->migration;
> + int ret;
> +
> + trace_vfio_save_setup(vbasedev->name);
> +
> + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
> +
> + if (migration->region.mmaps) {
> + /*
> + * vfio_region_mmap() called from migration thread. Memory API called
> + * from vfio_regio_mmap() need it when called from outdide the main loop
> + * thread.
> + */
Thanks for adding this detail, maybe refine slightly as:
Calling vfio_region_mmap() from migration thread. Memory APIs called
from this function require locking the iothread when called from
outside the main loop thread.
Does that capture the intent?
> + qemu_mutex_lock_iothread();
> + ret = vfio_region_mmap(&migration->region);
> + qemu_mutex_unlock_iothread();
> + if (ret) {
> + error_report("%s: Failed to mmap VFIO migration region: %s",
> + vbasedev->name, strerror(-ret));
> + error_report("%s: Falling back to slow path", vbasedev->name);
> + }
> + }
> +
> + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
> + VFIO_DEVICE_STATE_SAVING);
> + if (ret) {
> + error_report("%s: Failed to set state SAVING", vbasedev->name);
> + return ret;
> + }
> +
> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
> +
> + ret = qemu_file_get_error(f);
> + if (ret) {
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +static void vfio_save_cleanup(void *opaque)
> +{
> + VFIODevice *vbasedev = opaque;
> + VFIOMigration *migration = vbasedev->migration;
> +
> + if (migration->region.mmaps) {
> + vfio_region_unmap(&migration->region);
> + }
Are we in a different thread context here that we don't need that same
iothread locking?
> + trace_vfio_save_cleanup(vbasedev->name);
> +}
> +
> +static SaveVMHandlers savevm_vfio_handlers = {
> + .save_setup = vfio_save_setup,
> + .save_cleanup = vfio_save_cleanup,
> +};
> +
> +/* ---------------------------------------------------------------------- */
> +
> static void vfio_vmstate_change(void *opaque, int running, RunState state)
> {
> VFIODevice *vbasedev = opaque;
> @@ -219,6 +301,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
> int ret;
> Object *obj;
> VFIOMigration *migration;
> + char id[256] = "";
> + g_autofree char *path = NULL, *oid;
AIUI, oid must also be initialized (e.g. to NULL) because it is a g_autofree
variable; otherwise an early return would free an uninitialized pointer.
>
> if (!vbasedev->ops->vfio_get_object) {
> return -EINVAL;
> @@ -248,6 +332,18 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>
> vbasedev->migration = migration;
> migration->vbasedev = vbasedev;
> +
> + oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
> + if (oid) {
> + path = g_strdup_printf("%s/vfio", oid);
> + } else {
> + path = g_strdup("vfio");
If we get here then all vfio devices have the same id string. Isn't
that a problem? Thanks,
Alex
> + }
> + strpadcpy(id, sizeof(id), path, '\0');
> +
> + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
> + vbasedev);
> +
> migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
> vbasedev);
> migration->migration_state.notify = vfio_migration_state_notifier;
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 78d7d83b5ef8..f148b5e828c1 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -151,3 +151,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
> vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
> vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
> vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
> +vfio_save_setup(const char *name) " (%s)"
> +vfio_save_cleanup(const char *name) " (%s)"
On 10/23/2020 12:21 AM, Alex Williamson wrote:
> On Thu, 22 Oct 2020 16:41:57 +0530
> Kirti Wankhede <kwankhede@nvidia.com> wrote:
>
>> Define flags to be used as delimiter in migration stream for VFIO devices.
>> Added .save_setup and .save_cleanup functions. Map & unmap migration
>> region from these functions at source during saving or pre-copy phase.
>>
>> Set VFIO device state depending on VM's state. During live migration, VM is
>> running when .save_setup is called, _SAVING | _RUNNING state is set for VFIO
>> device. During save-restore, VM is paused, _SAVING state is set for VFIO device.
>>
>> Signed-off-by: Kirti Wankhede <kwankhede@nvidia.com>
>> Reviewed-by: Neo Jia <cjia@nvidia.com>
>> ---
>> hw/vfio/migration.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>> hw/vfio/trace-events | 2 ++
>> 2 files changed, 98 insertions(+)
>>
>> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
>> index 7c4fa0d08ea6..2e1054bf7f43 100644
>> --- a/hw/vfio/migration.c
>> +++ b/hw/vfio/migration.c
>> @@ -8,12 +8,15 @@
>> */
>>
>> #include "qemu/osdep.h"
>> +#include "qemu/main-loop.h"
>> +#include "qemu/cutils.h"
>> #include <linux/vfio.h>
>>
>> #include "sysemu/runstate.h"
>> #include "hw/vfio/vfio-common.h"
>> #include "cpu.h"
>> #include "migration/migration.h"
>> +#include "migration/vmstate.h"
>> #include "migration/qemu-file.h"
>> #include "migration/register.h"
>> #include "migration/blocker.h"
>> @@ -25,6 +28,22 @@
>> #include "trace.h"
>> #include "hw/hw.h"
>>
>> +/*
>> + * Flags to be used as unique delimiters for VFIO devices in the migration
>> + * stream. These flags are composed as:
>> + * 0xffffffff => MSB 32-bit all 1s
>> + * 0xef10 => Magic ID, represents emulated (virtual) function IO
>> + * 0x0000 => 16-bits reserved for flags
>> + *
>> + * The beginning of state information is marked by _DEV_CONFIG_STATE,
>> + * _DEV_SETUP_STATE, or _DEV_DATA_STATE, respectively. The end of a
>> + * certain state information is marked by _END_OF_STATE.
>> + */
>> +#define VFIO_MIG_FLAG_END_OF_STATE (0xffffffffef100001ULL)
>> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
>> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
>> +#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
>> +
>> static inline int vfio_mig_access(VFIODevice *vbasedev, void *val, int count,
>> off_t off, bool iswrite)
>> {
>> @@ -129,6 +148,69 @@ static int vfio_migration_set_state(VFIODevice *vbasedev, uint32_t mask,
>> return 0;
>> }
>>
>> +/* ---------------------------------------------------------------------- */
>> +
>> +static int vfio_save_setup(QEMUFile *f, void *opaque)
>> +{
>> + VFIODevice *vbasedev = opaque;
>> + VFIOMigration *migration = vbasedev->migration;
>> + int ret;
>> +
>> + trace_vfio_save_setup(vbasedev->name);
>> +
>> + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_SETUP_STATE);
>> +
>> + if (migration->region.mmaps) {
>> + /*
>> + * vfio_region_mmap() called from migration thread. Memory API called
>> + * from vfio_regio_mmap() need it when called from outdide the main loop
>> + * thread.
>> + */
>
> Thanks for adding this detail, maybe refine slightly as:
>
> Calling vfio_region_mmap() from migration thread. Memory APIs called
> from this function require locking the iothread when called from
> outside the main loop thread.
>
> Does that capture the intent?
>
Ok.
>> + qemu_mutex_lock_iothread();
>> + ret = vfio_region_mmap(&migration->region);
>> + qemu_mutex_unlock_iothread();
>> + if (ret) {
>> + error_report("%s: Failed to mmap VFIO migration region: %s",
>> + vbasedev->name, strerror(-ret));
>> + error_report("%s: Falling back to slow path", vbasedev->name);
>> + }
>> + }
>> +
>> + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_MASK,
>> + VFIO_DEVICE_STATE_SAVING);
>> + if (ret) {
>> + error_report("%s: Failed to set state SAVING", vbasedev->name);
>> + return ret;
>> + }
>> +
>> + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
>> +
>> + ret = qemu_file_get_error(f);
>> + if (ret) {
>> + return ret;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static void vfio_save_cleanup(void *opaque)
>> +{
>> + VFIODevice *vbasedev = opaque;
>> + VFIOMigration *migration = vbasedev->migration;
>> +
>> + if (migration->region.mmaps) {
>> + vfio_region_unmap(&migration->region);
>> + }
>
>
> Are we in a different thread context here that we don't need that same
> iothread locking?
>
qemu_savevm_state_setup() is called without holding the iothread lock, whereas
qemu_savevm_state_cleanup() is called with the iothread lock held, so we don't
need to take the lock here.
>
>> + trace_vfio_save_cleanup(vbasedev->name);
>> +}
>> +
>> +static SaveVMHandlers savevm_vfio_handlers = {
>> + .save_setup = vfio_save_setup,
>> + .save_cleanup = vfio_save_cleanup,
>> +};
>> +
>> +/* ---------------------------------------------------------------------- */
>> +
>> static void vfio_vmstate_change(void *opaque, int running, RunState state)
>> {
>> VFIODevice *vbasedev = opaque;
>> @@ -219,6 +301,8 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>> int ret;
>> Object *obj;
>> VFIOMigration *migration;
>> + char id[256] = "";
>> + g_autofree char *path = NULL, *oid;
>
>
> AIUI, oid must also be initialized as a g_autofree variable.
>
>>
>> if (!vbasedev->ops->vfio_get_object) {
>> return -EINVAL;
>> @@ -248,6 +332,18 @@ static int vfio_migration_init(VFIODevice *vbasedev,
>>
>> vbasedev->migration = migration;
>> migration->vbasedev = vbasedev;
>> +
>> + oid = vmstate_if_get_id(VMSTATE_IF(DEVICE(obj)));
>> + if (oid) {
>> + path = g_strdup_printf("%s/vfio", oid);
>> + } else {
>> + path = g_strdup("vfio");
>
>
> If we get here then all vfio devices have the same id string. Isn't
> that a problem? Thanks,
>
Most bus types implement the get_dev_path() callback, which is called from
get_id, so there is very little chance of getting here.
With the above change, the id string we get looks like '0000:00:04.0/vfio';
see the trace logs below:
qemu_loadvm_state_section_startfull 61.942 pid=625231 section_id=0x2f
idstr=b'0000:00:04.0/vfio' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 1.242 pid=625231 section_id=0x30
idstr=b'0000:00:05.0/vfio' instance_id=0x0 version_id=0x1
where '0000:00:04.0' shows the device's location within the guest, so that it
gets preserved and used during resume.
In the worst case, when it is not present, idstr remains the same but
instance_id changes:
qemu_loadvm_state_section_startfull 54.931 pid=609474 section_id=0x2f
idstr=b'vfio' instance_id=0x0 version_id=0x1
qemu_loadvm_state_section_startfull 1.180 pid=609474 section_id=0x30
idstr=b'vfio' instance_id=0x1 version_id=0x1
But there is no other way to know the location of the device within the guest.
Dave, any suggestions here?
Thanks,
Kirti
> Alex
>
>
>> + }
>> + strpadcpy(id, sizeof(id), path, '\0');
>> +
>> + register_savevm_live(id, VMSTATE_INSTANCE_ID_ANY, 1, &savevm_vfio_handlers,
>> + vbasedev);
>> +
>> migration->vm_state = qemu_add_vm_change_state_handler(vfio_vmstate_change,
>> vbasedev);
>> migration->migration_state.notify = vfio_migration_state_notifier;
>> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
>> index 78d7d83b5ef8..f148b5e828c1 100644
>> --- a/hw/vfio/trace-events
>> +++ b/hw/vfio/trace-events
>> @@ -151,3 +151,5 @@ vfio_migration_probe(const char *name, uint32_t index) " (%s) Region %d"
>> vfio_migration_set_state(const char *name, uint32_t state) " (%s) state %d"
>> vfio_vmstate_change(const char *name, int running, const char *reason, uint32_t dev_state) " (%s) running %d reason %s device state %d"
>> vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s"
>> +vfio_save_setup(const char *name) " (%s)"
>> +vfio_save_cleanup(const char *name) " (%s)"
>
>
© 2016 - 2026 Red Hat, Inc.