Prepare for future inflight region migration for vhost-user-blk.
We need to migrate size, queue_size, and inner buffer.
So first migrate the size and queue_size fields, then allocate memory for the buffer
using the migrated size, and finally migrate the inner buffer itself.
Signed-off-by: Alexandr Moshkov <dtalexundeer@yandex-team.ru>
---
hw/virtio/vhost.c | 42 +++++++++++++++++++++++++++++++++++++++
include/hw/virtio/vhost.h | 6 ++++++
2 files changed, 48 insertions(+)
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index c46203eb9c..9a746c9861 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -2028,6 +2028,48 @@ const VMStateDescription vmstate_backend_transfer_vhost_inflight = {
}
};
+/*
+ * Pre-load hook for the "vhost-inflight-region/buffer" subsection.
+ *
+ * Allocates a sealed memfd-backed buffer of inflight->size bytes and stores
+ * its address and file descriptor in the vhost_inflight passed as @opaque,
+ * resetting the offset to 0.  inflight->size is read as-is, so it must have
+ * been loaded before this hook runs.
+ *
+ * @opaque: the struct vhost_inflight being loaded
+ * @errp: set by qemu_memfd_alloc() when the allocation fails
+ *
+ * Returns 0 on success, -ENOMEM if the buffer could not be allocated.
+ */
+static int vhost_inflight_buffer_pre_load(void *opaque, Error **errp)
+{
+    struct vhost_inflight *inflight = opaque;
+    int fd = -1;
+    void *addr;
+
+    addr = qemu_memfd_alloc("vhost-inflight", inflight->size,
+                            F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
+                            &fd, errp);
+    /*
+     * Test the return value rather than *errp: dereferencing errp is only
+     * valid when the caller is known to have passed a non-NULL pointer.
+     */
+    if (!addr) {
+        return -ENOMEM;
+    }
+
+    inflight->offset = 0;
+    inflight->addr = addr;
+    inflight->fd = fd;
+
+    return 0;
+}
+
+/*
+ * Subsection carrying the contents of the inflight buffer.
+ *
+ * The pre-load hook allocates the destination buffer first; the single
+ * "addr" field is then described as a variable-length buffer whose length
+ * field is the "size" member of struct vhost_inflight.
+ */
+const VMStateDescription vmstate_vhost_inflight_region_buffer = {
+    .name = "vhost-inflight-region/buffer",
+    .fields = (const VMStateField[]) {
+        VMSTATE_VBUFFER_UINT64(addr, struct vhost_inflight, 0, NULL, size),
+        VMSTATE_END_OF_LIST()
+    },
+    .pre_load_errp = vhost_inflight_buffer_pre_load,
+};
+
+/*
+ * Post-load check for "vhost-inflight-region".
+ *
+ * The inflight buffer is allocated and filled by the
+ * "vhost-inflight-region/buffer" subsection.  If the source never sends
+ * that subsection, size and queue_size are loaded but addr is never set,
+ * leaving the structure half-initialized.  Reject such a stream instead of
+ * letting the load succeed.
+ *
+ * NOTE(review): this relies on inflight->addr being NULL before the load
+ * starts (e.g. a zero-initialized vhost_inflight) -- confirm at the call
+ * sites that use VMSTATE_VHOST_INFLIGHT_REGION.
+ *
+ * Returns 0 when the buffer is present, -EINVAL otherwise.
+ */
+static int vhost_inflight_region_post_load(void *opaque, int version_id)
+{
+    struct vhost_inflight *inflight = opaque;
+
+    if (!inflight->addr) {
+        error_report("vhost-inflight-region: required buffer subsection "
+                     "is missing from the migration stream");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+/*
+ * Migration description of an inflight region: the size and queue_size
+ * fields are listed here, and the buffer itself lives in a subsection so
+ * that it can be allocated with the already-loaded size.
+ */
+const VMStateDescription vmstate_vhost_inflight_region = {
+    .name = "vhost-inflight-region",
+    .post_load = vhost_inflight_region_post_load,
+    .fields = (const VMStateField[]) {
+        VMSTATE_UINT64(size, struct vhost_inflight),
+        VMSTATE_UINT16(queue_size, struct vhost_inflight),
+        VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * const []) {
+        &vmstate_vhost_inflight_region_buffer,
+        NULL
+    }
+};
+
const VMStateDescription vmstate_vhost_virtqueue = {
.name = "vhost-virtqueue",
.fields = (const VMStateField[]) {
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 13ca2c319f..dd552de91f 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -596,6 +596,12 @@ extern const VMStateDescription vmstate_backend_transfer_vhost_inflight;
vmstate_backend_transfer_vhost_inflight, \
struct vhost_inflight)
+/*
+ * Describe a "struct vhost_inflight *" member named _field of _state,
+ * using vmstate_vhost_inflight_region as its description.
+ */
+extern const VMStateDescription vmstate_vhost_inflight_region;
+#define VMSTATE_VHOST_INFLIGHT_REGION(_field, _state)           \
+    VMSTATE_STRUCT_POINTER(_field, _state,                      \
+                           vmstate_vhost_inflight_region,       \
+                           struct vhost_inflight)
+
extern const VMStateDescription vmstate_vhost_dev;
#define VMSTATE_BACKEND_TRANSFER_VHOST(_field, _state) \
VMSTATE_STRUCT(_field, _state, 0, vmstate_vhost_dev, struct vhost_dev)
--
2.34.1
On Mon, Jan 12, 2026 at 04:45:02PM +0500, Alexandr Moshkov wrote:
> Prepare for future inflight region migration for vhost-user-blk.
> We need to migrate size, queue_size, and inner buffer.
>
> So firstly it migrate size and queue_size fields, then allocate memory for buffer with
> migrated size, then migrate inner buffer itself.
>
> Signed-off-by: Alexandr Moshkov <dtalexundeer@yandex-team.ru>
> ---
> hw/virtio/vhost.c | 42 +++++++++++++++++++++++++++++++++++++++
> include/hw/virtio/vhost.h | 6 ++++++
> 2 files changed, 48 insertions(+)
>
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index c46203eb9c..9a746c9861 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -2028,6 +2028,48 @@ const VMStateDescription vmstate_backend_transfer_vhost_inflight = {
> }
> };
>
> +static int vhost_inflight_buffer_pre_load(void *opaque, Error **errp)
> +{
> + info_report("vhost_inflight_region_buffer_pre_load");
> + struct vhost_inflight *inflight = opaque;
> +
> + int fd = -1;
> + void *addr = qemu_memfd_alloc("vhost-inflight", inflight->size,
> + F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
> + &fd, errp);
> + if (*errp) {
> + return -ENOMEM;
> + }
> +
> + inflight->offset = 0;
> + inflight->addr = addr;
> + inflight->fd = fd;
> +
> + return 0;
> +}
> +
> +const VMStateDescription vmstate_vhost_inflight_region_buffer = {
> + .name = "vhost-inflight-region/buffer",
> + .pre_load_errp = vhost_inflight_buffer_pre_load,
> + .fields = (const VMStateField[]) {
> + VMSTATE_VBUFFER_UINT64(addr, struct vhost_inflight, 0, NULL, size),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> +const VMStateDescription vmstate_vhost_inflight_region = {
> + .name = "vhost-inflight-region",
> + .fields = (const VMStateField[]) {
> + VMSTATE_UINT64(size, struct vhost_inflight),
> + VMSTATE_UINT16(queue_size, struct vhost_inflight),
> + VMSTATE_END_OF_LIST()
> + },
> + .subsections = (const VMStateDescription * const []) {
> + &vmstate_vhost_inflight_region_buffer,
> + NULL
> + }
> +};
The subsection trick is neat - it allows the size to be loaded first and
then the memfd is allocated. However, it introduces a weird case: if the
source QEMU does not send the subsection, then the destination QEMU
loads successfully but with inflight partially uninitialized.
It's not obvious to me that the destination QEMU will fail in a safe way
when this happens. The source QEMU must not be able to trigger undefined
behavior. Can you add an explicit check somewhere to fail when this
required subsection is missing?
Thanks,
Stefan
On 1/12/26 23:22, Stefan Hajnoczi wrote:
> On Mon, Jan 12, 2026 at 04:45:02PM +0500, Alexandr Moshkov wrote:
>> Prepare for future inflight region migration for vhost-user-blk.
>> We need to migrate size, queue_size, and inner buffer.
>>
>> So firstly it migrate size and queue_size fields, then allocate memory for buffer with
>> migrated size, then migrate inner buffer itself.
>>
>> Signed-off-by: Alexandr Moshkov<dtalexundeer@yandex-team.ru>
>> ---
>> hw/virtio/vhost.c | 42 +++++++++++++++++++++++++++++++++++++++
>> include/hw/virtio/vhost.h | 6 ++++++
>> 2 files changed, 48 insertions(+)
>>
>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>> index c46203eb9c..9a746c9861 100644
>> --- a/hw/virtio/vhost.c
>> +++ b/hw/virtio/vhost.c
>> @@ -2028,6 +2028,48 @@ const VMStateDescription vmstate_backend_transfer_vhost_inflight = {
>> }
>> };
>>
>> +static int vhost_inflight_buffer_pre_load(void *opaque, Error **errp)
>> +{
>> + info_report("vhost_inflight_region_buffer_pre_load");
>> + struct vhost_inflight *inflight = opaque;
>> +
>> + int fd = -1;
>> + void *addr = qemu_memfd_alloc("vhost-inflight", inflight->size,
>> + F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL,
>> + &fd, errp);
>> + if (*errp) {
>> + return -ENOMEM;
>> + }
>> +
>> + inflight->offset = 0;
>> + inflight->addr = addr;
>> + inflight->fd = fd;
>> +
>> + return 0;
>> +}
>> +
>> +const VMStateDescription vmstate_vhost_inflight_region_buffer = {
>> + .name = "vhost-inflight-region/buffer",
>> + .pre_load_errp = vhost_inflight_buffer_pre_load,
>> + .fields = (const VMStateField[]) {
>> + VMSTATE_VBUFFER_UINT64(addr, struct vhost_inflight, 0, NULL, size),
>> + VMSTATE_END_OF_LIST()
>> + }
>> +};
>> +
>> +const VMStateDescription vmstate_vhost_inflight_region = {
>> + .name = "vhost-inflight-region",
>> + .fields = (const VMStateField[]) {
>> + VMSTATE_UINT64(size, struct vhost_inflight),
>> + VMSTATE_UINT16(queue_size, struct vhost_inflight),
>> + VMSTATE_END_OF_LIST()
>> + },
>> + .subsections = (const VMStateDescription * const []) {
>> + &vmstate_vhost_inflight_region_buffer,
>> + NULL
>> + }
>> +};
> The subsection trick is neat - it allows the size to be loaded first and
> then the memfd is allocated. However, it introduces a weird case: if the
> source QEMU does not send the subsection, then the destination QEMU
> loads successfully but with inflight partially uninitialized.
>
> It's not obvious to me that the destination QEMU will fail in a safe way
> when this happens. The source QEMU must not be able to trigger undefined
> behavior. Can you add an explicit check somewhere to fail when this
> required subsection is missing?
>
Thanks for the reply! For this check, I can add a post_load hook to the
`vmstate_vhost_inflight_region` vmstate and check that `inflight->addr`
has been initialized (is not NULL).
© 2016 - 2026 Red Hat, Inc.