[PATCH v5 2/8] migration: add VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8, 32}_ALLOC

Alexander Mikhalitsyn posted 8 patches 2 weeks, 6 days ago
[PATCH v5 2/8] migration: add VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8, 32}_ALLOC
Posted by Alexander Mikhalitsyn 2 weeks, 6 days ago
From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>

Add VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8, 32}_ALLOC, which
helps to save/restore a dynamic array of pointers to
structures.

Suggested-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
v2:
- added VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT8_ALLOC
v4:
- almost completely reworked, new flag VMS_ARRAY_OF_POINTER_ALLOW_NULL
  was introduced as suggested by Peter
v5:
- rebased on top of https://lore.kernel.org/all/20260304212303.667141-1-vsementsov@yandex-team.ru/
---
 include/migration/vmstate.h | 77 ++++++++++++++++++++++++++++++-
 migration/savevm.c          | 26 +++++++++++
 migration/vmstate-types.c   | 91 +++++++++++++++++++++++++++++++++++++
 migration/vmstate.c         | 54 ++++++++++++++++++----
 4 files changed, 236 insertions(+), 12 deletions(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 7ed4a0742b2..0a409700598 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -162,7 +162,19 @@ enum VMStateFlags {
     VMS_VSTRUCT           = 0x8000,
 
     /* Marker for end of list */
-    VMS_END = 0x10000
+    VMS_END                         = 0x10000,
+
+    /* The field is a (fixed-size or variable-size) array of pointers
+     * (e.g. struct a { uint8_t **b; }) that can contain NULL values.
+     * This instructs vmstate engine to:
+     * - Dereference each array entry before using it.
+     * - Assume that array is initialized with NULLs on load phase
+     * - Automatically allocate memory for array entries (with size
+     *   specified in (VMStateField).start) on load phase
+     * - Produce NULL/not-NULL markers in migration stream
+     *
+     * Note: Does not imply VMS_ARRAY_OF_POINTER; it needs to be set explicitly. */
+    VMS_ARRAY_OF_POINTER_ALLOW_NULL = 0x20000,
 };
 
 typedef enum {
@@ -194,6 +206,7 @@ struct VMStateField {
     int version_id;
     int struct_version_id;
     bool (*field_exists)(void *opaque, int version_id);
+    const struct VMStateField *real_field;
 };
 
 struct VMStateDescription {
@@ -268,8 +281,10 @@ extern const VMStateInfo vmstate_info_uint64;
 extern const VMStateInfo vmstate_info_fd;
 
 /** Put this in the stream when migrating a null pointer.*/
-#define VMS_NULLPTR_MARKER (0x30U) /* '0' */
+#define VMS_NULLPTR_MARKER    (0x30U) /* '0' */
+#define VMS_NOTNULLPTR_MARKER (0x31U) /* '1' */
 extern const VMStateInfo vmstate_info_nullptr;
+extern const VMStateInfo vmstate_info_maybeptr;
 
 extern const VMStateInfo vmstate_info_cpudouble;
 
@@ -281,6 +296,7 @@ extern const VMStateInfo vmstate_info_bitmap;
 extern const VMStateInfo vmstate_info_qtailq;
 extern const VMStateInfo vmstate_info_gtree;
 extern const VMStateInfo vmstate_info_qlist;
+extern const VMStateInfo vmstate_info_ptrs_array_entry;
 
 #define type_check_2darray(t1,t2,n,m) ((t1(*)[n][m])0 - (t2*)0)
 /*
@@ -562,6 +578,63 @@ extern const VMStateInfo vmstate_info_qlist;
     .offset     = vmstate_offset_array(_s, _f, _type*, _n),          \
 }
 
+/*
+ * For migrating a dynamically allocated uint{8,32}-indexed array
+ * of pointers to structures (with NULL entries and with auto memory allocation).
+ *
+ * _type: type of structure pointed to
+ * _vmsd: VMSD for structure _type (when VMS_STRUCT is set)
+ * _info: VMStateInfo for _type (when VMS_STRUCT is not set)
+ * start: size of (_type) pointed to (for auto memory allocation)
+ */
+#define VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT8_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) { \
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint8_t), \
+    .vmsd       = &(_vmsd),                                          \
+    .start      = sizeof(_type),                                     \
+    .size       = sizeof(_type *),                                   \
+    .flags      = VMS_POINTER|VMS_VARRAY_UINT8|VMS_ARRAY_OF_POINTER| \
+                  VMS_ARRAY_OF_POINTER_ALLOW_NULL|VMS_STRUCT,        \
+    .offset     = vmstate_offset_pointer(_state, _field, _type *),   \
+}
+
+#define VMSTATE_VARRAY_OF_POINTER_UINT8_ALLOC(_field, _state, _field_num, _version, _info, _type) { \
+    .name       = (stringify(_field)),                               \
+    .version_id = (_version),                                        \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint8_t), \
+    .info       = &(_info),                                          \
+    .start      = sizeof(_type),                                     \
+    .size       = sizeof(_type *),                                   \
+    .flags      = VMS_POINTER|VMS_VARRAY_UINT8|VMS_ARRAY_OF_POINTER| \
+                  VMS_ARRAY_OF_POINTER_ALLOW_NULL,                   \
+    .offset     = vmstate_offset_pointer(_state, _field, _type *),   \
+}
+
+#define VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT32_ALLOC(_field, _state, _field_num, _version, _vmsd, _type) { \
+    .name       = (stringify(_field)),                                \
+    .version_id = (_version),                                         \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint32_t), \
+    .vmsd       = &(_vmsd),                                           \
+    .start      = sizeof(_type),                                      \
+    .size       = sizeof(_type *),                                    \
+    .flags      = VMS_POINTER|VMS_VARRAY_UINT32|VMS_ARRAY_OF_POINTER| \
+                  VMS_ARRAY_OF_POINTER_ALLOW_NULL|VMS_STRUCT,         \
+    .offset     = vmstate_offset_pointer(_state, _field, _type *),    \
+}
+
+#define VMSTATE_VARRAY_OF_POINTER_UINT32_ALLOC(_field, _state, _field_num, _version, _info, _type) { \
+    .name       = (stringify(_field)),                                \
+    .version_id = (_version),                                         \
+    .num_offset = vmstate_offset_value(_state, _field_num, uint32_t), \
+    .info       = &(_info),                                           \
+    .start      = sizeof(_type),                                      \
+    .size       = sizeof(_type *),                                    \
+    .flags      = VMS_POINTER|VMS_VARRAY_UINT32|VMS_ARRAY_OF_POINTER| \
+                  VMS_ARRAY_OF_POINTER_ALLOW_NULL,                    \
+    .offset     = vmstate_offset_pointer(_state, _field, _type *),    \
+}
+
 #define VMSTATE_VARRAY_OF_POINTER_UINT32(_field, _state, _field_num, _version, _info, _type) { \
     .name       = (stringify(_field)),                                    \
     .version_id = (_version),                                             \
diff --git a/migration/savevm.c b/migration/savevm.c
index 8115203b518..882c882f684 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -868,6 +868,32 @@ static void vmstate_check(const VMStateDescription *vmsd)
 
     if (field) {
         while (field->name) {
+            /*
+             * VMS_ARRAY_OF_POINTER must be used only together
+             * with one of VMS_(V)ARRAY* flags.
+             */
+            assert(!(field->flags & VMS_ARRAY_OF_POINTER) ||
+                   ((field->flags & (VMS_ARRAY | VMS_VARRAY_INT32 |
+                     VMS_VARRAY_UINT16 | VMS_VARRAY_UINT8 | VMS_VARRAY_UINT32))));
+
+            /*
+             * When VMS_ARRAY_OF_POINTER_ALLOW_NULL is used, we must:
+             * 1. have VMS_ARRAY_OF_POINTER set too;
+             * 2. have ->start field set and it should tell us a size
+             *    of memory chunk we should allocate for every array member.
+             */
+            assert(!(field->flags & VMS_ARRAY_OF_POINTER_ALLOW_NULL) ||
+                   (field->flags & VMS_ARRAY_OF_POINTER));
+            assert(!(field->flags & VMS_ARRAY_OF_POINTER_ALLOW_NULL) ||
+                   field->start);
+
+            /*
+             * (VMStateField).real_field is only for internal purposes
+             * and should never be used by any user-defined VMStateField.
+             * Currently, it is only used by vmsd_create_fake_nullptr_field().
+             */
+            assert(!field->real_field);
+
             if (field->flags & (VMS_STRUCT | VMS_VSTRUCT)) {
                 /* Recurse to sub structures */
                 vmstate_check(field->vmsd);
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index 23f34336964..a55f4d51f4b 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -391,6 +391,97 @@ const VMStateInfo vmstate_info_nullptr = {
     .save = save_nullptr,
 };
 
+static bool load_maybeptr(QEMUFile *f, void *ppv, size_t unused_size,
+                          const VMStateField *field, Error **errp)
+{
+    bool ok = false;
+    const VMStateField *real_field = field->real_field;
+    /* size of structure pointed to by elements of array */
+    size_t size = real_field->start;
+    int marker;
+
+    assert(size);
+
+    if (ppv == NULL) {
+        error_setg(errp, "vmstate: get_maybeptr must be called with ppv != NULL");
+        return false;
+    }
+
+    /*
+     * We start from a clean array, all elements must be NULL, unless
+     * something we haven't prepared for has changed in vmstate_save_state_v().
+     * Let's check for this just in case.
+     */
+    if (*(void **)ppv != NULL) {
+        error_setg(errp, "vmstate: get_maybeptr must be called with *ppv == NULL");
+        return false;
+    }
+
+    marker = qemu_get_byte(f);
+    assert(marker == VMS_NULLPTR_MARKER || marker == VMS_NOTNULLPTR_MARKER);
+
+    if (marker == VMS_NOTNULLPTR_MARKER) {
+        void *pv;
+
+        /* allocate memory for structure */
+        pv = g_malloc0(size);
+
+        ok = vmstate_load_field(f, pv, size, real_field, errp);
+        if (!ok) {
+            g_free(pv);
+            return false;
+        }
+
+        *(void **)ppv = pv;
+    }
+
+    return true;
+}
+
+static bool save_maybeptr(QEMUFile *f, void *ppv, size_t unused_size,
+                          const VMStateField *field, JSONWriter *vmdesc,
+                          Error **errp)
+{
+    const VMStateField *real_field = field->real_field;
+    /* size of structure pointed to by elements of array */
+    size_t size = real_field->start;
+    void *pv;
+
+    assert(size);
+
+    /*
+     * (ppv) is an address of an i-th element of a dynamic array.
+     *
+     * (ppv) can not be NULL unless we have some regression/bug in
+     * vmstate_save_state_v(), because it is result of pointer arithemic like:
+     * first_elem + size * i.
+     */
+    if (ppv == NULL) {
+        error_setg(errp, "vmstate: put_maybeptr must be called with ppv != NULL");
+        return false;
+    }
+
+    /* get a pointer to a structure */
+    pv = *(void **)ppv;
+
+    if (pv == NULL) {
+        /* write a mark telling that there was a NULL pointer */
+        qemu_put_byte(f, VMS_NULLPTR_MARKER);
+        return true;
+    }
+
+    /* if pv is not NULL, write a marker and save field using vmstate_save_field() */
+    qemu_put_byte(f, VMS_NOTNULLPTR_MARKER);
+
+    return vmstate_save_field(f, pv, size, real_field, vmdesc, errp);
+}
+
+const VMStateInfo vmstate_info_maybeptr = {
+    .name = "maybeptr",
+    .load  = load_maybeptr,
+    .save  = save_maybeptr,
+};
+
 /* 64 bit unsigned int. See that the received value is the same than the one
    in the field */
 
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 616eb310e61..29e63751105 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -74,10 +74,15 @@ vmsd_create_fake_nullptr_field(const VMStateField *field)
     /* Do not need "field_exists" check as it always exists (which is null) */
     fake->field_exists = NULL;
 
-    /* See vmstate_info_nullptr - use 1 byte to represent nullptr */
-    fake->size = 1;
-    fake->info = &vmstate_info_nullptr;
-    fake->flags = VMS_SINGLE;
+    if (!(field->flags & VMS_ARRAY_OF_POINTER_ALLOW_NULL)) {
+        /* See vmstate_info_nullptr - use 1 byte to represent nullptr */
+        fake->size = 1;
+        fake->info = &vmstate_info_nullptr;
+        fake->flags = VMS_SINGLE;
+    } else {
+        fake->real_field = field;
+        fake->info = &vmstate_info_maybeptr;
+    }
 
     /* All the rest fields shouldn't matter.. */
 
@@ -258,13 +263,28 @@ bool vmstate_load_vmsd(QEMUFile *f, const VMStateDescription *vmsd,
             for (i = 0; i < n_elems; i++) {
                 bool ok;
                 void *curr_elem = first_elem + size * i;
+                bool need_fake_field = false;
                 const VMStateField *inner_field;
 
                 if (field->flags & VMS_ARRAY_OF_POINTER) {
-                    curr_elem = *(void **)curr_elem;
+                    if (!(field->flags & VMS_ARRAY_OF_POINTER_ALLOW_NULL)) {
+                        assert(curr_elem);
+                        curr_elem = *(void **)curr_elem;
+                        need_fake_field = !curr_elem;
+                    } else {
+                        /*
+                         * We expect array of pointers to be initialized.
+                         * We don't want to overwrite curr_elem with it's
+                         * dereferenced value, because we may need to
+                         * allocate memory (depending on what is in the migration
+                         * stream) and write to it later.
+                         */
+                        assert(!*(void **)curr_elem);
+                        need_fake_field = true;
+                    }
                 }
 
-                if (!curr_elem && size) {
+                if (need_fake_field && size) {
                     /*
                      * If null pointer found (which should only happen in
                      * an array of pointers), use null placeholder and do
@@ -272,6 +292,7 @@ bool vmstate_load_vmsd(QEMUFile *f, const VMStateDescription *vmsd,
                      */
                     inner_field = vmsd_create_fake_nullptr_field(field);
                 } else {
+                    assert(curr_elem || !size);
                     inner_field = field;
                 }
 
@@ -546,25 +567,38 @@ static bool vmstate_save_vmsd_v(QEMUFile *f, const VMStateDescription *vmsd,
 
             for (i = 0; i < n_elems; i++) {
                 void *curr_elem = first_elem + size * i;
+                bool need_fake_field = false;
                 const VMStateField *inner_field;
                 bool is_null;
                 int max_elems = n_elems - i;
 
                 old_offset = qemu_file_transferred(f);
                 if (field->flags & VMS_ARRAY_OF_POINTER) {
-                    assert(curr_elem);
-                    curr_elem = *(void **)curr_elem;
+                    if (!(field->flags & VMS_ARRAY_OF_POINTER_ALLOW_NULL)) {
+                        assert(curr_elem);
+                        curr_elem = *(void **)curr_elem;
+                        need_fake_field = !curr_elem;
+                    } else {
+                        /*
+                         * We always need a fake field to properly handle
+                         * VMS_ARRAY_OF_POINTER_ALLOW_NULL case, because
+                         * even if pointer is not NULL, we still want to
+                         * write a marker in the migration stream.
+                         */
+                        need_fake_field = true;
+                    }
                 }
 
-                if (!curr_elem && size) {
+                if (need_fake_field && size) {
                     /*
                      * If null pointer found (which should only happen in
                      * an array of pointers), use null placeholder and do
                      * not follow.
                      */
                     inner_field = vmsd_create_fake_nullptr_field(field);
-                    is_null = true;
+                    is_null = !curr_elem;
                 } else {
+                    assert(curr_elem || !size);
                     inner_field = field;
                     is_null = false;
                 }
-- 
2.47.3
Re: [PATCH v5 2/8] migration: add VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8, 32}_ALLOC
Posted by Peter Xu 2 weeks, 6 days ago
On Tue, Mar 17, 2026 at 11:27:02AM +0100, Alexander Mikhalitsyn wrote:
> From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
> 
> Add VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8, 32}_ALLOC, which
> helps to save/restore a dynamic array of pointers to
> structures.
> 
> Suggested-by: Peter Xu <peterx@redhat.com>
> Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
> v2:
> - added VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT8_ALLOC
> v4:
> - almost completely reworked, new flag VMS_ARRAY_OF_POINTER_ALLOW_NULL
>   was introduced as suggested by Peter
> v5:
> - rebased on top of https://lore.kernel.org/all/20260304212303.667141-1-vsementsov@yandex-team.ru/
> ---
>  include/migration/vmstate.h | 77 ++++++++++++++++++++++++++++++-
>  migration/savevm.c          | 26 +++++++++++
>  migration/vmstate-types.c   | 91 +++++++++++++++++++++++++++++++++++++
>  migration/vmstate.c         | 54 ++++++++++++++++++----
>  4 files changed, 236 insertions(+), 12 deletions(-)

Hi, Alexander,

Sorry for a late reply, I didn't reply to your previous version on patch 2
because I didn't yet get time to really review this patch. It took me some
time to figure out what's the best way to do this.  I still think it'll be
good to avoid introducing yet another info like what this patch did.  In
general, I want to see if we can reduce get()/put()/save()/load() rather
than adding more.

Meanwhile, there're small issues here and there I saw.

E.g. reusing ->start to cache the object size sounds too tricky.  I am
trying to figure out if we can always stick with ->size to store the object
size; I'll need some even pre-requisite patches though to convert some
existing VMS_ARRAY_OF_POINTER users to reserve the ->size field for that,
though.

The other thing is I had a gut feeling this patch may have issue on the
JSON blob dumped in the migration stream, via the JSONWriter* object passed
into vmstate_save_vmsd_v().

All these made me feel like I'd better go prepare some RFC patches, because
commenting will be too slow.

I'll likely send something very soon just to show what I meant, it'll be
good if you can have a look at that when it comes.

Thanks,

-- 
Peter Xu
Re: [PATCH v5 2/8] migration: add VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8, 32}_ALLOC
Posted by Alexander Mikhalitsyn 2 weeks, 5 days ago
Am Di., 17. März 2026 um 22:40 Uhr schrieb Peter Xu <peterx@redhat.com>:
>
> On Tue, Mar 17, 2026 at 11:27:02AM +0100, Alexander Mikhalitsyn wrote:
> > From: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
> >
> > Add VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8, 32}_ALLOC, which
> > helps to save/restore a dynamic array of pointers to
> > structures.
> >
> > Suggested-by: Peter Xu <peterx@redhat.com>
> > Signed-off-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>
> > v2:
> > - added VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT8_ALLOC
> > v4:
> > - almost completely reworked, new flag VMS_ARRAY_OF_POINTER_ALLOW_NULL
> >   was introduced as suggested by Peter
> > v5:
> > - rebased on top of https://lore.kernel.org/all/20260304212303.667141-1-vsementsov@yandex-team.ru/
> > ---
> >  include/migration/vmstate.h | 77 ++++++++++++++++++++++++++++++-
> >  migration/savevm.c          | 26 +++++++++++
> >  migration/vmstate-types.c   | 91 +++++++++++++++++++++++++++++++++++++
> >  migration/vmstate.c         | 54 ++++++++++++++++++----
> >  4 files changed, 236 insertions(+), 12 deletions(-)
>
> Hi, Alexander,

Hi Peter,

>
> Sorry for a late reply, I didn't reply to your previous version on patch 2
> because I didn't yet get time to really review this patch. It took me some
> time to figure out what's the best way to do this.  I still think it'll be
> good to avoid introducing yet another info like what this patch did.  In
> general, I want to see if we can reduce get()/put()/save()/load() rather
> than adding more.

It took a while for me too, and I've failed :D

>
> Meanwhile, there're small issues here and there I saw.
>
> E.g. reusing ->start to cache the object size sounds too tricky.  I am
> trying to figure out if we can always stick with ->size to store the object
> size; I'll need some even pre-requisite patches though to convert some
> existing VMS_ARRAY_OF_POINTER users to reserve the ->size field for that,
> though.

yep, it completely makes sense. I was using the same approach as we have for
QLIST/QTAILQ/GTREE, but I agree that it's a bit hacky.

>
> The other thing is I had a gut feeling this patch may have issue on the
> JSON blob dumped in the migration stream, via the JSONWriter* object passed
> into vmstate_save_vmsd_v().
>
> All these made me feel like I'd better go prepare some RFC patches, because
> commenting will be too slow.

sure, thank you!

>
> I'll likely send something very soon just to show what I meant, it'll be
> good if you can have a look at that when it comes.

Kind regards,
Alex

>
> Thanks,
>
> --
> Peter Xu
>
Re: [PATCH v5 2/8] migration: add VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8, 32}_ALLOC
Posted by Peter Xu 2 weeks, 6 days ago
On Tue, Mar 17, 2026 at 05:39:54PM -0400, Peter Xu wrote:
> I'll likely send something very soon just to show what I meant, it'll be
> good if you can have a look at that when it comes.

I've posted the series as RFC here:

https://lore.kernel.org/r/20260317232332.15209-1-peterx@redhat.com

-- 
Peter Xu
Re: [PATCH v5 2/8] migration: add VMSTATE_VARRAY_OF_POINTER_TO_STRUCT_UINT{8, 32}_ALLOC
Posted by Alexander Mikhalitsyn 2 weeks, 5 days ago
Am Mi., 18. März 2026 um 00:30 Uhr schrieb Peter Xu <peterx@redhat.com>:
>
> On Tue, Mar 17, 2026 at 05:39:54PM -0400, Peter Xu wrote:
> > I'll likely send something very soon just to show what I meant, it'll be
> > good if you can have a look at that when it comes.
>
> I've posted the series as RFC here:
>
> https://lore.kernel.org/r/20260317232332.15209-1-peterx@redhat.com

HUGE thanks for this, Peter!

I've left my RWB and Tested-by tags on that series.

Kind regards,
Alex

>
> --
> Peter Xu
>