[v1] vmstate: Implement VMS_ARRAY_OF_POINTER_AUTO_ALLOC

[PATCH RFC 08/10] vmstate: Implement load of ptr marker in vmstate core

Posted by Peter Xu 2 weeks, 5 days ago

The loader side of ptr marker is pretty straightforward, instead of playing
the inner_field trick, just do the load manually assuming the marker layout
is a stable ABI (which it is true already).

This will remove some logic while loading VMSD, and hopefully it makes it
slightly easier to read.  Unfortunately, we still need to keep the sender
side because of the JSON blob we're maintaining..

This paves way for future processing of non-NULL markers as well.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/vmstate-types.c | 12 ++++--------
 migration/vmstate.c       | 40 ++++++++++++++++++++++++---------------
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
index b31689fc3c..ae465c5c2c 100644
--- a/migration/vmstate-types.c
+++ b/migration/vmstate-types.c
@@ -363,14 +363,10 @@ static bool load_ptr_marker(QEMUFile *f, void *pv, size_t size,
                             const VMStateField *field, Error **errp)
 
 {
-    int byte = qemu_get_byte(f);
-
-    if (byte == VMS_MARKER_PTR_NULL || byte == VMS_MARKER_PTR_VALID) {
-        /* TODO: process PTR_VALID case */
-        return true;
-    }
-
-    error_setg(errp, "%s: unexpected ptr marker: %d", __func__, byte);
+    /*
+     * Load is done in vmstate core, see vmstate_ptr_marker_load().
+     */
+    g_assert_not_reached();
     return false;
 }
 
diff --git a/migration/vmstate.c b/migration/vmstate.c
index a3a5f25946..d65fc84dfa 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -142,6 +142,21 @@ static void vmstate_handle_alloc(void *ptr, const VMStateField *field,
     }
 }
 
+static bool vmstate_ptr_marker_load(QEMUFile *f, bool *load_field,
+                                    Error **errp)
+{
+    int byte = qemu_get_byte(f);
+
+    if (byte == VMS_MARKER_PTR_NULL) {
+        /* When it's a null ptr marker, do not continue the load */
+        *load_field = false;
+        return true;
+    }
+
+    error_setg(errp, "Unexpected ptr marker: %d", byte);
+    return false;
+}
+
 static bool vmstate_pre_load(const VMStateDescription *vmsd, void *opaque,
                              Error **errp)
 {
@@ -264,30 +279,25 @@ bool vmstate_load_vmsd(QEMUFile *f, const VMStateDescription *vmsd,
             }
 
             for (i = 0; i < n_elems; i++) {
-                bool ok;
+                /* If we will process the load of field? */
+                bool load_field = true;
+                bool ok = true;
                 void *curr_elem = first_elem + size * i;
-                const VMStateField *inner_field;
 
                 if (field->flags & VMS_ARRAY_OF_POINTER) {
                     curr_elem = *(void **)curr_elem;
                 }
 
                 if (!curr_elem && size) {
-                    /*
-                     * If null pointer found (which should only happen in
-                     * an array of pointers), use null placeholder and do
-                     * not follow.
-                     */
-                    inner_field = vmsd_create_ptr_marker_field(field);
-                } else {
-                    inner_field = field;
+                    /* Read the marker instead of VMSD itself */
+                    if (!vmstate_ptr_marker_load(f, &load_field, errp)) {
+                        trace_vmstate_load_field_error(field->name, -EINVAL);
+                        return false;
+                    }
                 }
 
-                ok = vmstate_load_field(f, curr_elem, size, inner_field, errp);
-
-                /* If we used a fake temp field.. free it now */
-                if (inner_field != field) {
-                    g_clear_pointer((gpointer *)&inner_field, g_free);
+                if (load_field) {
+                    ok = vmstate_load_field(f, curr_elem, size, field, errp);
                 }
 
                 if (ok) {
-- 
2.50.1

Re: [PATCH RFC 08/10] vmstate: Implement load of ptr marker in vmstate core

Posted by Fabiano Rosas 2 weeks, 3 days ago

Peter Xu <peterx@redhat.com> writes:

> The loader side of ptr marker is pretty straightforward, instead of playing
> the inner_field trick, just do the load manually assuming the marker layout
> is a stable ABI (which it is true already).
>
> This will remove some logic while loading VMSD, and hopefully it makes it
> slightly easier to read.  Unfortunately, we still need to keep the sender
> side because of the JSON blob we're maintaining..
>

/ramble
Is the JSON blob ABI? Can we kill it? AFAIR, it only serves to add
complexity and to break analyze-script from time to time. We'd be better
off parsing the actual stream from a file. Could maybe even use the same
loadvm code, but mock the .get functions to print instead of actually
loading.

Having separate code that parses a "thing" that's not the stream, but
that should reflect the stream, but it's not the stream, but pretty
close it's a bit weird to me. I recently had to simply open the raw
stream on emacs and navigate through it because the file: stream was
somehow different from the stream on the qcow2, for the same command
line.

(that's another point, parsing from the qcow2 would be cool, which the
JSON blob doesn't provide today I think)
ramble/

> This paves way for future processing of non-NULL markers as well.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/vmstate-types.c | 12 ++++--------
>  migration/vmstate.c       | 40 ++++++++++++++++++++++++---------------
>  2 files changed, 29 insertions(+), 23 deletions(-)
>
> diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
> index b31689fc3c..ae465c5c2c 100644
> --- a/migration/vmstate-types.c
> +++ b/migration/vmstate-types.c
> @@ -363,14 +363,10 @@ static bool load_ptr_marker(QEMUFile *f, void *pv, size_t size,
>                              const VMStateField *field, Error **errp)
>  
>  {
> -    int byte = qemu_get_byte(f);
> -
> -    if (byte == VMS_MARKER_PTR_NULL || byte == VMS_MARKER_PTR_VALID) {
> -        /* TODO: process PTR_VALID case */
> -        return true;
> -    }
> -
> -    error_setg(errp, "%s: unexpected ptr marker: %d", __func__, byte);
> +    /*
> +     * Load is done in vmstate core, see vmstate_ptr_marker_load().
> +     */
> +    g_assert_not_reached();
>      return false;
>  }
>  
> diff --git a/migration/vmstate.c b/migration/vmstate.c
> index a3a5f25946..d65fc84dfa 100644
> --- a/migration/vmstate.c
> +++ b/migration/vmstate.c
> @@ -142,6 +142,21 @@ static void vmstate_handle_alloc(void *ptr, const VMStateField *field,
>      }
>  }
>  
> +static bool vmstate_ptr_marker_load(QEMUFile *f, bool *load_field,
> +                                    Error **errp)
> +{
> +    int byte = qemu_get_byte(f);
> +
> +    if (byte == VMS_MARKER_PTR_NULL) {
> +        /* When it's a null ptr marker, do not continue the load */
> +        *load_field = false;
> +        return true;
> +    }
> +
> +    error_setg(errp, "Unexpected ptr marker: %d", byte);
> +    return false;
> +}
> +
>  static bool vmstate_pre_load(const VMStateDescription *vmsd, void *opaque,
>                               Error **errp)
>  {
> @@ -264,30 +279,25 @@ bool vmstate_load_vmsd(QEMUFile *f, const VMStateDescription *vmsd,
>              }
>  
>              for (i = 0; i < n_elems; i++) {
> -                bool ok;
> +                /* If we will process the load of field? */
> +                bool load_field = true;

maybe valid_ptr would be more clear?

> +                bool ok = true;
>                  void *curr_elem = first_elem + size * i;
> -                const VMStateField *inner_field;
>  
>                  if (field->flags & VMS_ARRAY_OF_POINTER) {
>                      curr_elem = *(void **)curr_elem;
>                  }
>  
>                  if (!curr_elem && size) {
> -                    /*
> -                     * If null pointer found (which should only happen in
> -                     * an array of pointers), use null placeholder and do
> -                     * not follow.
> -                     */
> -                    inner_field = vmsd_create_ptr_marker_field(field);
> -                } else {
> -                    inner_field = field;
> +                    /* Read the marker instead of VMSD itself */
> +                    if (!vmstate_ptr_marker_load(f, &load_field, errp)) {
> +                        trace_vmstate_load_field_error(field->name, -EINVAL);
> +                        return false;
> +                    }
>                  }
>  
> -                ok = vmstate_load_field(f, curr_elem, size, inner_field, errp);
> -
> -                /* If we used a fake temp field.. free it now */
> -                if (inner_field != field) {
> -                    g_clear_pointer((gpointer *)&inner_field, g_free);
> +                if (load_field) {
> +                    ok = vmstate_load_field(f, curr_elem, size, field, errp);
>                  }
>  
>                  if (ok) {

Re: [PATCH RFC 08/10] vmstate: Implement load of ptr marker in vmstate core

Posted by Peter Xu 2 weeks, 3 days ago

On Thu, Mar 19, 2026 at 05:56:52PM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > The loader side of ptr marker is pretty straightforward, instead of playing
> > the inner_field trick, just do the load manually assuming the marker layout
> > is a stable ABI (which it is true already).
> >
> > This will remove some logic while loading VMSD, and hopefully it makes it
> > slightly easier to read.  Unfortunately, we still need to keep the sender
> > side because of the JSON blob we're maintaining..
> >
> 
> /ramble
> Is the JSON blob ABI? Can we kill it?

Yeah, that's a fair ramble.  I had sometimes the same feeling that I wished
it not be there, it'll save quite some work..

Per my limited understanding of reading the code in the past, the plan 10
years ago was having those blob to always be together with the binary
stream so that the stream (especially when causing loading failures..) will
be parsable and it's easier to know why the load fail.

However I also confess in the past few years since I worked on migration, I
never used it to debug any real VMSD issues..

I think one reason might be that when migration was very new and when
device emulation developers were easier to mess things up, crash happen
more, and at that time this tool helps debugging things.  It'll also almost
make the VM dump self-explanatory.

Nowadays, we added more codes into migration to help, e.g., I recall we
added things like QEMU_VM_SECTION_FOOTER and likely more after the vmdesc,
which can also help to identify VMSD issues directly from dest QEMU errors.

Let me copy some more people to see if there's any thoughts or inputs on
the JSON blobs..

> AFAIR, it only serves to add
> complexity and to break analyze-script from time to time. We'd be better
> off parsing the actual stream from a file. Could maybe even use the same
> loadvm code, but mock the .get functions to print instead of actually
> loading.

The problem might be that direct parsing of the binary stream is almost
impossible when without the json blob, due to the fact we have a very
condensed VMSD fields in the stream (we have zero headers for fields..), so
they're very efficient but compat, I think we won't be able to parse an
image if we don't know which version of QEMU it was generated from
otherwise.

But again, I'm not sure we strictly need this in each VM image we send,
especially right now on dest QEMU we explicitly allocates that buffer, load
this vmdesc dump and then drop it.. There's also that postcopy won't dump
this (pretty much because we know it's an active QEMU instance so JSON blob
may not really help..), so it's just a bit weird to not be able to see how
it helps besides debugging a "migrate to file" case with unknown QEMU
versions, for example.  When I know the version of QEMU on src side when
something wrong happened (normally we do..), it may not be very needed.

> 
> Having separate code that parses a "thing" that's not the stream, but
> that should reflect the stream, but it's not the stream, but pretty
> close it's a bit weird to me. I recently had to simply open the raw
> stream on emacs and navigate through it because the file: stream was
> somehow different from the stream on the qcow2, for the same command
> line.

Do you mean snapshot-save v.s. live migrate to file URIs when all caps off?

> 
> (that's another point, parsing from the qcow2 would be cool, which the
> JSON blob doesn't provide today I think)
> ramble/
> 
> > This paves way for future processing of non-NULL markers as well.
> >
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> >  migration/vmstate-types.c | 12 ++++--------
> >  migration/vmstate.c       | 40 ++++++++++++++++++++++++---------------
> >  2 files changed, 29 insertions(+), 23 deletions(-)
> >
> > diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
> > index b31689fc3c..ae465c5c2c 100644
> > --- a/migration/vmstate-types.c
> > +++ b/migration/vmstate-types.c
> > @@ -363,14 +363,10 @@ static bool load_ptr_marker(QEMUFile *f, void *pv, size_t size,
> >                              const VMStateField *field, Error **errp)
> >  
> >  {
> > -    int byte = qemu_get_byte(f);
> > -
> > -    if (byte == VMS_MARKER_PTR_NULL || byte == VMS_MARKER_PTR_VALID) {
> > -        /* TODO: process PTR_VALID case */
> > -        return true;
> > -    }
> > -
> > -    error_setg(errp, "%s: unexpected ptr marker: %d", __func__, byte);
> > +    /*
> > +     * Load is done in vmstate core, see vmstate_ptr_marker_load().
> > +     */
> > +    g_assert_not_reached();
> >      return false;
> >  }
> >  
> > diff --git a/migration/vmstate.c b/migration/vmstate.c
> > index a3a5f25946..d65fc84dfa 100644
> > --- a/migration/vmstate.c
> > +++ b/migration/vmstate.c
> > @@ -142,6 +142,21 @@ static void vmstate_handle_alloc(void *ptr, const VMStateField *field,
> >      }
> >  }
> >  
> > +static bool vmstate_ptr_marker_load(QEMUFile *f, bool *load_field,
> > +                                    Error **errp)
> > +{
> > +    int byte = qemu_get_byte(f);
> > +
> > +    if (byte == VMS_MARKER_PTR_NULL) {
> > +        /* When it's a null ptr marker, do not continue the load */
> > +        *load_field = false;
> > +        return true;
> > +    }
> > +
> > +    error_setg(errp, "Unexpected ptr marker: %d", byte);
> > +    return false;
> > +}
> > +
> >  static bool vmstate_pre_load(const VMStateDescription *vmsd, void *opaque,
> >                               Error **errp)
> >  {
> > @@ -264,30 +279,25 @@ bool vmstate_load_vmsd(QEMUFile *f, const VMStateDescription *vmsd,
> >              }
> >  
> >              for (i = 0; i < n_elems; i++) {
> > -                bool ok;
> > +                /* If we will process the load of field? */
> > +                bool load_field = true;
> 
> maybe valid_ptr would be more clear?

I don't think we can?  Note, that we will only update this field iff the
field used the new VMS_ARRAY_OF_POINTER_AUTO_ALLOC, otherwise it simply
should be true always and it says "let's load this VMSD".  IOW, in most
cases there's no ptr, hence "valid_ptr=true" will be weird on its own I
think..

> 
> > +                bool ok = true;
> >                  void *curr_elem = first_elem + size * i;
> > -                const VMStateField *inner_field;
> >  
> >                  if (field->flags & VMS_ARRAY_OF_POINTER) {
> >                      curr_elem = *(void **)curr_elem;
> >                  }
> >  
> >                  if (!curr_elem && size) {
> > -                    /*
> > -                     * If null pointer found (which should only happen in
> > -                     * an array of pointers), use null placeholder and do
> > -                     * not follow.
> > -                     */
> > -                    inner_field = vmsd_create_ptr_marker_field(field);
> > -                } else {
> > -                    inner_field = field;
> > +                    /* Read the marker instead of VMSD itself */
> > +                    if (!vmstate_ptr_marker_load(f, &load_field, errp)) {
> > +                        trace_vmstate_load_field_error(field->name, -EINVAL);
> > +                        return false;
> > +                    }
> >                  }
> >  
> > -                ok = vmstate_load_field(f, curr_elem, size, inner_field, errp);
> > -
> > -                /* If we used a fake temp field.. free it now */
> > -                if (inner_field != field) {
> > -                    g_clear_pointer((gpointer *)&inner_field, g_free);
> > +                if (load_field) {
> > +                    ok = vmstate_load_field(f, curr_elem, size, field, errp);
> >                  }
> >  
> >                  if (ok) {
> 

-- 
Peter Xu

Re: [PATCH RFC 08/10] vmstate: Implement load of ptr marker in vmstate core

Posted by Fabiano Rosas 2 weeks, 3 days ago

Peter Xu <peterx@redhat.com> writes:

> On Thu, Mar 19, 2026 at 05:56:52PM -0300, Fabiano Rosas wrote:
>> Peter Xu <peterx@redhat.com> writes:
>> 
>> > The loader side of ptr marker is pretty straightforward, instead of playing
>> > the inner_field trick, just do the load manually assuming the marker layout
>> > is a stable ABI (which it is true already).
>> >
>> > This will remove some logic while loading VMSD, and hopefully it makes it
>> > slightly easier to read.  Unfortunately, we still need to keep the sender
>> > side because of the JSON blob we're maintaining..
>> >
>> 
>> /ramble
>> Is the JSON blob ABI? Can we kill it?
>
> Yeah, that's a fair ramble.  I had sometimes the same feeling that I wished
> it not be there, it'll save quite some work..
>
> Per my limited understanding of reading the code in the past, the plan 10
> years ago was having those blob to always be together with the binary
> stream so that the stream (especially when causing loading failures..) will
> be parsable and it's easier to know why the load fail.
>

This would only work for a file migration or snapshots. The live
migration wouldn't have received the blob at the point it fails I
think. That's ok, and it's not a bad idea, but I think we're not making
good use of it currently.

> However I also confess in the past few years since I worked on migration, I
> never used it to debug any real VMSD issues..
>

I actually do use it a few times a year, pretty much to diff the stream
between two different QEMU versions, like Alexander mentioned. From the
recent mips bug:

(echo "migrate file:migfile-11"; echo "quit") | ./build/qemu-system-mips64 -monitor stdio
(echo "migrate file:migfile-10"; echo "quit") | ./build-10.2/qemu-system-mips64 -monitor stdio
diff -y <(./scripts/analyze-migration.py -f migfile-10)
<(./scripts/analyze-migration.py -f migfile-11) | less

As you can see, usability is weird. A QEMU command that dumps a JSON
already formatted would be a good improvement. -dump-vmstate does it,
but there it's the dump of all vmstates like they were registered, not
what's actually migrated and of course it doesn't contain the data.

> I think one reason might be that when migration was very new and when
> device emulation developers were easier to mess things up, crash happen
> more, and at that time this tool helps debugging things.  It'll also almost
> make the VM dump self-explanatory.
>
> Nowadays, we added more codes into migration to help, e.g., I recall we
> added things like QEMU_VM_SECTION_FOOTER and likely more after the vmdesc,
> which can also help to identify VMSD issues directly from dest QEMU errors.
>
> Let me copy some more people to see if there's any thoughts or inputs on
> the JSON blobs..
>
>> AFAIR, it only serves to add
>> complexity and to break analyze-script from time to time. We'd be better
>> off parsing the actual stream from a file. Could maybe even use the same
>> loadvm code, but mock the .get functions to print instead of actually
>> loading.
>
> The problem might be that direct parsing of the binary stream is almost
> impossible when without the json blob, due to the fact we have a very
> condensed VMSD fields in the stream (we have zero headers for fields..), so
> they're very efficient but compat, I think we won't be able to parse an
> image if we don't know which version of QEMU it was generated from
> otherwise.
>

I was thinking of using the same QEMU version that produced the binary
stream indeed. But yeah, it's probably not very useful.

Now, if QEMU can load the binary stream, it could dry-load it
somehow. We currently have the vmstate-walking code tightly coupled with
the actual loading of the data, I can see a scenario where these things
are separated and we can decided what operation to perform on a walk.

Would walking the vmstates and generating the JSON beforehand be useful?
We could then move it to the start of the binary stream, could have a
QEMU command that dumps it, could even use that set of data structures
in JSON format to then guide a second walk which either calls the
devices VMStateInfo or prints the data to a file for debugging. Thinking
out loud here...

> But again, I'm not sure we strictly need this in each VM image we send,
> especially right now on dest QEMU we explicitly allocates that buffer, load
> this vmdesc dump and then drop it.. There's also that postcopy won't dump
> this (pretty much because we know it's an active QEMU instance so JSON blob
> may not really help..), so it's just a bit weird to not be able to see how
> it helps besides debugging a "migrate to file" case with unknown QEMU
> versions, for example.  When I know the version of QEMU on src side when
> something wrong happened (normally we do..), it may not be very needed.
>

Well, putting it on the binary stream for TCP migration might be useless
because we never do anything with it. And it's usually migrating live,
so we don't even get to it if anything fails. Putting it on the .qcow2
file is also useless because there's no tooling to inspect the binary
stream, although we could adapt analyze-migration to take an offset and
read from the middle of the file. And hexdump'ing it is not super hard
as well, but it's less likely anyone will do it.

Even the migrate to file approach is frail because depending on the
changes between the QEMU that produced the stream and the one we took
analyze-migration from, the script might not be able to parse the JSON
at all. Such as with this series.

>> 
>> Having separate code that parses a "thing" that's not the stream, but
>> that should reflect the stream, but it's not the stream, but pretty
>> close it's a bit weird to me. I recently had to simply open the raw
>> stream on emacs and navigate through it because the file: stream was
>> somehow different from the stream on the qcow2, for the same command
>> line.
>
> Do you mean snapshot-save v.s. live migrate to file URIs when all caps off?
>

I was trying to ensure migration to file was producing the same binary
stream as snapshot-save. For a same command line, the migration was
succeeding, but the loadm was failing. So I resorted to opening the
binaries and searching for the QEVM marker.

>> 
>> (that's another point, parsing from the qcow2 would be cool, which the
>> JSON blob doesn't provide today I think)
>> ramble/
>> 
>> > This paves way for future processing of non-NULL markers as well.
>> >
>> > Signed-off-by: Peter Xu <peterx@redhat.com>
>> > ---
>> >  migration/vmstate-types.c | 12 ++++--------
>> >  migration/vmstate.c       | 40 ++++++++++++++++++++++++---------------
>> >  2 files changed, 29 insertions(+), 23 deletions(-)
>> >
>> > diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
>> > index b31689fc3c..ae465c5c2c 100644
>> > --- a/migration/vmstate-types.c
>> > +++ b/migration/vmstate-types.c
>> > @@ -363,14 +363,10 @@ static bool load_ptr_marker(QEMUFile *f, void *pv, size_t size,
>> >                              const VMStateField *field, Error **errp)
>> >  
>> >  {
>> > -    int byte = qemu_get_byte(f);
>> > -
>> > -    if (byte == VMS_MARKER_PTR_NULL || byte == VMS_MARKER_PTR_VALID) {
>> > -        /* TODO: process PTR_VALID case */
>> > -        return true;
>> > -    }
>> > -
>> > -    error_setg(errp, "%s: unexpected ptr marker: %d", __func__, byte);
>> > +    /*
>> > +     * Load is done in vmstate core, see vmstate_ptr_marker_load().
>> > +     */
>> > +    g_assert_not_reached();
>> >      return false;
>> >  }
>> >  
>> > diff --git a/migration/vmstate.c b/migration/vmstate.c
>> > index a3a5f25946..d65fc84dfa 100644
>> > --- a/migration/vmstate.c
>> > +++ b/migration/vmstate.c
>> > @@ -142,6 +142,21 @@ static void vmstate_handle_alloc(void *ptr, const VMStateField *field,
>> >      }
>> >  }
>> >  
>> > +static bool vmstate_ptr_marker_load(QEMUFile *f, bool *load_field,
>> > +                                    Error **errp)
>> > +{
>> > +    int byte = qemu_get_byte(f);
>> > +
>> > +    if (byte == VMS_MARKER_PTR_NULL) {
>> > +        /* When it's a null ptr marker, do not continue the load */
>> > +        *load_field = false;
>> > +        return true;
>> > +    }
>> > +
>> > +    error_setg(errp, "Unexpected ptr marker: %d", byte);
>> > +    return false;
>> > +}
>> > +
>> >  static bool vmstate_pre_load(const VMStateDescription *vmsd, void *opaque,
>> >                               Error **errp)
>> >  {
>> > @@ -264,30 +279,25 @@ bool vmstate_load_vmsd(QEMUFile *f, const VMStateDescription *vmsd,
>> >              }
>> >  
>> >              for (i = 0; i < n_elems; i++) {
>> > -                bool ok;
>> > +                /* If we will process the load of field? */
>> > +                bool load_field = true;
>> 
>> maybe valid_ptr would be more clear?
>
> I don't think we can?  Note, that we will only update this field iff the
> field used the new VMS_ARRAY_OF_POINTER_AUTO_ALLOC, otherwise it simply
> should be true always and it says "let's load this VMSD".  IOW, in most
> cases there's no ptr, hence "valid_ptr=true" will be weird on its own I
> think..
>

Ah I see. I was looking at the save side and thinking we can maybe
simplify the inner_field logic a bit, but it doesn't apply here. I'll
try to write a patch on top of your series with my ideas.

>> 
>> > +                bool ok = true;
>> >                  void *curr_elem = first_elem + size * i;
>> > -                const VMStateField *inner_field;
>> >  
>> >                  if (field->flags & VMS_ARRAY_OF_POINTER) {
>> >                      curr_elem = *(void **)curr_elem;
>> >                  }
>> >  
>> >                  if (!curr_elem && size) {
>> > -                    /*
>> > -                     * If null pointer found (which should only happen in
>> > -                     * an array of pointers), use null placeholder and do
>> > -                     * not follow.
>> > -                     */
>> > -                    inner_field = vmsd_create_ptr_marker_field(field);
>> > -                } else {
>> > -                    inner_field = field;
>> > +                    /* Read the marker instead of VMSD itself */
>> > +                    if (!vmstate_ptr_marker_load(f, &load_field, errp)) {
>> > +                        trace_vmstate_load_field_error(field->name, -EINVAL);
>> > +                        return false;
>> > +                    }
>> >                  }
>> >  
>> > -                ok = vmstate_load_field(f, curr_elem, size, inner_field, errp);
>> > -
>> > -                /* If we used a fake temp field.. free it now */
>> > -                if (inner_field != field) {
>> > -                    g_clear_pointer((gpointer *)&inner_field, g_free);
>> > +                if (load_field) {
>> > +                    ok = vmstate_load_field(f, curr_elem, size, field, errp);
>> >                  }
>> >  
>> >                  if (ok) {
>>

Re: [PATCH RFC 08/10] vmstate: Implement load of ptr marker in vmstate core

Posted by Peter Xu 2 weeks, 3 days ago

On Fri, Mar 20, 2026 at 10:03:54AM -0300, Fabiano Rosas wrote:
> Peter Xu <peterx@redhat.com> writes:
> 
> > On Thu, Mar 19, 2026 at 05:56:52PM -0300, Fabiano Rosas wrote:
> >> Peter Xu <peterx@redhat.com> writes:
> >> 
> >> > The loader side of ptr marker is pretty straightforward, instead of playing
> >> > the inner_field trick, just do the load manually assuming the marker layout
> >> > is a stable ABI (which it is true already).
> >> >
> >> > This will remove some logic while loading VMSD, and hopefully it makes it
> >> > slightly easier to read.  Unfortunately, we still need to keep the sender
> >> > side because of the JSON blob we're maintaining..
> >> >
> >> 
> >> /ramble
> >> Is the JSON blob ABI? Can we kill it?
> >
> > Yeah, that's a fair ramble.  I had sometimes the same feeling that I wished
> > it not be there, it'll save quite some work..
> >
> > Per my limited understanding of reading the code in the past, the plan 10
> > years ago was having those blob to always be together with the binary
> > stream so that the stream (especially when causing loading failures..) will
> > be parsable and it's easier to know why the load fail.
> >
> 
> This would only work for a file migration or snapshots. The live
> migration wouldn't have received the blob at the point it fails I
> think. That's ok, and it's not a bad idea, but I think we're not making
> good use of it currently.

Now I do start to question more if it's a good idea to put it into a live
stream, after I read both your reply and Alex's.

I think I might have over-thought it to be "working for any version of
QEMU".  Maybe nobody (or at least less than I was expecting yesterday) that
really needs this functionality.  Likely in 99.9999% cases, we always know
the version of QEMU binary together with the stream.

And when put into the live stream... I want to discuss the possibility of
removing it at least there.  Destination read it and drop it right now: it
not only makes save() slower, and load() slower too..  not too much, just
not necessary.

> 
> > However I also confess in the past few years since I worked on migration, I
> > never used it to debug any real VMSD issues..
> >
> 
> I actually do use it a few times a year, pretty much to diff the stream
> between two different QEMU versions, like Alexander mentioned. From the
> recent mips bug:
> 
> (echo "migrate file:migfile-11"; echo "quit") | ./build/qemu-system-mips64 -monitor stdio
> (echo "migrate file:migfile-10"; echo "quit") | ./build-10.2/qemu-system-mips64 -monitor stdio
> diff -y <(./scripts/analyze-migration.py -f migfile-10)
> <(./scripts/analyze-migration.py -f migfile-11) | less
> 
> As you can see, usability is weird. A QEMU command that dumps a JSON
> already formatted would be a good improvement. -dump-vmstate does it,
> but there it's the dump of all vmstates like they were registered, not
> what's actually migrated and of course it doesn't contain the data.

This is a point almost to say it would be nice we keep this (or some
similar function that do more than -dump-vmstate) to be around.

> 
> > I think one reason might be that when migration was very new and when
> > device emulation developers were easier to mess things up, crash happen
> > more, and at that time this tool helps debugging things.  It'll also almost
> > make the VM dump self-explanatory.
> >
> > Nowadays, we added more codes into migration to help, e.g., I recall we
> > added things like QEMU_VM_SECTION_FOOTER and likely more after the vmdesc,
> > which can also help to identify VMSD issues directly from dest QEMU errors.
> >
> > Let me copy some more people to see if there's any thoughts or inputs on
> > the JSON blobs..
> >
> >> AFAIR, it only serves to add
> >> complexity and to break analyze-script from time to time. We'd be better
> >> off parsing the actual stream from a file. Could maybe even use the same
> >> loadvm code, but mock the .get functions to print instead of actually
> >> loading.
> >
> > The problem might be that direct parsing of the binary stream is almost
> > impossible when without the json blob, due to the fact we have a very
> > condensed VMSD fields in the stream (we have zero headers for fields..), so
> > they're very efficient but compat, I think we won't be able to parse an
> > image if we don't know which version of QEMU it was generated from
> > otherwise.
> >
> 
> I was thinking of using the same QEMU version that produced the binary
> stream indeed. But yeah, it's probably not very useful.

I revoke my demand on "it needs to work on unknown QEMU version" now.

> 
> Now, if QEMU can load the binary stream, it could dry-load it
> somehow. We currently have the vmstate-walking code tightly coupled with
> the actual loading of the data, I can see a scenario where these things
> are separated and we can decided what operation to perform on a walk.

Am I the only one who thinks this might be a very good idea? :)

> 
> Would walking the vmstates and generating the JSON beforehand be useful?
> We could then move it to the start of the binary stream, could have a
> QEMU command that dumps it, could even use that set of data structures
> in JSON format to then guide a second walk which either calls the
> devices VMStateInfo or prints the data to a file for debugging. Thinking
> out loud here...

I think it won't work: the migration stream has dependency on device states
(including ram).  It may dump different things when done at the beginning
v.s. when done at switchover, because the VM is running and device states
can change.

Consider one .needed() of a VMSD field that report different things from
time to time (say, the guest driver enabled some device feature during
migration running).

> 
> > But again, I'm not sure we strictly need this in each VM image we send,
> > especially right now on dest QEMU we explicitly allocates that buffer, load
> > this vmdesc dump and then drop it.. There's also that postcopy won't dump
> > this (pretty much because we know it's an active QEMU instance so JSON blob
> > may not really help..), so it's just a bit weird to not be able to see how
> > it helps besides debugging a "migrate to file" case with unknown QEMU
> > versions, for example.  When I know the version of QEMU on src side when
> > something wrong happened (normally we do..), it may not be very needed.
> >
> 
> Well, putting it on the binary stream for TCP migration might be useless
> because we never do anything with it. And it's usually migrating live,
> so we don't even get to it if anything fails. Putting it on the .qcow2
> file is also useless because there's no tooling to inspect the binary
> stream, although we could adapt analyze-migration to take an offset and
> read from the middle of the file. And hexdump'ing it is not super hard
> as well, but it's less likely anyone will do it.
> 
> Even the migrate to file approach is frail because depending on the
> changes between the QEMU that produced the stream and the one we took
> analyze-migration from, the script might not be able to parse the JSON
> at all. Such as with this series.

I agree. So I think we can discuss two separate things:

(1) removal of JSON blob in TCP live streaming

(2) removal of JSON blob in file migration, and replace it with the vmsd
    walker idea you proposed above

One thing funny about (1) is, since (please anyone correct me otherwise..)
all QEMU binaries in the past expect QEMU_VM_VMDESCRIPTION to be present,
but drop the data all the time, we may even consider breaking the ABI
immediately, by sending some fake strings inside without generating the
real JSON..  Old QEMUs migration shouldn't be broken because they'll just
read less and drop less.

I had a feeling (2) should be able to fully replace the current analyze
script, meanwhile we can remove the script completely, and meanwhile remove
all the tricks we have done with the json writer like commit 9867c3a7ced,
35049eb0d2fc, etc., which affects production code paths very much...

> 
> >> 
> >> Having separate code that parses a "thing" that's not the stream, but
> >> that should reflect the stream, but it's not the stream, but pretty
> >> close it's a bit weird to me. I recently had to simply open the raw
> >> stream on emacs and navigate through it because the file: stream was
> >> somehow different from the stream on the qcow2, for the same command
> >> line.
> >
> > Do you mean snapshot-save v.s. live migrate to file URIs when all caps off?
> >
> 
> I was trying to ensure migration to file was producing the same binary
> stream as snapshot-save. For a same command line, the migration was
> succeeding, but the loadm was failing. So I resorted to opening the
> binaries and searching for the QEVM marker.

OK. I hope I'm right we're all moving towards the file: URIs and maybe some
day we can obsolete the old snapshots.

> 
> >> 
> >> (that's another point, parsing from the qcow2 would be cool, which the
> >> JSON blob doesn't provide today I think)
> >> ramble/
> >> 
> >> > This paves way for future processing of non-NULL markers as well.
> >> >
> >> > Signed-off-by: Peter Xu <peterx@redhat.com>
> >> > ---
> >> >  migration/vmstate-types.c | 12 ++++--------
> >> >  migration/vmstate.c       | 40 ++++++++++++++++++++++++---------------
> >> >  2 files changed, 29 insertions(+), 23 deletions(-)
> >> >
> >> > diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
> >> > index b31689fc3c..ae465c5c2c 100644
> >> > --- a/migration/vmstate-types.c
> >> > +++ b/migration/vmstate-types.c
> >> > @@ -363,14 +363,10 @@ static bool load_ptr_marker(QEMUFile *f, void *pv, size_t size,
> >> >                              const VMStateField *field, Error **errp)
> >> >  
> >> >  {
> >> > -    int byte = qemu_get_byte(f);
> >> > -
> >> > -    if (byte == VMS_MARKER_PTR_NULL || byte == VMS_MARKER_PTR_VALID) {
> >> > -        /* TODO: process PTR_VALID case */
> >> > -        return true;
> >> > -    }
> >> > -
> >> > -    error_setg(errp, "%s: unexpected ptr marker: %d", __func__, byte);
> >> > +    /*
> >> > +     * Load is done in vmstate core, see vmstate_ptr_marker_load().
> >> > +     */
> >> > +    g_assert_not_reached();
> >> >      return false;
> >> >  }
> >> >  
> >> > diff --git a/migration/vmstate.c b/migration/vmstate.c
> >> > index a3a5f25946..d65fc84dfa 100644
> >> > --- a/migration/vmstate.c
> >> > +++ b/migration/vmstate.c
> >> > @@ -142,6 +142,21 @@ static void vmstate_handle_alloc(void *ptr, const VMStateField *field,
> >> >      }
> >> >  }
> >> >  
> >> > +static bool vmstate_ptr_marker_load(QEMUFile *f, bool *load_field,
> >> > +                                    Error **errp)
> >> > +{
> >> > +    int byte = qemu_get_byte(f);
> >> > +
> >> > +    if (byte == VMS_MARKER_PTR_NULL) {
> >> > +        /* When it's a null ptr marker, do not continue the load */
> >> > +        *load_field = false;
> >> > +        return true;
> >> > +    }
> >> > +
> >> > +    error_setg(errp, "Unexpected ptr marker: %d", byte);
> >> > +    return false;
> >> > +}
> >> > +
> >> >  static bool vmstate_pre_load(const VMStateDescription *vmsd, void *opaque,
> >> >                               Error **errp)
> >> >  {
> >> > @@ -264,30 +279,25 @@ bool vmstate_load_vmsd(QEMUFile *f, const VMStateDescription *vmsd,
> >> >              }
> >> >  
> >> >              for (i = 0; i < n_elems; i++) {
> >> > -                bool ok;
> >> > +                /* If we will process the load of field? */
> >> > +                bool load_field = true;
> >> 
> >> maybe valid_ptr would be more clear?
> >
> > I don't think we can?  Note, that we will only update this field iff the
> > field used the new VMS_ARRAY_OF_POINTER_AUTO_ALLOC, otherwise it simply
> > should be true always and it says "let's load this VMSD".  IOW, in most
> > cases there's no ptr, hence "valid_ptr=true" will be weird on its own I
> > think..
> >
> 
> Ah I see. I was looking at the save side and thinking we can maybe
> simplify the inner_field logic a bit, but it doesn't apply here. I'll
> try to write a patch on top of your series with my ideas.

Sure, I'll read it when it comes.  Or if you think I'm adding code will be
removed soon, just shoot and we discuss to see if we can avoid adding it in
the first place.

> 
> >> 
> >> > +                bool ok = true;
> >> >                  void *curr_elem = first_elem + size * i;
> >> > -                const VMStateField *inner_field;
> >> >  
> >> >                  if (field->flags & VMS_ARRAY_OF_POINTER) {
> >> >                      curr_elem = *(void **)curr_elem;
> >> >                  }
> >> >  
> >> >                  if (!curr_elem && size) {
> >> > -                    /*
> >> > -                     * If null pointer found (which should only happen in
> >> > -                     * an array of pointers), use null placeholder and do
> >> > -                     * not follow.
> >> > -                     */
> >> > -                    inner_field = vmsd_create_ptr_marker_field(field);
> >> > -                } else {
> >> > -                    inner_field = field;
> >> > +                    /* Read the marker instead of VMSD itself */
> >> > +                    if (!vmstate_ptr_marker_load(f, &load_field, errp)) {
> >> > +                        trace_vmstate_load_field_error(field->name, -EINVAL);
> >> > +                        return false;
> >> > +                    }
> >> >                  }
> >> >  
> >> > -                ok = vmstate_load_field(f, curr_elem, size, inner_field, errp);
> >> > -
> >> > -                /* If we used a fake temp field.. free it now */
> >> > -                if (inner_field != field) {
> >> > -                    g_clear_pointer((gpointer *)&inner_field, g_free);
> >> > +                if (load_field) {
> >> > +                    ok = vmstate_load_field(f, curr_elem, size, field, errp);
> >> >                  }
> >> >  
> >> >                  if (ok) {
> >> 
> 

-- 
Peter Xu

Re: [PATCH RFC 08/10] vmstate: Implement load of ptr marker in vmstate core

Posted by Alexander Graf 2 weeks, 3 days ago

On 19.03.26 22:57, Peter Xu wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> On Thu, Mar 19, 2026 at 05:56:52PM -0300, Fabiano Rosas wrote:
>> Peter Xu <peterx@redhat.com> writes:
>>
>>> The loader side of ptr marker is pretty straightforward, instead of playing
>>> the inner_field trick, just do the load manually assuming the marker layout
>>> is a stable ABI (which it is true already).
>>>
>>> This will remove some logic while loading VMSD, and hopefully it makes it
>>> slightly easier to read.  Unfortunately, we still need to keep the sender
>>> side because of the JSON blob we're maintaining..
>>>
>> /ramble
>> Is the JSON blob ABI? Can we kill it?
> Yeah, that's a fair ramble.  I had sometimes the same feeling that I wished
> it not be there, it'll save quite some work..
>
> Per my limited understanding of reading the code in the past, the plan 10
> years ago was having those blob to always be together with the binary
> stream so that the stream (especially when causing loading failures..) will
> be parsable and it's easier to know why the load fail.
>
> However I also confess in the past few years since I worked on migration, I
> never used it to debug any real VMSD issues..
>
> I think one reason might be that when migration was very new and when
> device emulation developers were easier to mess things up, crash happen
> more, and at that time this tool helps debugging things.  It'll also almost
> make the VM dump self-explanatory.
>
> Nowadays, we added more codes into migration to help, e.g., I recall we
> added things like QEMU_VM_SECTION_FOOTER and likely more after the vmdesc,
> which can also help to identify VMSD issues directly from dest QEMU errors.
>
> Let me copy some more people to see if there's any thoughts or inputs on
> the JSON blobs..


I added and used it for 2 purposes:

1. To make the actual content of the device state more clearly visible. 
It can help to stare at a device state dump to find out what's going wrong.
2. To allow for state diffing. When I introduced it, I would do a 
migration to file, run my test, do another migration and then diff the 
vmstates. That way I could easily see what actually changed between the 
runs. It helped me identify a few CPU register state issues.

I don't believe I ever ended up using it to debug live migration issues :).


Alex




Amazon Web Services Development Center Germany GmbH
Tamara-Danz-Str. 13
10243 Berlin
Geschaeftsfuehrung: Christof Hellmis, Andreas Stieger
Eingetragen am Amtsgericht Charlottenburg unter HRB 257764 B
Sitz: Berlin
Ust-ID: DE 365 538 597

Re: [PATCH RFC 08/10] vmstate: Implement load of ptr marker in vmstate core

Posted by Alexander Mikhalitsyn 2 weeks, 5 days ago

Am Mi., 18. März 2026 um 00:23 Uhr schrieb Peter Xu <peterx@redhat.com>:
>
> The loader side of ptr marker is pretty straightforward, instead of playing
> the inner_field trick, just do the load manually assuming the marker layout
> is a stable ABI (which it is true already).
>
> This will remove some logic while loading VMSD, and hopefully it makes it
> slightly easier to read.  Unfortunately, we still need to keep the sender
> side because of the JSON blob we're maintaining..
>
> This paves way for future processing of non-NULL markers as well.
>
> Signed-off-by: Peter Xu <peterx@redhat.com>

Reviewed-by: Alexander Mikhalitsyn <aleksandr.mikhalitsyn@futurfusion.io>

> ---
>  migration/vmstate-types.c | 12 ++++--------
>  migration/vmstate.c       | 40 ++++++++++++++++++++++++---------------
>  2 files changed, 29 insertions(+), 23 deletions(-)
>
> diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
> index b31689fc3c..ae465c5c2c 100644
> --- a/migration/vmstate-types.c
> +++ b/migration/vmstate-types.c
> @@ -363,14 +363,10 @@ static bool load_ptr_marker(QEMUFile *f, void *pv, size_t size,
>                              const VMStateField *field, Error **errp)
>
>  {
> -    int byte = qemu_get_byte(f);
> -
> -    if (byte == VMS_MARKER_PTR_NULL || byte == VMS_MARKER_PTR_VALID) {
> -        /* TODO: process PTR_VALID case */
> -        return true;
> -    }
> -
> -    error_setg(errp, "%s: unexpected ptr marker: %d", __func__, byte);
> +    /*
> +     * Load is done in vmstate core, see vmstate_ptr_marker_load().
> +     */
> +    g_assert_not_reached();
>      return false;
>  }
>
> diff --git a/migration/vmstate.c b/migration/vmstate.c
> index a3a5f25946..d65fc84dfa 100644
> --- a/migration/vmstate.c
> +++ b/migration/vmstate.c
> @@ -142,6 +142,21 @@ static void vmstate_handle_alloc(void *ptr, const VMStateField *field,
>      }
>  }
>
> +static bool vmstate_ptr_marker_load(QEMUFile *f, bool *load_field,
> +                                    Error **errp)
> +{
> +    int byte = qemu_get_byte(f);
> +
> +    if (byte == VMS_MARKER_PTR_NULL) {
> +        /* When it's a null ptr marker, do not continue the load */
> +        *load_field = false;
> +        return true;
> +    }
> +
> +    error_setg(errp, "Unexpected ptr marker: %d", byte);
> +    return false;
> +}
> +
>  static bool vmstate_pre_load(const VMStateDescription *vmsd, void *opaque,
>                               Error **errp)
>  {
> @@ -264,30 +279,25 @@ bool vmstate_load_vmsd(QEMUFile *f, const VMStateDescription *vmsd,
>              }
>
>              for (i = 0; i < n_elems; i++) {
> -                bool ok;
> +                /* If we will process the load of field? */
> +                bool load_field = true;
> +                bool ok = true;
>                  void *curr_elem = first_elem + size * i;
> -                const VMStateField *inner_field;
>
>                  if (field->flags & VMS_ARRAY_OF_POINTER) {
>                      curr_elem = *(void **)curr_elem;
>                  }
>
>                  if (!curr_elem && size) {
> -                    /*
> -                     * If null pointer found (which should only happen in
> -                     * an array of pointers), use null placeholder and do
> -                     * not follow.
> -                     */
> -                    inner_field = vmsd_create_ptr_marker_field(field);
> -                } else {
> -                    inner_field = field;
> +                    /* Read the marker instead of VMSD itself */
> +                    if (!vmstate_ptr_marker_load(f, &load_field, errp)) {
> +                        trace_vmstate_load_field_error(field->name, -EINVAL);
> +                        return false;
> +                    }
>                  }
>
> -                ok = vmstate_load_field(f, curr_elem, size, inner_field, errp);
> -
> -                /* If we used a fake temp field.. free it now */
> -                if (inner_field != field) {
> -                    g_clear_pointer((gpointer *)&inner_field, g_free);
> +                if (load_field) {
> +                    ok = vmstate_load_field(f, curr_elem, size, field, errp);
>                  }
>
>                  if (ok) {
> --
> 2.50.1
>