[PATCH 08/14] migration: Make qemu_savevm_query_pending() available anytime

Peter Xu posted 14 patches 3 days ago
Maintainers: Pierrick Bouvier <pierrick.bouvier@linaro.org>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Alex Williamson <alex@shazbot.org>, "Cédric Le Goater" <clg@redhat.com>, Halil Pasic <pasic@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Jason Herne <jjherne@linux.ibm.com>, Richard Henderson <richard.henderson@linaro.org>, Ilya Leoshkevich <iii@linux.ibm.com>, David Hildenbrand <david@kernel.org>, Cornelia Huck <cohuck@redhat.com>, Eric Farman <farman@linux.ibm.com>, Matthew Rosato <mjrosato@linux.ibm.com>, Eric Blake <eblake@redhat.com>, Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>, John Snow <jsnow@redhat.com>, Markus Armbruster <armbru@redhat.com>
[PATCH 08/14] migration: Make qemu_savevm_query_pending() available anytime
Posted by Peter Xu 3 days ago
After qemu_savevm_query_pending() be exposed to more code paths, it can be
used at very early stage when migration started and this may expose some
race conditions that we don't use to have.  This patch make it prepared
for such use cases so this API is fine to be used almost anytime.

What matters here is, querying pending for each module normally depends on
save_setup() being run first, otherwise modules may not be ready for the
query request.

Consider an early cancellation of migration after SETUP status but before
invocations of save_setup() hooks, source QEMU may fall into CANCELLING
stage directly from SETUP (not ACTIVE, which is the normal use case), in
which case save_setup() may not have been invoked and modules are not
ready.  However qemu_savevm_query_pending() may still be used in QMP
commands like query-migrate and causing crashes.

Guard such use case by introducing a boolean reflecting the availability of
vmstate save handlers on correct completions of save_setup()s.  So far,
only protect qemu_savevm_query_pending() with it.  Logically other hooks
face similar concern, but most of them shouldn't be reachable from random
code path except migration thread so it should be fine.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/migration.h |  8 ++++++++
 migration/savevm.h    |  2 +-
 migration/migration.c |  2 +-
 migration/savevm.c    | 37 +++++++++++++++++++++++++++++++++----
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/migration/migration.h b/migration/migration.h
index b6888daced..e504df6915 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -522,6 +522,14 @@ struct MigrationState {
      * anything as input.
      */
     bool has_block_bitmap_mapping;
+
+    /*
+     * This boolean reflects if the vmstate handlers have been properly
+     * setup on source side.  It is set after vmstate save_setup() hooks
+     * are successfully invoked, and cleared after save_cleanup()s.  It
+     * reflects a general availability of vmstate hooks on the source side.
+     */
+    bool save_setup_ready;
 };
 
 void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
diff --git a/migration/savevm.h b/migration/savevm.h
index 96fdf96d4e..04ed09cec2 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -42,7 +42,7 @@ int qemu_savevm_state_resume_prepare(MigrationState *s);
 void qemu_savevm_send_header(QEMUFile *f);
 void qemu_savevm_state_header(QEMUFile *f);
 int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
-void qemu_savevm_state_cleanup(void);
+void qemu_savevm_state_cleanup(MigrationState *s);
 void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 int qemu_savevm_state_complete_precopy(MigrationState *s);
 void qemu_savevm_query_pending(MigPendingData *pending, bool exact);
diff --git a/migration/migration.c b/migration/migration.c
index bb17bd0e68..a9ee3360e1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1283,7 +1283,7 @@ static void migration_cleanup(MigrationState *s)
     g_free(s->hostname);
     s->hostname = NULL;
 
-    qemu_savevm_state_cleanup();
+    qemu_savevm_state_cleanup(s);
     cpr_state_close();
     cpr_transfer_source_destroy(s);
 
diff --git a/migration/savevm.c b/migration/savevm.c
index b75c311a95..1d3fce45b9 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1387,7 +1387,8 @@ int qemu_savevm_state_non_iterable_early(QEMUFile *f,
     return 0;
 }
 
-static int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
+static int qemu_savevm_state_setup(MigrationState *s, QEMUFile *f,
+                                   Error **errp)
 {
     SaveStateEntry *se;
     int ret;
@@ -1409,6 +1410,13 @@ static int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
         }
     }
 
+    /*
+     * Logically, it should be paired with any hook being used who needs to
+     * load_acquire() the flag first.  So far, only save_query_pending()
+     * uses it.
+     */
+    qatomic_store_release(&s->save_setup_ready, true);
+
     return 0;
 }
 
@@ -1429,7 +1437,7 @@ int qemu_savevm_state_do_setup(QEMUFile *f, Error **errp)
         return ret;
     }
 
-    ret = qemu_savevm_state_setup(f, errp);
+    ret = qemu_savevm_state_setup(ms, f, errp);
     if (ret) {
         return ret;
     }
@@ -1764,10 +1772,23 @@ int qemu_savevm_state_complete_precopy(MigrationState *s)
 
 void qemu_savevm_query_pending(MigPendingData *pending, bool exact)
 {
+    MigrationState *s = migrate_get_current();
     SaveStateEntry *se;
 
     memset(pending, 0, sizeof(*pending));
 
+    /*
+     * This API can be invoked very early before SETUP is properly done, in
+     * that case don't invoke module queries because they're not ready.
+     * Just report all zeros.
+     *
+     * This is paired with save_setup_ready updates on save_setup() and
+     * save_cleanup().
+     */
+    if (!s || !qatomic_load_acquire(&s->save_setup_ready)) {
+        return;
+    }
+
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (!se->ops || !se->ops->save_query_pending) {
             continue;
@@ -1786,7 +1807,7 @@ void qemu_savevm_query_pending(MigPendingData *pending, bool exact)
                                     pending->postcopy_bytes);
 }
 
-void qemu_savevm_state_cleanup(void)
+void qemu_savevm_state_cleanup(MigrationState *s)
 {
     SaveStateEntry *se;
     Error *local_err = NULL;
@@ -1795,6 +1816,14 @@ void qemu_savevm_state_cleanup(void)
         error_report_err(local_err);
     }
 
+    s->save_setup_ready = false;
+    /*
+     * Make sure we clear the flag before invoking save_cleanup(), so any
+     * racy QMP query-migrate won't try to invoke any save hooks.  Just use
+     * an explicit barrier to be simple.
+     */
+    smp_mb();
+
     trace_savevm_state_cleanup();
     QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
         if (se->ops && se->ops->save_cleanup) {
@@ -1841,7 +1870,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
         error_setg_errno(errp, -ret, "Error while writing VM state");
     }
 cleanup:
-    qemu_savevm_state_cleanup();
+    qemu_savevm_state_cleanup(ms);
 
     if (ret != 0) {
         status = MIGRATION_STATUS_FAILED;
-- 
2.53.0
Re: [PATCH 08/14] migration: Make qemu_savevm_query_pending() available anytime
Posted by Juraj Marcin 1 day, 23 hours ago
On 2026-04-08 12:55, Peter Xu wrote:
> After qemu_savevm_query_pending() be exposed to more code paths, it can be
> used at very early stage when migration started and this may expose some
> race conditions that we don't use to have.  This patch make it prepared
> for such use cases so this API is fine to be used almost anytime.
> 
> What matters here is, querying pending for each module normally depends on
> save_setup() being run first, otherwise modules may not be ready for the
> query request.
> 
> Consider an early cancellation of migration after SETUP status but before
> invocations of save_setup() hooks, source QEMU may fall into CANCELLING
> stage directly from SETUP (not ACTIVE, which is the normal use case), in
> which case save_setup() may not have been invoked and modules are not
> ready.  However qemu_savevm_query_pending() may still be used in QMP
> commands like query-migrate and causing crashes.
> 
> Guard such use case by introducing a boolean reflecting the availability of
> vmstate save handlers on correct completions of save_setup()s.  So far,
> only protect qemu_savevm_query_pending() with it.  Logically other hooks
> face similar concern, but most of them shouldn't be reachable from random
> code path except migration thread so it should be fine.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/migration.h |  8 ++++++++
>  migration/savevm.h    |  2 +-
>  migration/migration.c |  2 +-
>  migration/savevm.c    | 37 +++++++++++++++++++++++++++++++++----
>  4 files changed, 43 insertions(+), 6 deletions(-)
> 
> diff --git a/migration/migration.h b/migration/migration.h
> index b6888daced..e504df6915 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -522,6 +522,14 @@ struct MigrationState {
>       * anything as input.
>       */
>      bool has_block_bitmap_mapping;
> +
> +    /*
> +     * This boolean reflects if the vmstate handlers have been properly
> +     * setup on source side.  It is set after vmstate save_setup() hooks
> +     * are successfully invoked, and cleared after save_cleanup()s.  It
> +     * reflects a general availability of vmstate hooks on the source side.
> +     */
> +    bool save_setup_ready;
>  };
>  
>  void migrate_set_state(MigrationStatus *state, MigrationStatus old_state,
> diff --git a/migration/savevm.h b/migration/savevm.h
> index 96fdf96d4e..04ed09cec2 100644
> --- a/migration/savevm.h
> +++ b/migration/savevm.h
> @@ -42,7 +42,7 @@ int qemu_savevm_state_resume_prepare(MigrationState *s);
>  void qemu_savevm_send_header(QEMUFile *f);
>  void qemu_savevm_state_header(QEMUFile *f);
>  int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy);
> -void qemu_savevm_state_cleanup(void);
> +void qemu_savevm_state_cleanup(MigrationState *s);
>  void qemu_savevm_state_complete_postcopy(QEMUFile *f);
>  int qemu_savevm_state_complete_precopy(MigrationState *s);
>  void qemu_savevm_query_pending(MigPendingData *pending, bool exact);
> diff --git a/migration/migration.c b/migration/migration.c
> index bb17bd0e68..a9ee3360e1 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -1283,7 +1283,7 @@ static void migration_cleanup(MigrationState *s)
>      g_free(s->hostname);
>      s->hostname = NULL;
>  
> -    qemu_savevm_state_cleanup();
> +    qemu_savevm_state_cleanup(s);
>      cpr_state_close();
>      cpr_transfer_source_destroy(s);
>  
> diff --git a/migration/savevm.c b/migration/savevm.c
> index b75c311a95..1d3fce45b9 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -1387,7 +1387,8 @@ int qemu_savevm_state_non_iterable_early(QEMUFile *f,
>      return 0;
>  }
>  
> -static int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
> +static int qemu_savevm_state_setup(MigrationState *s, QEMUFile *f,
> +                                   Error **errp)
>  {
>      SaveStateEntry *se;
>      int ret;
> @@ -1409,6 +1410,13 @@ static int qemu_savevm_state_setup(QEMUFile *f, Error **errp)
>          }
>      }
>  
> +    /*
> +     * Logically, it should be paired with any hook being used who needs to
> +     * load_acquire() the flag first.  So far, only save_query_pending()
> +     * uses it.
> +     */
> +    qatomic_store_release(&s->save_setup_ready, true);

What other savevm functions would benefit from this? Would it make sense
to include them in this patch/series?

> +
>      return 0;
>  }
>  
> @@ -1429,7 +1437,7 @@ int qemu_savevm_state_do_setup(QEMUFile *f, Error **errp)
>          return ret;
>      }
>  
> -    ret = qemu_savevm_state_setup(f, errp);
> +    ret = qemu_savevm_state_setup(ms, f, errp);
>      if (ret) {
>          return ret;
>      }
> @@ -1764,10 +1772,23 @@ int qemu_savevm_state_complete_precopy(MigrationState *s)
>  
>  void qemu_savevm_query_pending(MigPendingData *pending, bool exact)
>  {
> +    MigrationState *s = migrate_get_current();
>      SaveStateEntry *se;
>  
>      memset(pending, 0, sizeof(*pending));
>  
> +    /*
> +     * This API can be invoked very early before SETUP is properly done, in
> +     * that case don't invoke module queries because they're not ready.
> +     * Just report all zeros.
> +     *
> +     * This is paired with save_setup_ready updates on save_setup() and
> +     * save_cleanup().
> +     */
> +    if (!s || !qatomic_load_acquire(&s->save_setup_ready)) {
> +        return;
> +    }
> +
>      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>          if (!se->ops || !se->ops->save_query_pending) {
>              continue;
> @@ -1786,7 +1807,7 @@ void qemu_savevm_query_pending(MigPendingData *pending, bool exact)
>                                      pending->postcopy_bytes);
>  }
>  
> -void qemu_savevm_state_cleanup(void)
> +void qemu_savevm_state_cleanup(MigrationState *s)
>  {
>      SaveStateEntry *se;
>      Error *local_err = NULL;
> @@ -1795,6 +1816,14 @@ void qemu_savevm_state_cleanup(void)
>          error_report_err(local_err);
>      }
>  
> +    s->save_setup_ready = false;
> +    /*
> +     * Make sure we clear the flag before invoking save_cleanup(), so any
> +     * racy QMP query-migrate won't try to invoke any save hooks.  Just use
> +     * an explicit barrier to be simple.
> +     */
> +    smp_mb();
> +
>      trace_savevm_state_cleanup();
>      QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
>          if (se->ops && se->ops->save_cleanup) {
> @@ -1841,7 +1870,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp)
>          error_setg_errno(errp, -ret, "Error while writing VM state");
>      }
>  cleanup:
> -    qemu_savevm_state_cleanup();
> +    qemu_savevm_state_cleanup(ms);
>  
>      if (ret != 0) {
>          status = MIGRATION_STATUS_FAILED;
> -- 
> 2.53.0
>