[PATCH 07/13] migration: Pass in bql_held information from qemu_loadvm_state()

Peter Xu posted 13 patches 3 weeks, 1 day ago
Maintainers: Alex Williamson <alex@shazbot.org>, "Cédric Le Goater" <clg@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, Hailiang Zhang <zhanghailiang@xfusion.com>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, Li Zhijian <lizhijian@fujitsu.com>
[PATCH 07/13] migration: Pass in bql_held information from qemu_loadvm_state()
Posted by Peter Xu 3 weeks, 1 day ago
Teach qemu_loadvm_state() and some of the internal functions to know
whether we're holding BQL or not.

So far, all the callers still always pass in TRUE, hence no functional
change expected.  But it may change in the near future.

To reviewers: even if this is not functional change yet, it'll be the major
core functional change after we switch to threadified loadvm soon.  Please
Treat it as one to add explicit code to mark out which part of incoming
live migration would need to be executed always with the BQL, or would need
to be run always without BQL.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 migration/savevm.h    |  4 +--
 migration/colo.c      |  2 +-
 migration/migration.c |  2 +-
 migration/savevm.c    | 72 ++++++++++++++++++++++++++++++-------------
 4 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/migration/savevm.h b/migration/savevm.h
index c337e3e3d1..a04dee4166 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -64,10 +64,10 @@ void qemu_savevm_send_colo_enable(QEMUFile *f);
 void qemu_savevm_live_state(QEMUFile *f);
 int qemu_save_device_state(QEMUFile *f);
 
-int qemu_loadvm_state(QEMUFile *f, Error **errp);
+int qemu_loadvm_state(QEMUFile *f, bool bql_held, Error **errp);
 void qemu_loadvm_state_cleanup(MigrationIncomingState *mis);
 int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
-                           Error **errp);
+                           bool bql_held, Error **errp);
 int qemu_load_device_state(QEMUFile *f, Error **errp);
 int qemu_loadvm_approve_switchover(void);
 int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
diff --git a/migration/colo.c b/migration/colo.c
index db783f6fa7..4fd586951a 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -686,7 +686,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
 
     bql_lock();
     cpu_synchronize_all_states();
-    ret = qemu_loadvm_state_main(mis->from_src_file, mis, errp);
+    ret = qemu_loadvm_state_main(mis->from_src_file, mis, true, errp);
     bql_unlock();
 
     if (ret < 0) {
diff --git a/migration/migration.c b/migration/migration.c
index 4ed2a2e881..38a584afae 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -878,7 +878,7 @@ process_incoming_migration_co(void *opaque)
                       MIGRATION_STATUS_ACTIVE);
 
     mis->loadvm_co = qemu_coroutine_self();
-    ret = qemu_loadvm_state(mis->from_src_file, &local_err);
+    ret = qemu_loadvm_state(mis->from_src_file, true, &local_err);
     mis->loadvm_co = NULL;
 
     trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed");
diff --git a/migration/savevm.c b/migration/savevm.c
index 232cae090b..44aadc2f51 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -154,11 +154,12 @@ static void qemu_loadvm_thread_pool_destroy(MigrationIncomingState *mis)
 }
 
 static bool qemu_loadvm_thread_pool_wait(MigrationState *s,
-                                         MigrationIncomingState *mis)
+                                         MigrationIncomingState *mis,
+                                         bool bql_held)
 {
-    bql_unlock(); /* Let load threads do work requiring BQL */
-    thread_pool_wait(mis->load_threads);
-    bql_lock();
+    WITH_BQL_RELEASED(bql_held) {
+        thread_pool_wait(mis->load_threads);
+    }
 
     return !migrate_has_error(s);
 }
@@ -2117,7 +2118,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
     qemu_file_set_blocking(f, true, &error_fatal);
 
     /* TODO: sanity check that only postcopiable data will be loaded here */
-    load_res = qemu_loadvm_state_main(f, mis, &local_err);
+    load_res = qemu_loadvm_state_main(f, mis, true, &local_err);
 
     /*
      * This is tricky, but, mis->from_src_file can change after it
@@ -2420,7 +2421,8 @@ static void loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
  * Returns: Negative values on error
  *
  */
-static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis, Error **errp)
+static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis,
+                                      bool bql_held, Error **errp)
 {
     int ret;
     size_t length;
@@ -2471,7 +2473,7 @@ static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis, Error **errp)
         qemu_coroutine_yield();
     } while (1);
 
-    ret = qemu_loadvm_state_main(packf, mis, errp);
+    ret = qemu_loadvm_state_main(packf, mis, bql_held, errp);
     trace_loadvm_handle_cmd_packaged_main(ret);
     qemu_fclose(packf);
     object_unref(OBJECT(bioc));
@@ -2571,7 +2573,7 @@ static int loadvm_postcopy_handle_switchover_start(Error **errp)
  * LOADVM_QUIT All good, but exit the loop
  * <0          Error
  */
-static int loadvm_process_command(QEMUFile *f, Error **errp)
+static int loadvm_process_command(QEMUFile *f, bool bql_held, Error **errp)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     uint16_t cmd;
@@ -2641,7 +2643,8 @@ static int loadvm_process_command(QEMUFile *f, Error **errp)
         break;
 
     case MIG_CMD_PACKAGED:
-        return loadvm_handle_cmd_packaged(mis, errp);
+        /* PACKAGED may have bql dependency internally */
+        return loadvm_handle_cmd_packaged(mis, bql_held, errp);
 
     case MIG_CMD_POSTCOPY_ADVISE:
         return loadvm_postcopy_handle_advise(mis, len, errp);
@@ -2666,7 +2669,11 @@ static int loadvm_process_command(QEMUFile *f, Error **errp)
         return loadvm_process_enable_colo(mis, errp);
 
     case MIG_CMD_SWITCHOVER_START:
-        return loadvm_postcopy_handle_switchover_start(errp);
+        WITH_BQL_HELD(bql_held) {
+            /* TODO: drop the BQL dependency */
+            ret = loadvm_postcopy_handle_switchover_start(errp);
+        }
+        return ret;
     }
 
     return 0;
@@ -2882,6 +2889,10 @@ static int qemu_loadvm_state_header(QEMUFile *f, Error **errp)
             return -EINVAL;
         }
 
+        /*
+         * NOTE: this can be invoked with/without BQL.  It's safe because
+         * vmstate_configuration (and its hooks) doesn't rely on BQL status.
+         */
         ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0,
                                  errp);
         if (ret) {
@@ -3072,7 +3083,7 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
 }
 
 int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis,
-                           Error **errp)
+                           bool bql_held, Error **errp)
 {
     ERRP_GUARD();
     uint8_t section_type;
@@ -3094,20 +3105,33 @@ retry:
         switch (section_type) {
         case QEMU_VM_SECTION_START:
         case QEMU_VM_SECTION_FULL:
-            ret = qemu_loadvm_section_start_full(f, section_type, errp);
+            /*
+             * FULL should normally require BQL, e.g. during post_load()
+             * there can be memory region updates.  START may or may not
+             * require it, but just to keep it simple to always hold BQL
+             * for now.
+             */
+            WITH_BQL_HELD(bql_held) {
+                ret = qemu_loadvm_section_start_full(f, section_type, errp);
+            }
             if (ret < 0) {
                 goto out;
             }
             break;
         case QEMU_VM_SECTION_PART:
         case QEMU_VM_SECTION_END:
+            /* PART / END may be loaded without BQL */
             ret = qemu_loadvm_section_part_end(f, section_type, errp);
             if (ret < 0) {
                 goto out;
             }
             break;
         case QEMU_VM_COMMAND:
-            ret = loadvm_process_command(f, errp);
+            /*
+             * Be careful; QEMU_VM_COMMAND can embed FULL sections, so it
+             * may internally need BQL.
+             */
+            ret = loadvm_process_command(f, bql_held, errp);
             trace_qemu_loadvm_state_section_command(ret);
             if ((ret < 0) || (ret == LOADVM_QUIT)) {
                 goto out;
@@ -3152,7 +3176,7 @@ out:
     return ret;
 }
 
-int qemu_loadvm_state(QEMUFile *f, Error **errp)
+int qemu_loadvm_state(QEMUFile *f, bool bql_held, Error **errp)
 {
     MigrationState *s = migrate_get_current();
     MigrationIncomingState *mis = migration_incoming_get_current();
@@ -3177,9 +3201,12 @@ int qemu_loadvm_state(QEMUFile *f, Error **errp)
         qemu_loadvm_state_switchover_ack_needed(mis);
     }
 
-    cpu_synchronize_all_pre_loadvm();
+    /* run_on_cpu() requires BQL */
+    WITH_BQL_HELD(bql_held) {
+        cpu_synchronize_all_pre_loadvm();
+    }
 
-    ret = qemu_loadvm_state_main(f, mis, errp);
+    ret = qemu_loadvm_state_main(f, mis, bql_held, errp);
     qemu_event_set(&mis->main_thread_load_event);
 
     trace_qemu_loadvm_state_post_main(ret);
@@ -3195,7 +3222,7 @@ int qemu_loadvm_state(QEMUFile *f, Error **errp)
     /* When reaching here, it must be precopy */
     if (ret == 0) {
         if (migrate_has_error(migrate_get_current()) ||
-            !qemu_loadvm_thread_pool_wait(s, mis)) {
+            !qemu_loadvm_thread_pool_wait(s, mis, bql_held)) {
             ret = -EINVAL;
             error_setg(errp,
                        "Error while loading vmstate");
@@ -3249,7 +3276,10 @@ int qemu_loadvm_state(QEMUFile *f, Error **errp)
         }
     }
 
-    cpu_synchronize_all_post_init();
+    /* run_on_cpu() requires BQL */
+    WITH_BQL_HELD(bql_held) {
+        cpu_synchronize_all_post_init();
+    }
 
     return ret;
 }
@@ -3260,7 +3290,7 @@ int qemu_load_device_state(QEMUFile *f, Error **errp)
     int ret;
 
     /* Load QEMU_VM_SECTION_FULL section */
-    ret = qemu_loadvm_state_main(f, mis, errp);
+    ret = qemu_loadvm_state_main(f, mis, true, errp);
     if (ret < 0) {
         return ret;
     }
@@ -3495,7 +3525,7 @@ void qmp_xen_load_devices_state(const char *filename, Error **errp)
     f = qemu_file_new_input(QIO_CHANNEL(ioc));
     object_unref(OBJECT(ioc));
 
-    ret = qemu_loadvm_state(f, errp);
+    ret = qemu_loadvm_state(f, true, errp);
     qemu_fclose(f);
     if (ret < 0) {
         error_prepend(errp, "loading Xen device state failed: ");
@@ -3573,7 +3603,7 @@ bool load_snapshot(const char *name, const char *vmstate,
         ret = -EINVAL;
         goto err_drain;
     }
-    ret = qemu_loadvm_state(f, errp);
+    ret = qemu_loadvm_state(f, true, errp);
     migration_incoming_state_destroy();
 
     bdrv_drain_all_end();
-- 
2.50.1
Re: [PATCH 07/13] migration: Pass in bql_held information from qemu_loadvm_state()
Posted by Vladimir Sementsov-Ogievskiy 2 weeks, 3 days ago
On 22.10.25 22:26, Peter Xu wrote:
> Teach qemu_loadvm_state() and some of the internal functions to know
> whether we're holding BQL or not.

Actually, this commit does more: not only pass the bql_held information,
but also by introduce some WITH_BQL_HELD() sections.

IMHO it could be split:

1. only add bql_held parameters, which are used only to passthorough to
called functions. That's just to simplify further commits. Make the
information available. At this commit, we only need to check, that
passed information is correct (is it really held, may be add some
comments/assertions to make it obvious)

2. one or more commits, which prepares different functions to be called
in thread: support bql_held=false by introducing WITH_BQL_HELD() sections.
In such commit, we should lookthorgh the whole function and check that it
actually prepared to be called from thread.


Hmm, or without [1.], there may be several commits to prepare different
functions. Or maybe, even one commit as it is, but change commit subject
and message somehow, to reflect all the changes..

> 
> So far, all the callers still always pass in TRUE, hence no functional
> change expected.  But it may change in the near future.
> 
> To reviewers: even if this is not functional change yet, it'll be the major
> core functional change after we switch to threadified loadvm soon.  Please
> Treat it as one to add explicit code to mark out which part of incoming
> live migration would need to be executed always with the BQL, or would need
> to be run always without BQL.
> 
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---

[..]

> diff --git a/migration/colo.c b/migration/colo.c
> index db783f6fa7..4fd586951a 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -686,7 +686,7 @@ static void colo_incoming_process_checkpoint(MigrationIncomingState *mis,
>   
>       bql_lock();
>       cpu_synchronize_all_states();
> -    ret = qemu_loadvm_state_main(mis->from_src_file, mis, errp);
> +    ret = qemu_loadvm_state_main(mis->from_src_file, mis, true, errp);

That one is obvious..

>       bql_unlock();
>   
>       if (ret < 0) {
> diff --git a/migration/migration.c b/migration/migration.c
> index 4ed2a2e881..38a584afae 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -878,7 +878,7 @@ process_incoming_migration_co(void *opaque)
>                         MIGRATION_STATUS_ACTIVE);
>   
>       mis->loadvm_co = qemu_coroutine_self();
> -    ret = qemu_loadvm_state(mis->from_src_file, &local_err);
> +    ret = qemu_loadvm_state(mis->from_src_file, true, &local_err);

Here, why are we sure? Are coroutines triggered by QMP command always run under BQL?

Maybe, worth an assertion.

>       mis->loadvm_co = NULL;
>   
>       trace_vmstate_downtime_checkpoint("dst-precopy-loadvm-completed");
> diff --git a/migration/savevm.c b/migration/savevm.c
> index 232cae090b..44aadc2f51 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -154,11 +154,12 @@ static void qemu_loadvm_thread_pool_destroy(MigrationIncomingState *mis)
>   }
>   
>   static bool qemu_loadvm_thread_pool_wait(MigrationState *s,
> -                                         MigrationIncomingState *mis)
> +                                         MigrationIncomingState *mis,
> +                                         bool bql_held)
>   {
> -    bql_unlock(); /* Let load threads do work requiring BQL */
> -    thread_pool_wait(mis->load_threads);
> -    bql_lock();
> +    WITH_BQL_RELEASED(bql_held) {
> +        thread_pool_wait(mis->load_threads);
> +    }
>   
>       return !migrate_has_error(s);

The function is now prepared to be called from thread, as migrate_has_error() has own mutex.

>   }
> @@ -2117,7 +2118,7 @@ static void *postcopy_ram_listen_thread(void *opaque)
>       qemu_file_set_blocking(f, true, &error_fatal);
>   
>       /* TODO: sanity check that only postcopiable data will be loaded here */
> -    load_res = qemu_loadvm_state_main(f, mis, &local_err);
> +    load_res = qemu_loadvm_state_main(f, mis, true, &local_err);

Is it correct? I see, a bit later, postcopy_ram_listen_thread() does

     bql_lock();
     migration_incoming_state_destroy();
     bql_unlock();

so, I assume, that before this, when we call qemu_loadvm_state_main(), BQL is not actually locked?

>   
>       /*
>        * This is tricky, but, mis->from_src_file can change after it
> @@ -2420,7 +2421,8 @@ static void loadvm_postcopy_handle_resume(MigrationIncomingState *mis)
>    * Returns: Negative values on error
>    *
>    */
> -static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis, Error **errp)
> +static int loadvm_handle_cmd_packaged(MigrationIncomingState *mis,
> +                                      bool bql_held, Error **errp)
>   {
>       int ret;

[..]

-- 
Best regards,
Vladimir