[PATCH v3 8/9] replay: Fix migration replay_mutex locking

Nicholas Piggin posted 9 patches 9 months ago
Maintainers: Richard Henderson <richard.henderson@linaro.org>, Paolo Bonzini <pbonzini@redhat.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>, Peter Xu <peterx@redhat.com>, Fabiano Rosas <farosas@suse.de>, John Snow <jsnow@redhat.com>, Cleber Rosa <crosa@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Wainer dos Santos Moschetta <wainersm@redhat.com>, Beraldo Leal <bleal@redhat.com>
There is a newer version of this series
[PATCH v3 8/9] replay: Fix migration replay_mutex locking
Posted by Nicholas Piggin 9 months ago
Migration causes a number of events that need to go in the replay
trace, such as vm state transitions. The replay_mutex lock needs to
be held for these.

The simplest approach seems to be just take it up-front when taking
the bql.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 migration/migration.h |  2 --
 migration/migration.c | 11 ++++++++++-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/migration/migration.h b/migration/migration.h
index f2c8b8f286..0621479a4e 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -543,6 +543,4 @@ int migration_rp_wait(MigrationState *s);
  */
 void migration_rp_kick(MigrationState *s);
 
-int migration_stop_vm(RunState state);
-
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 2e794db75c..80a5ce17d1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -24,6 +24,7 @@
 #include "socket.h"
 #include "sysemu/runstate.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/replay.h"
 #include "sysemu/cpu-throttle.h"
 #include "rdma.h"
 #include "ram.h"
@@ -162,7 +163,7 @@ static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
     return (a > b) - (a < b);
 }
 
-int migration_stop_vm(RunState state)
+static int migration_stop_vm(RunState state)
 {
     int ret = vm_stop_force_state(state);
 
@@ -2433,6 +2434,7 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     }
 
     trace_postcopy_start();
+    replay_mutex_lock();
     bql_lock();
     trace_postcopy_start_set_run();
 
@@ -2542,6 +2544,7 @@ static int postcopy_start(MigrationState *ms, Error **errp)
     migration_downtime_end(ms);
 
     bql_unlock();
+    replay_mutex_unlock();
 
     if (migrate_postcopy_ram()) {
         /*
@@ -2583,6 +2586,7 @@ fail:
         }
     }
     bql_unlock();
+    replay_mutex_unlock();
     return -1;
 }
 
@@ -2634,6 +2638,7 @@ static int migration_completion_precopy(MigrationState *s,
 {
     int ret;
 
+    replay_mutex_lock();
     bql_lock();
     migration_downtime_start(s);
 
@@ -2662,6 +2667,7 @@ static int migration_completion_precopy(MigrationState *s,
                                              s->block_inactive);
 out_unlock:
     bql_unlock();
+    replay_mutex_unlock();
     return ret;
 }
 
@@ -3485,6 +3491,7 @@ static void *bg_migration_thread(void *opaque)
     trace_migration_thread_setup_complete();
     migration_downtime_start(s);
 
+    replay_mutex_lock();
     bql_lock();
 
     s->vm_old_state = runstate_get();
@@ -3522,6 +3529,7 @@ static void *bg_migration_thread(void *opaque)
      */
     migration_bh_schedule(bg_migration_vm_start_bh, s);
     bql_unlock();
+    replay_mutex_unlock();
 
     while (migration_is_active(s)) {
         MigIterateState iter_state = bg_migration_iteration_run(s);
@@ -3551,6 +3559,7 @@ fail:
         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                 MIGRATION_STATUS_FAILED);
         bql_unlock();
+        replay_mutex_unlock();
     }
 
     bg_migration_iteration_finish(s);
-- 
2.42.0