diff mbox series

[v3,8/9] replay: Fix migration replay_mutex locking

Message ID 20240226082945.1452499-9-npiggin@gmail.com
State New
Headers show
Series replay: fixes and new test cases | expand

Commit Message

Nicholas Piggin Feb. 26, 2024, 8:29 a.m. UTC
Migration causes a number of events that need to go in the replay
trace, such as vm state transitions. The replay_mutex lock needs to
be held for these.

The simplest approach seems to be just take it up-front when taking
the bql.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 migration/migration.h |  2 --
 migration/migration.c | 11 ++++++++++-
 2 files changed, 10 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/migration/migration.h b/migration/migration.h
index f2c8b8f286..0621479a4e 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -543,6 +543,4 @@  int migration_rp_wait(MigrationState *s);
  */
 void migration_rp_kick(MigrationState *s);
 
-int migration_stop_vm(RunState state);
-
 #endif
diff --git a/migration/migration.c b/migration/migration.c
index 2e794db75c..80a5ce17d1 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -24,6 +24,7 @@ 
 #include "socket.h"
 #include "sysemu/runstate.h"
 #include "sysemu/sysemu.h"
+#include "sysemu/replay.h"
 #include "sysemu/cpu-throttle.h"
 #include "rdma.h"
 #include "ram.h"
@@ -162,7 +163,7 @@  static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
     return (a > b) - (a < b);
 }
 
-int migration_stop_vm(RunState state)
+static int migration_stop_vm(RunState state)
 {
     int ret = vm_stop_force_state(state);
 
@@ -2433,6 +2434,7 @@  static int postcopy_start(MigrationState *ms, Error **errp)
     }
 
     trace_postcopy_start();
+    replay_mutex_lock();
     bql_lock();
     trace_postcopy_start_set_run();
 
@@ -2542,6 +2544,7 @@  static int postcopy_start(MigrationState *ms, Error **errp)
     migration_downtime_end(ms);
 
     bql_unlock();
+    replay_mutex_unlock();
 
     if (migrate_postcopy_ram()) {
         /*
@@ -2583,6 +2586,7 @@  fail:
         }
     }
     bql_unlock();
+    replay_mutex_unlock();
     return -1;
 }
 
@@ -2634,6 +2638,7 @@  static int migration_completion_precopy(MigrationState *s,
 {
     int ret;
 
+    replay_mutex_lock();
     bql_lock();
     migration_downtime_start(s);
 
@@ -2662,6 +2667,7 @@  static int migration_completion_precopy(MigrationState *s,
                                              s->block_inactive);
 out_unlock:
     bql_unlock();
+    replay_mutex_unlock();
     return ret;
 }
 
@@ -3485,6 +3491,7 @@  static void *bg_migration_thread(void *opaque)
     trace_migration_thread_setup_complete();
     migration_downtime_start(s);
 
+    replay_mutex_lock();
     bql_lock();
 
     s->vm_old_state = runstate_get();
@@ -3522,6 +3529,7 @@  static void *bg_migration_thread(void *opaque)
      */
     migration_bh_schedule(bg_migration_vm_start_bh, s);
     bql_unlock();
+    replay_mutex_unlock();
 
     while (migration_is_active(s)) {
         MigIterateState iter_state = bg_migration_iteration_run(s);
@@ -3551,6 +3559,7 @@  fail:
         migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                 MIGRATION_STATUS_FAILED);
         bql_unlock();
+        replay_mutex_unlock();
     }
 
     bg_migration_iteration_finish(s);