diff mbox series

[V6,07/14] migration: preserve suspended for snapshot

Message ID 1701380247-340457-8-git-send-email-steven.sistare@oracle.com
State New
Headers show
Series fix migration of suspended runstate | expand

Commit Message

Steven Sistare Nov. 30, 2023, 9:37 p.m. UTC
Restoring a snapshot can break a suspended guest.  Snapshots suffer from
the same suspended-state issues that affect live migration, plus they must
handle an additional problematic scenario, which is that a running vm must
remain running if it loads a suspended snapshot.  Currently, after loading
such a snapshot, the vm_start fails.  The runstate is RUNNING, but the guest
is not.

To save, the vm_stop call now completely stops the suspended state, courtesy
of a recent patch.  Finish with vm_resume to leave the vm in the state it had
prior to the save, correctly restoring the suspended state.

To load, if the snapshot is not suspended, then vm_stop + vm_resume
correctly handles all states, and leaves the vm in the state it had prior
to the load.  However, if the snapshot is suspended, restoration is
trickier.  First, call vm_resume to restore the state to suspended so the
current state matches the saved state.  Then, if the pre-load state is
running, call wakeup to resume running.

Prior to these changes, the vm_stop to RUN_STATE_SAVE_VM and
RUN_STATE_RESTORE_VM did not change runstate if the current state was
paused, suspended, or prelaunch, but now it does, so allow these
transitions.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 include/migration/snapshot.h   |  7 +++++++
 migration/migration-hmp-cmds.c |  8 +++++---
 migration/savevm.c             | 23 +++++++++++++----------
 system/runstate.c              |  5 +++++
 system/vl.c                    |  2 ++
 5 files changed, 32 insertions(+), 13 deletions(-)

Comments

Peter Xu Dec. 5, 2023, 9:35 p.m. UTC | #1
On Thu, Nov 30, 2023 at 01:37:20PM -0800, Steve Sistare wrote:
> Restoring a snapshot can break a suspended guest.  Snapshots suffer from
> the same suspended-state issues that affect live migration, plus they must
> handle an additional problematic scenario, which is that a running vm must
> remain running if it loads a suspended snapshot.  Currently, after loading
> such a snapshot, the vm_start fails.  The runstate is RUNNING, but the guest
> is not.
> 
> To save, the vm_stop call now completely stops the suspended state, courtesy
> of a recent patch.  Finish with vm_resume to leave the vm in the state it had
> prior to the save, correctly restoring the suspended state.
> 
> To load, if the snapshot is not suspended, then vm_stop + vm_resume
> correctly handles all states, and leaves the vm in the state it had prior
> to the load.  However, if the snapshot is suspended, restoration is
> trickier.  First, call vm_resume to restore the state to suspended so the
> current state matches the saved state.  Then, if the pre-load state is
> running, call wakeup to resume running.
> 
> Prior to these changes, the vm_stop to RUN_STATE_SAVE_VM and
> RUN_STATE_RESTORE_VM did not change runstate if the current state was
> paused, suspended, or prelaunch, but now it does, so allow these
> transitions.
> 
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>

Reviewed-by: Peter Xu <peterx@redhat.com>

One nitpick below:

[...]

> +void load_snapshot_resume(RunState state)
> +{
> +    vm_resume(state);
> +    if (state == RUN_STATE_RUNNING && runstate_get() == RUN_STATE_SUSPENDED) {
> +        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);

Please avoid using NULL for Error**.  &error_abort seems apropriate.

Thanks,
diff mbox series

Patch

diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index e72083b..9e4dcaa 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -16,6 +16,7 @@ 
 #define QEMU_MIGRATION_SNAPSHOT_H
 
 #include "qapi/qapi-builtin-types.h"
+#include "qapi/qapi-types-run-state.h"
 
 /**
  * save_snapshot: Save an internal snapshot.
@@ -61,4 +62,10 @@  bool delete_snapshot(const char *name,
                     bool has_devices, strList *devices,
                     Error **errp);
 
+/**
+ * load_snapshot_resume: Restore runstate after loading snapshot.
+ * @state: state to restore
+ */
+void load_snapshot_resume(RunState state);
+
 #endif
diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
index 86ae832..c8d70bc 100644
--- a/migration/migration-hmp-cmds.c
+++ b/migration/migration-hmp-cmds.c
@@ -399,15 +399,17 @@  void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict)
 
 void hmp_loadvm(Monitor *mon, const QDict *qdict)
 {
-    int saved_vm_running  = runstate_is_running();
+    RunState saved_state = runstate_get();
+
     const char *name = qdict_get_str(qdict, "name");
     Error *err = NULL;
 
     vm_stop(RUN_STATE_RESTORE_VM);
 
-    if (load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) {
-        vm_start();
+    if (load_snapshot(name, NULL, false, NULL, &err)) {
+        load_snapshot_resume(saved_state);
     }
+
     hmp_handle_error(mon, err);
 }
 
diff --git a/migration/savevm.c b/migration/savevm.c
index eec5503..78697c0 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -3046,7 +3046,7 @@  bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
     QEMUSnapshotInfo sn1, *sn = &sn1;
     int ret = -1, ret2;
     QEMUFile *f;
-    int saved_vm_running;
+    RunState saved_state = runstate_get();
     uint64_t vm_state_size;
     g_autoptr(GDateTime) now = g_date_time_new_now_local();
     AioContext *aio_context;
@@ -3094,8 +3094,6 @@  bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
     }
     aio_context = bdrv_get_aio_context(bs);
 
-    saved_vm_running = runstate_is_running();
-
     global_state_store();
     vm_stop(RUN_STATE_SAVE_VM);
 
@@ -3163,9 +3161,7 @@  bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
 
     bdrv_drain_all_end();
 
-    if (saved_vm_running) {
-        vm_start();
-    }
+    vm_resume(saved_state);
     return ret == 0;
 }
 
@@ -3339,6 +3335,14 @@  err_drain:
     return false;
 }
 
+void load_snapshot_resume(RunState state)
+{
+    vm_resume(state);
+    if (state == RUN_STATE_RUNNING && runstate_get() == RUN_STATE_SUSPENDED) {
+        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+    }
+}
+
 bool delete_snapshot(const char *name, bool has_devices,
                      strList *devices, Error **errp)
 {
@@ -3403,16 +3407,15 @@  static void snapshot_load_job_bh(void *opaque)
 {
     Job *job = opaque;
     SnapshotJob *s = container_of(job, SnapshotJob, common);
-    int orig_vm_running;
+    RunState orig_state = runstate_get();
 
     job_progress_set_remaining(&s->common, 1);
 
-    orig_vm_running = runstate_is_running();
     vm_stop(RUN_STATE_RESTORE_VM);
 
     s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp);
-    if (s->ret && orig_vm_running) {
-        vm_start();
+    if (s->ret) {
+        load_snapshot_resume(orig_state);
     }
 
     job_progress_update(&s->common, 1);
diff --git a/system/runstate.c b/system/runstate.c
index e2fa204..ca9eb54 100644
--- a/system/runstate.c
+++ b/system/runstate.c
@@ -77,6 +77,7 @@  typedef struct {
 
 static const RunStateTransition runstate_transitions_def[] = {
     { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
+    { RUN_STATE_PRELAUNCH, RUN_STATE_SUSPENDED },
 
     { RUN_STATE_DEBUG, RUN_STATE_RUNNING },
     { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
@@ -132,6 +133,7 @@  static const RunStateTransition runstate_transitions_def[] = {
 
     { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
     { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
+    { RUN_STATE_RESTORE_VM, RUN_STATE_SUSPENDED },
 
     { RUN_STATE_COLO, RUN_STATE_RUNNING },
     { RUN_STATE_COLO, RUN_STATE_PRELAUNCH },
@@ -150,6 +152,7 @@  static const RunStateTransition runstate_transitions_def[] = {
     { RUN_STATE_RUNNING, RUN_STATE_COLO},
 
     { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
+    { RUN_STATE_SAVE_VM, RUN_STATE_SUSPENDED },
 
     { RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED },
     { RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE },
@@ -163,6 +166,8 @@  static const RunStateTransition runstate_transitions_def[] = {
     { RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH },
     { RUN_STATE_SUSPENDED, RUN_STATE_COLO},
     { RUN_STATE_SUSPENDED, RUN_STATE_PAUSED},
+    { RUN_STATE_SUSPENDED, RUN_STATE_SAVE_VM },
+    { RUN_STATE_SUSPENDED, RUN_STATE_RESTORE_VM },
 
     { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
     { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
diff --git a/system/vl.c b/system/vl.c
index 2bcd9ef..2fbbbba 100644
--- a/system/vl.c
+++ b/system/vl.c
@@ -2706,7 +2706,9 @@  void qmp_x_exit_preconfig(Error **errp)
     qemu_machine_creation_done();
 
     if (loadvm) {
+        RunState state = autostart ? RUN_STATE_RUNNING : runstate_get();
         load_snapshot(loadvm, NULL, false, NULL, &error_fatal);
+        load_snapshot_resume(state);
     }
     if (replay_mode != REPLAY_MODE_NONE) {
         replay_vmstate_init();