diff mbox

[RFC,v3,39/49] replay: snapshotting the virtual machine

Message ID 20140731125708.1600.49349.stgit@PASHA-ISP.novsu.ac.ru
State New
Headers show

Commit Message

Pavel Dovgalyuk July 31, 2014, 12:57 p.m. UTC
This patch adds a 'period' parameter to the 'record' command line option. This
parameter turns on periodic snapshotting of the VM, which can be used by
replay to move forward and backward in time.
If the 'period' parameter is not specified, only one snapshot is made at the
start of the virtual machine.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
---
 cpus.c                   |   46 ++++++++-
 qemu-options.hx          |    5 +
 qemu-timer.c             |    4 +
 replay/replay-internal.h |   18 ++++
 replay/replay.c          |  226 ++++++++++++++++++++++++++++++++++++++++++++++
 vl.c                     |    3 +
 6 files changed, 294 insertions(+), 8 deletions(-)
diff mbox

Patch

diff --git a/cpus.c b/cpus.c
index 70df028..cd739d2 100644
--- a/cpus.c
+++ b/cpus.c
@@ -231,12 +231,23 @@  int64_t cpu_get_clock(void)
  */
 void cpu_enable_ticks(void)
 {
+    int64_t ti;
     /* Here, the really thing protected by seqlock is cpu_clock_offset. */
     seqlock_write_lock(&timers_state.vm_clock_seqlock);
     if (!timers_state.cpu_ticks_enabled) {
         if (!replay_icount) {
             timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
-            timers_state.cpu_clock_offset -= get_clock();
+
+            if (replay_mode == REPLAY_MODE_RECORD) {
+                ti = get_clock();
+                replay_save_clock(REPLAY_CLOCK_VIRTUAL, ti);
+            } else if (replay_mode == REPLAY_MODE_PLAY) {
+                ti = replay_read_clock(REPLAY_CLOCK_VIRTUAL);
+            } else {
+                ti = get_clock();
+            }
+
+            timers_state.cpu_clock_offset -= ti;
         }
         timers_state.cpu_ticks_enabled = 1;
     }
@@ -449,6 +460,22 @@  void qemu_clock_warp(QEMUClockType type)
     }
 }
 
+static bool is_replay_enabled(void *opaque)
+{
+    return replay_mode != REPLAY_MODE_NONE;
+}
+
+static const VMStateDescription vmstate_timers_for_replay = {
+    .name = "timer for replay",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_INT64(cpu_ticks_prev, TimersState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static const VMStateDescription vmstate_timers = {
     .name = "timer",
     .version_id = 2,
@@ -458,6 +485,14 @@  static const VMStateDescription vmstate_timers = {
         VMSTATE_INT64(dummy, TimersState),
         VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (VMStateSubsection[]) {
+        {
+            .vmsd = &vmstate_timers_for_replay,
+            .needed = is_replay_enabled,
+        }, {
+            /* empty */
+        }
     }
 };
 
@@ -549,9 +584,11 @@  static int do_vm_stop(RunState state)
     int ret = 0;
 
     if (runstate_is_running()) {
+        runstate_set(state);
+        /* Disabling ticks can cause a recursive call of vm_stop.
+           Setting the run state first prevents infinite recursion. */
         cpu_disable_ticks();
         pause_all_vcpus();
-        runstate_set(state);
         vm_state_notify(0, state);
         qapi_event_send_stop(&error_abort);
     }
@@ -1332,10 +1369,9 @@  static void tcg_exec_all(void)
         CPUState *cpu = next_cpu;
         CPUArchState *env = cpu->env_ptr;
 
-        qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
-                          (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
-
         if (cpu_can_run(cpu)) {
+            qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
+                              (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
             r = tcg_cpu_exec(env);
             if (r == EXCP_DEBUG) {
                 cpu_handle_guest_debug(cpu);
diff --git a/qemu-options.hx b/qemu-options.hx
index 7dcdf68..00315fa 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3349,12 +3349,13 @@  in @var{file}
 ETEXI
 
 DEF("record", HAS_ARG, QEMU_OPTION_record,
-    "-record fname=<filename>[,suffix=<suffix>,snapshot=<on/off>,icount=<icount>]\n"
+    "-record fname=<filename>[,period=<period>,suffix=<suffix>,snapshot=<on/off>,icount=<icount>]\n"
     "                writes replay file for latter replaying\n",
     QEMU_ARCH_ALL)
 STEXI
-@item -record fname=@var{file}[,suffix=@var{suffix},snapshot=@var{snapshot},icount=@var{icount}]
+@item -record fname=@var{file}[,period=@var{period},suffix=@var{suffix},snapshot=@var{snapshot},icount=@var{icount}]
 Writes compact execution trace into @var{file}.
+VM state is automatically saved every @var{period} seconds, if this parameter is specified.
 Changes for disk images are written
 into separate files with @var{suffix} added. If no @var{suffix} is
 specified, "replay_qcow" is used as suffix.
diff --git a/qemu-timer.c b/qemu-timer.c
index f8bf060..c33680f 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -104,7 +104,9 @@  QEMUTimerList *timerlist_new(QEMUClockType type,
     QEMUClock *clock = qemu_clock_ptr(type);
 
     timer_list = g_malloc0(sizeof(QEMUTimerList));
-    qemu_event_init(&timer_list->timers_done_ev, false);
+    /* Create the event in the signaled state, because it should be signaled
+       outside the timerlist_run_timers function */
+    qemu_event_init(&timer_list->timers_done_ev, true);
     timer_list->clock = clock;
     timer_list->notify_cb = cb;
     timer_list->notify_opaque = opaque;
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index 25b133f..74ff59b 100755
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -15,6 +15,8 @@ 
 #include <stdio.h>
 #include "sysemu/sysemu.h"
 
+/* internal data for savevm */
+#define EVENT_END_STARTUP           0
 /* for time_t event */
 #define EVENT_TIME_T                1
 /* for tm event */
@@ -23,6 +25,10 @@ 
 #define EVENT_INTERRUPT             15
 /* for shutdown request */
 #define EVENT_SHUTDOWN              20
+/* for the beginning of VM state saving */
+#define EVENT_SAVE_VM_BEGIN         21
+/* for the end of VM state saving */
+#define EVENT_SAVE_VM_END           22
 /* for emulated exceptions */
 #define EVENT_EXCEPTION             23
 /* for async events */
@@ -53,9 +59,21 @@  typedef struct ReplayState {
     int skipping_instruction;
     /*! Current step - number of processed instructions and timer events. */
     uint64_t current_step;
+    /*! Temporary data for saving/loading replay file position. */
+    uint64_t file_offset;
 } ReplayState;
 extern ReplayState replay_state;
 
+/*! Information about saved VM state */
+struct SavedStateInfo {
+    /* Offset in the replay log file where state is saved. */
+    uint64_t file_offset;
+    /* Step number, corresponding to the saved state. */
+    uint64_t step;
+};
+/*! Reference to the saved state */
+typedef struct SavedStateInfo SavedStateInfo;
+
 extern volatile unsigned int replay_data_kind;
 extern volatile unsigned int replay_has_unread_data;
 
diff --git a/replay/replay.c b/replay/replay.c
index ea4de5c..a9268db 100755
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -12,6 +12,8 @@ 
 #include "qemu-common.h"
 #include "replay.h"
 #include "replay-internal.h"
+#include "migration/vmstate.h"
+#include "monitor/monitor.h"
 
 /* Current version of the replay mechanism.
    Increase it when file format changes. */
@@ -30,12 +32,178 @@  char *replay_image_suffix;
 
 ReplayState replay_state;
 
+/*
+    Auto-saving for VM states data
+*/
+
+/* Minimum capacity of saved states information array */
+#define SAVED_STATES_MIN_CAPACITY   128
+/* Format of the name for the saved state */
+#define SAVED_STATE_NAME_FORMAT     "replay_%" PRId64
+
+/* Timer for auto-save VM states */
+static QEMUTimer *save_timer;
+/* Save state period in milliseconds (0 - save only once) */
+static uint64_t save_state_period;
+/* List of the saved states information */
+SavedStateInfo *saved_states;
+/* Number of saved states */
+static size_t saved_states_count;
+/* Capacity of the buffer for saved states */
+static size_t saved_states_capacity;
+/* Number of last loaded/saved state */
+static uint64_t current_saved_state;
+
+/*
+   Replay functions
+ */
 
 ReplaySubmode replay_get_play_submode(void)
 {
     return play_submode;
 }
 
+static void replay_pre_save(void *opaque)
+{
+    ReplayState *state = opaque;
+    state->file_offset = ftello64(replay_file);
+}
+
+static int replay_post_load(void *opaque, int version_id)
+{
+    first_cpu->instructions_count = 0;
+
+    ReplayState *state = opaque;
+    fseeko64(replay_file, state->file_offset, SEEK_SET);
+    replay_has_unread_data = 0;
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_replay = {
+    .name = "replay",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = replay_pre_save,
+    .post_load = replay_post_load,
+    .fields      = (VMStateField[]) {
+        VMSTATE_INT64_ARRAY(cached_clock, ReplayState, REPLAY_CLOCK_COUNT),
+        VMSTATE_INT32(skipping_instruction, ReplayState),
+        VMSTATE_UINT64(current_step, ReplayState),
+        VMSTATE_UINT64(file_offset, ReplayState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void replay_savevm(void *opaque)
+{
+    char name[128];
+    uint64_t offset;
+
+    offset = ftello64(replay_file);
+
+    replay_save_instructions();
+
+    replay_put_event(EVENT_SAVE_VM_BEGIN);
+
+    vm_stop(RUN_STATE_SAVE_VM);
+
+    /* save VM state */
+    sprintf(name, SAVED_STATE_NAME_FORMAT, current_saved_state);
+    if (save_vmstate(default_mon, name) > 0) {
+        /* if period is 0, save only once */
+        if (save_state_period != 0) {
+            timer_mod(save_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+                                  + save_state_period);
+        }
+
+        /* add more memory to buffer */
+        if (saved_states_count >= saved_states_capacity) {
+            saved_states_capacity += SAVED_STATES_MIN_CAPACITY;
+            saved_states = g_realloc(saved_states, saved_states_capacity
+                                                   * sizeof(SavedStateInfo));
+            if (!saved_states) {
+                saved_states_count = 0;
+                fprintf(stderr,
+                        "Replay: Saved states memory reallocation failed.\n");
+                exit(1);
+            }
+        }
+        /* save state ID into the buffer */
+        saved_states[saved_states_count].file_offset = offset;
+        saved_states[saved_states_count].step = replay_get_current_step();
+        ++saved_states_count;
+        ++current_saved_state;
+    } else {
+        fprintf(stderr, "Cannot save simulator states for replay.\n");
+    }
+
+    replay_put_event(EVENT_SAVE_VM_END);
+
+    tb_flush_all();
+
+    vm_start();
+}
+
+/*! Checks SAVEVM event while reading event log. */
+static void check_savevm(void)
+{
+    replay_fetch_data_kind();
+    if (replay_data_kind != EVENT_SAVE_VM_BEGIN
+        && replay_data_kind != EVENT_SAVE_VM_END) {
+        fprintf(stderr, "Replay: read wrong data kind %d within savevm\n",
+                replay_data_kind);
+        exit(1);
+    }
+    replay_has_unread_data = 0;
+}
+
+/*! Loads specified VM state. */
+static void replay_loadvm(int64_t state)
+{
+    char name[128];
+    bool running = runstate_is_running();
+    if (running && !qemu_in_vcpu_thread()) {
+        vm_stop(RUN_STATE_RESTORE_VM);
+    } else {
+        cpu_disable_ticks();
+    }
+
+    replay_clear_events();
+
+    sprintf(name, SAVED_STATE_NAME_FORMAT, state);
+    if (load_vmstate(name) < 0) {
+        fprintf(stderr, "Replay: cannot load VM state\n");
+        exit(1);
+    }
+    /* check end event */
+    check_savevm();
+
+    tb_flush_all();
+
+    current_saved_state = state;
+
+    cpu_enable_ticks();
+    if (running && !qemu_in_vcpu_thread()) {
+        vm_start();
+    }
+
+    replay_fetch_data_kind();
+    while (replay_data_kind >= EVENT_CLOCK
+           && replay_data_kind < EVENT_CLOCK + REPLAY_CLOCK_COUNT) {
+        replay_read_next_clock(-1);
+        replay_fetch_data_kind();
+    }
+}
+
+/*! Skips clock events saved to file while saving the VM state. */
+static void replay_skip_savevm(void)
+{
+    replay_has_unread_data = 0;
+    replay_loadvm(current_saved_state + 1);
+}
+
 bool skip_async_events(int stop_event)
 {
     /* nothing to skip - not all instructions used */
@@ -55,6 +223,13 @@  bool skip_async_events(int stop_event)
             replay_has_unread_data = 0;
             qemu_system_shutdown_request_impl();
             break;
+        case EVENT_SAVE_VM_BEGIN:
+            /* cannot correctly load VM while in CPU thread */
+            if (qemu_in_vcpu_thread()) {
+                return res;
+            }
+            replay_skip_savevm();
+            break;
         case EVENT_INSTRUCTION:
             first_cpu->instructions_count = replay_get_dword();
             return res;
@@ -285,6 +460,7 @@  static void replay_enable(const char *fname, int mode)
     replay_data_kind = -1;
     replay_state.skipping_instruction = 0;
     replay_state.current_step = 0;
+    current_saved_state = 0;
 
     /* skip file header for RECORD and check it for PLAY */
     if (replay_mode == REPLAY_MODE_RECORD) {
@@ -296,11 +472,23 @@  static void replay_enable(const char *fname, int mode)
             fprintf(stderr, "Replay: invalid input log file version\n");
             exit(1);
         }
+        /* read states table */
+        fseeko64(replay_file, offset, SEEK_SET);
+        saved_states_count = replay_get_qword();
+        saved_states_capacity = saved_states_count;
+        if (saved_states_count) {
+            saved_states = g_malloc(sizeof(SavedStateInfo)
+                                    * saved_states_count);
+            fread(saved_states, sizeof(SavedStateInfo), saved_states_count,
+                  replay_file);
+        }
         /* go to the beginning */
         fseek(replay_file, 12, SEEK_SET);
     }
 
     replay_init_events();
+
+    vmstate_register(NULL, 0, &vmstate_replay, &replay_state);
 }
 
 void replay_configure(QemuOpts *opts, int mode)
@@ -321,6 +509,7 @@  void replay_configure(QemuOpts *opts, int mode)
     }
 
     replay_icount = (int)qemu_opt_get_number(opts, "icount", 0);
+    save_state_period = 1000LL * qemu_opt_get_number(opts, "period", 0);
 
     replay_enable(fname, mode);
 }
@@ -332,6 +521,26 @@  void replay_init_timer(void)
     }
 
     replay_enable_events();
+
+    /* create timer for states auto-saving */
+    if (replay_mode == REPLAY_MODE_RECORD) {
+        saved_states_count = 0;
+        if (!saved_states) {
+            saved_states = g_malloc(sizeof(SavedStateInfo)
+                                    * SAVED_STATES_MIN_CAPACITY);
+            saved_states_capacity = SAVED_STATES_MIN_CAPACITY;
+        }
+        if (save_state_period) {
+            save_timer = timer_new_ms(QEMU_CLOCK_REALTIME, replay_savevm, NULL);
+            timer_mod(save_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
+        }
+        replay_put_event(EVENT_END_STARTUP);
+        /* Save it right now without waiting for timer */
+        replay_savevm(NULL);
+    } else if (replay_mode == REPLAY_MODE_PLAY) {
+        /* load starting VM state */
+        replay_loadvm(0);
+    }
 }
 
 void replay_finish(void)
@@ -349,6 +558,14 @@  void replay_finish(void)
             /* write end event */
             replay_put_event(EVENT_END);
 
+            /* write states table */
+            offset = ftello64(replay_file);
+            replay_put_qword(saved_states_count);
+            if (saved_states && saved_states_count) {
+                fwrite(saved_states, sizeof(SavedStateInfo),
+                       saved_states_count, replay_file);
+            }
+
             /* write header */
             fseek(replay_file, 0, SEEK_SET);
             replay_put_dword(REPLAY_VERSION);
@@ -358,6 +575,15 @@  void replay_finish(void)
         fclose(replay_file);
         replay_file = NULL;
     }
+    if (save_timer) {
+        timer_del(save_timer);
+        timer_free(save_timer);
+        save_timer = NULL;
+    }
+    if (saved_states) {
+        g_free(saved_states);
+        saved_states = NULL;
+    }
     if (replay_filename) {
         g_free(replay_filename);
         replay_filename = NULL;
diff --git a/vl.c b/vl.c
index 628aca6..b0127a2 100644
--- a/vl.c
+++ b/vl.c
@@ -575,6 +575,9 @@  static QemuOptsList qemu_replay_opts = {
         },{
             .name = "icount",
             .type = QEMU_OPT_NUMBER,
+        },{
+            .name = "period",
+            .type = QEMU_OPT_NUMBER,
         },
         { /* end of list */ }
     },