Patchwork [RFC,2/2] live backup vm, snapshots all lively

login
register
mail settings
Submitter Wayne Xia
Date Dec. 8, 2012, 8:24 a.m.
Message ID <1354955059-14391-3-git-send-email-xiawenc@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/204428/
State New
Headers show

Comments

Wayne Xia - Dec. 8, 2012, 8:24 a.m.
This patch uses a trick: it live-migrates the VM state to local block
images and then creates internal snapshots. Basically, it connects the
live migration and savevm functions to get a full live backup effect.
  In the monitor, type "migrate image:" to trigger this action for
a test.

Signed-off-by: Wenchao Xia <xiawenc@linux.vnet.ibm.com>
---
 buffered_file.c |  106 +++++++++++++++++++++++++++
 buffered_file.h |    1 +
 migration.c     |  215 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 migration.h     |   13 ++++
 4 files changed, 335 insertions(+), 0 deletions(-)

Patch

diff --git a/buffered_file.c b/buffered_file.c
index bd0f61d..63284ed 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -267,3 +267,109 @@  QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state)
 
     return s->file;
 }
+
+/* image operations */
+
+/*
+ * put_buffer callback of the image-backed QEMUFile.
+ *
+ * Nothing is buffered here: @size bytes from @buf are passed directly to
+ * bdrv_save_vmstate() at offset @pos on the image stored in the migration
+ * state.  A call with @pos == 0 and @size == 0 carries no data; it acts as
+ * a "ready" poll and triggers migrate_image_put_ready() while the bytes
+ * written so far stay below xfer_limit.
+ *
+ * Returns @size on success.  On failure, returns the error latched on the
+ * file or the negative value returned by bdrv_save_vmstate().
+ */
+static int image_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
+{
+    QEMUFileBuffered *file = opaque;
+    BlockDriverState *target = file->migration_state->image.bds;
+    ssize_t rc;
+
+    DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);
+
+    /* Do not write anything once the file carries an error. */
+    rc = qemu_file_get_error(file->file);
+    if (rc != 0) {
+        DPRINTF("flush when error, bailing: %s\n", strerror(-rc));
+        return rc;
+    }
+
+    DPRINTF("unfreezing output\n");
+    file->freeze_output = 0;
+
+    if (size > 0) {
+        /*
+         * No intermediate buffer: this saves a memcpy, at the price of a
+         * less accurate speed limit.  Open question from the author:
+         * would buffering improve performance here?
+         */
+        rc = bdrv_save_vmstate(target, buf, pos, size);
+        if (rc < 0) {
+            DPRINTF("bdrv_save_vmstate error, bailing: %s\n",
+                     strerror(-rc));
+            return rc;
+        }
+        file->bytes_xfer += size;
+    } else if (pos == 0 && size == 0) {
+        /* Empty write at offset 0: the timer asks whether we may go on. */
+        DPRINTF("file is ready\n");
+        if (!file->freeze_output && file->bytes_xfer < file->xfer_limit) {
+            DPRINTF("notifying client\n");
+            migrate_image_put_ready(file->migration_state);
+        }
+    }
+
+    return size;
+}
+
+/*
+ * close callback of the image-backed QEMUFile.
+ *
+ * Flushes the image the VM state was written to, then removes and frees
+ * the rate timer and releases the wrapper state itself.
+ *
+ * Returns the result of bdrv_flush().
+ */
+static int image_close(void *opaque)
+{
+    QEMUFileBuffered *file = opaque;
+    int flush_ret;
+
+    DPRINTF("closing\n");
+    file->xfer_limit = INT_MAX;
+    flush_ret = bdrv_flush(file->migration_state->image.bds);
+
+    /* @file is gone after this point; only the flush result survives. */
+    qemu_del_timer(file->timer);
+    qemu_free_timer(file->timer);
+    g_free(file->buffer);
+    g_free(file);
+
+    return flush_ret;
+}
+
+/*
+ * Callback table of the image-backed QEMUFile: writing and closing use the
+ * image_* handlers, rate limiting reuses the buffered_* handlers.
+ */
+static const QEMUFileOps image_file_ops = {
+    .put_buffer = image_put_buffer,
+    .close = image_close,
+    .rate_limit = buffered_rate_limit,
+    .get_rate_limit = buffered_get_rate_limit,
+    .set_rate_limit = buffered_set_rate_limit,
+    .get_fd = NULL, /* no get_fd handler is provided */
+};
+
+/*
+ * Timer callback of the image-backed QEMUFile.
+ *
+ * If the file carries an error, the file state is torn down and the timer
+ * is not re-armed.  Otherwise the timer is re-armed 100 ms ahead and,
+ * unless output is frozen, the transferred-bytes counter is reset and an
+ * empty write at offset 0 is issued, which image_put_buffer() handles as
+ * a "ready" poll.
+ */
+static void image_rate_tick(void *opaque)
+{
+    QEMUFileBuffered *s = opaque;
+
+    if (qemu_file_get_error(s->file)) {
+        /*
+         * image_file_ops registers image_close() as the close handler of
+         * this file, so tear down with it rather than buffered_close().
+         * NOTE(review): buffered_close() presumably belongs to the
+         * fd-based buffered file -- confirm.
+         */
+        image_close(s);
+        return;
+    }
+
+    qemu_mod_timer(s->timer, qemu_get_clock_ms(rt_clock) + 100);
+
+    if (s->freeze_output) {
+        return;
+    }
+
+    /* A new tick starts: the transfer budget is available again. */
+    s->bytes_xfer = 0;
+
+    image_put_buffer(s, NULL, 0, 0);
+}
+
+/*
+ * Create a QEMUFile whose data is written through image_file_ops, i.e.
+ * into the image recorded in @migration_state.
+ *
+ * The per-tick transfer limit is a tenth of the bandwidth limit and the
+ * rate timer is armed to fire 100 ms from now (presumably bandwidth_limit
+ * is a per-second value -- TODO confirm).
+ *
+ * Returns the new QEMUFile.
+ */
+QEMUFile *qemu_fopen_ops_image(MigrationState *migration_state)
+{
+    QEMUFileBuffered *img = g_malloc0(sizeof(*img));
+
+    img->migration_state = migration_state;
+    img->xfer_limit = migration_state->bandwidth_limit / 10;
+    img->file = qemu_fopen_ops(img, &image_file_ops);
+
+    img->timer = qemu_new_timer_ms(rt_clock, image_rate_tick, img);
+    qemu_mod_timer(img->timer, qemu_get_clock_ms(rt_clock) + 100);
+
+    return img->file;
+}
diff --git a/buffered_file.h b/buffered_file.h
index ef010fe..8c02b1a 100644
--- a/buffered_file.h
+++ b/buffered_file.h
@@ -18,5 +18,6 @@ 
 #include "migration.h"
 
 QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state);
+/* Open a QEMUFile that writes the migration stream into a block image. */
+QEMUFile *qemu_fopen_ops_image(MigrationState *migration_state);
 
 #endif
diff --git a/migration.c b/migration.c
index 73ce170..385cafd 100644
--- a/migration.c
+++ b/migration.c
@@ -524,6 +524,7 @@  void qmp_migrate(const char *uri, bool has_blk, bool blk,
 
     s = migrate_init(&params);
 
+
     if (strstart(uri, "tcp:", &p)) {
         tcp_start_outgoing_migration(s, p, &local_err);
 #if !defined(WIN32)
@@ -534,6 +535,16 @@  void qmp_migrate(const char *uri, bool has_blk, bool blk,
     } else if (strstart(uri, "fd:", &p)) {
         fd_start_outgoing_migration(s, p, &local_err);
 #endif
+    } else if (strstart(uri, "image:", &p)) {
+    /* a direct hack to test migration to image */
+        migrate_image_prepare(s, NULL, &local_err);
+        if (local_err) {
+            migrate_image_error(s);
+            error_propagate(errp, local_err);
+            return;
+        }
+        migrate_image_start(s);
+        printf("start migrate to image.\n");
     } else {
         error_set(errp, QERR_INVALID_PARAMETER_VALUE, "uri", "a valid migration protocol");
         return;
@@ -609,3 +620,207 @@  int64_t migrate_xbzrle_cache_size(void)
 
     return s->xbzrle_cache_size;
 }
+
+/* migration to image helpers */
+
+/*
+ * Close the migration file of @s if one is open and clear the pointer.
+ *
+ * Returns the result of qemu_fclose(), or 0 when no file was open.
+ */
+static int migrate_image_cleanup(MigrationState *s)
+{
+    int close_ret;
+
+    if (!s->file) {
+        return 0;
+    }
+
+    DPRINTF("closing file\n");
+    close_ret = qemu_fclose(s->file);
+    s->file = NULL;
+
+    return close_ret;
+}
+
+/*
+ * Create an internal snapshot, described by s->image.sn, on every block
+ * device that supports snapshots.
+ *
+ * Only the device that received the VM state (s->image.bds) records the
+ * VM state size, taken from the current position of the migration file;
+ * all other devices record 0.
+ *
+ * Every device is attempted even after a failure.  Returns 0 when all
+ * snapshots were created and -1 if at least one failed; @errp, if not
+ * NULL, receives the error of the first failing device.
+ */
+static int migrate_image_snapshot_create(MigrationState *s, Error **errp)
+{
+    BlockDriverState *bs = s->image.bds, *bs1 = NULL;
+    QEMUSnapshotInfo *sn = &s->image.sn;
+    /* qemu_ftell() is kept at 64 bits instead of being squeezed into int */
+    int64_t vm_state_size = qemu_ftell(s->file);
+    int ret = 0;
+
+    while ((bs1 = bdrv_next(bs1))) {
+        if (!bdrv_can_snapshot(bs1)) {
+            continue;
+        }
+
+        /* Write VM state size only to the image that contains the state */
+        sn->vm_state_size = (bs == bs1 ? vm_state_size : 0);
+        if (bdrv_snapshot_create(bs1, sn) < 0) {
+            /*
+             * Report only the first failure: an Error object must not be
+             * set twice.  The failure status is sticky, so a later
+             * successful device can no longer hide it.
+             */
+            if (ret == 0) {
+                error_setg(errp,
+                           "Error while creating snapshot on '%s'\n",
+                           bdrv_get_device_name(bs1));
+            }
+            ret = -1;
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Put @s into MIG_STATE_ERROR, notify the migration state notifiers and
+ * close the migration file if one is open.
+ */
+void migrate_image_error(MigrationState *s)
+{
+    DPRINTF("setting error state\n");
+    s->state = MIG_STATE_ERROR;
+    /* notifiers are called before the file is closed */
+    notifier_list_notify(&migration_state_notifiers, s);
+    migrate_image_cleanup(s);
+}
+
+/*
+ * Finish a migration to image: close the migration file and record the
+ * outcome.  A failing close turns the migration into MIG_STATE_ERROR;
+ * otherwise the state becomes MIG_STATE_COMPLETED and the run state is
+ * set to RUN_STATE_POSTMIGRATE.  Notifiers are called in both cases.
+ */
+static void migrate_image_completed(MigrationState *s)
+{
+    DPRINTF("setting completed state\n");
+    if (migrate_image_cleanup(s) >= 0) {
+        s->state = MIG_STATE_COMPLETED;
+        runstate_set(RUN_STATE_POSTMIGRATE);
+    } else {
+        s->state = MIG_STATE_ERROR;
+    }
+    notifier_list_notify(&migration_state_notifiers, s);
+}
+
+/*
+ * Run one step of the migration to image.
+ *
+ * Does nothing unless @s is in MIG_STATE_ACTIVE.  Otherwise one savevm
+ * iteration is executed; when it reports completion (return value 1) the
+ * VM is stopped, the remaining state is written, the internal snapshots
+ * are created and the VM is started again if it was running before.
+ */
+void migrate_image_put_ready(MigrationState *s)
+{
+    int ret;
+
+    if (s->state != MIG_STATE_ACTIVE) {
+        DPRINTF("put_ready returning because of non-active state\n");
+        return;
+    }
+
+    DPRINTF("iterate\n");
+    ret = qemu_savevm_state_iterate(s->file);
+    if (ret < 0) {
+        DPRINTF("failed in savevm iterate.\n");
+        migrate_image_error(s);
+    } else if (ret == 1) {
+        /* remember whether the guest has to be resumed afterwards */
+        int old_vm_running = runstate_is_running();
+        int64_t start_time, end_time;
+
+        DPRINTF("done iterating\n");
+        start_time = qemu_get_clock_ms(rt_clock);
+        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
+        vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+
+        /*
+         * Snapshots are only created when the final state was written
+         * successfully.  errp is NULL here, so the details of a snapshot
+         * failure are not reported.
+         */
+        if ((qemu_savevm_state_complete(s->file) >= 0) &&
+            (migrate_image_snapshot_create(s, NULL) >= 0)) {
+            migrate_image_completed(s);
+
+        } else {
+            migrate_image_error(s);
+        }
+
+        end_time = qemu_get_clock_ms(rt_clock);
+        /* presumably total_time held the start timestamp -- TODO confirm */
+        s->total_time = end_time - s->total_time;
+        s->downtime = end_time - start_time;
+        /* the guest is resumed on both the success and the error path */
+        if (old_vm_running) {
+            vm_start();
+        }
+    }
+}
+
+/*
+ * Prepare @s for a migration to image followed by internal snapshots.
+ *
+ * Checks that every inserted, writable block device supports snapshots,
+ * stores the device returned by bdrv_snapshots() in s->image.bds as the
+ * one that receives the VM state, and fills in s->image.sn (date, VM clock
+ * and name).  With @name == NULL a name of the form "vm-YYYYmmddHHMMSS"
+ * is generated from the current local time.  Otherwise @name is used (or
+ * the name and id of an existing snapshot found under @name) and existing
+ * snapshots of that name are deleted.
+ *
+ * The VM is stopped while the snapshot info is set up and started again
+ * afterwards if it was running.  Errors are reported through @errp; the
+ * device checks fail before the VM is stopped.
+ */
+void migrate_image_prepare(MigrationState *s, const char *name, Error **errp)
+{
+    BlockDriverState *bs;
+    QEMUSnapshotInfo *sn = &s->image.sn, old_sn1, *old_sn = &old_sn1;
+    int ret, saved_vm_running;
+#ifdef _WIN32
+    struct _timeb tb;
+    struct tm *ptm;
+#else
+    struct timeval tv;
+    struct tm tm;
+#endif
+
+    /*
+     * NOTE(review): test_ms is not declared anywhere in this patch and
+     * looks like a debugging leftover -- confirm and drop.  It is kept
+     * for now, but moved below the declarations so that no statement
+     * precedes a declaration.
+     */
+    test_ms = s;
+
+    /* Every writable device must be able to take a snapshot. */
+    bs = NULL;
+    while ((bs = bdrv_next(bs))) {
+
+        if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
+            continue;
+        }
+
+        if (!bdrv_can_snapshot(bs)) {
+            error_setg(errp,
+                  "Device '%s' is writable but does not support snapshots.\n",
+                  bdrv_get_device_name(bs));
+            return;
+        }
+    }
+
+    bs = bdrv_snapshots();
+    if (!bs) {
+        error_setg(errp, "No block device can accept snapshots\n");
+        return;
+    }
+    s->image.bds = bs;
+
+    saved_vm_running = runstate_is_running();
+    vm_stop(RUN_STATE_SAVE_VM);
+
+    memset(sn, 0, sizeof(*sn));
+
+    /* fill auxiliary fields */
+#ifdef _WIN32
+    _ftime(&tb);
+    sn->date_sec = tb.time;
+    sn->date_nsec = tb.millitm * 1000000;
+#else
+    gettimeofday(&tv, NULL);
+    sn->date_sec = tv.tv_sec;
+    sn->date_nsec = tv.tv_usec * 1000;
+#endif
+    sn->vm_clock_nsec = qemu_get_clock_ns(vm_clock);
+
+    if (name) {
+        /* Reuse name and id of an existing snapshot found under @name. */
+        ret = bdrv_snapshot_find(bs, old_sn, name);
+        if (ret >= 0) {
+            pstrcpy(sn->name, sizeof(sn->name), old_sn->name);
+            pstrcpy(sn->id_str, sizeof(sn->id_str), old_sn->id_str);
+        } else {
+            pstrcpy(sn->name, sizeof(sn->name), name);
+        }
+    } else {
+#ifdef _WIN32
+        time_t t = tb.time;
+        ptm = localtime(&t);
+        strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", ptm);
+#else
+        /* cast below needed for OpenBSD where tv_sec is still 'long' */
+        localtime_r((const time_t *)&tv.tv_sec, &tm);
+        strftime(sn->name, sizeof(sn->name), "vm-%Y%m%d%H%M%S", &tm);
+#endif
+    }
+
+    /*
+     * Delete old snapshots of the same name.  errp is handed on for error
+     * reporting; the VM is resumed below whether or not this succeeds.
+     */
+    if (name) {
+        del_existing_snapshots(NULL, errp, name);
+    }
+
+    if (saved_vm_running) {
+        vm_start();
+    }
+}
+
+/*
+ * Start the migration to image: mark @s active, open the image-backed
+ * migration file and begin savevm.  On success the first iteration step
+ * is run right away; on failure @s is put into the error state.
+ */
+void migrate_image_start(MigrationState *s)
+{
+    int ret;
+
+    s->state = MIG_STATE_ACTIVE;
+    s->file = qemu_fopen_ops_image(s);
+
+    DPRINTF("beginning savevm\n");
+    ret = qemu_savevm_state_begin(s->file, &s->params);
+    if (ret >= 0) {
+        migrate_image_put_ready(s);
+        return;
+    }
+
+    DPRINTF("failed, %d\n", ret);
+    migrate_image_error(s);
+}
diff --git a/migration.h b/migration.h
index c3a23cc..f2308c3 100644
--- a/migration.h
+++ b/migration.h
@@ -20,6 +20,7 @@ 
 #include "error.h"
 #include "vmstate.h"
 #include "qapi-types.h"
+#include "block.h"
 
 struct MigrationParams {
     bool blk;
@@ -28,6 +29,13 @@  struct MigrationParams {
 
 typedef struct MigrationState MigrationState;
 
+/* State used when the migration stream is written into a block image. */
+struct MigrationImage {
+    BlockDriverState *bds;  /* device the VM state is saved to */
+    QEMUSnapshotInfo sn;    /* description of the snapshot to create */
+};
+
+typedef struct MigrationImage MigrationImage;
+
 struct MigrationState
 {
     int64_t bandwidth_limit;
@@ -45,6 +53,7 @@  struct MigrationState
     int64_t dirty_pages_rate;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
     int64_t xbzrle_cache_size;
+    MigrationImage image;
 };
 
 void process_incoming_migration(QEMUFile *f);
@@ -128,4 +137,8 @@  int64_t migrate_xbzrle_cache_size(void);
 
 int64_t xbzrle_cache_resize(int64_t new_size);
 
+/* Set the error state, notify the state notifiers and close the file. */
+void migrate_image_error(MigrationState *s);
+/* Check the block devices and fill in the snapshot info; errors via errp. */
+void migrate_image_prepare(MigrationState *s, const char *name, Error **errp);
+/* Open the image-backed migration file and begin savevm. */
+void migrate_image_start(MigrationState *s);
+/* Run one savevm iteration; on completion finish up and create snapshots. */
+void migrate_image_put_ready(MigrationState *s);
 #endif