diff mbox

[RFC,v3,04/27] migration: Integrate COLO checkpoint process into migration

Message ID 1423711034-5340-5-git-send-email-zhang.zhanghailiang@huawei.com
State New
Headers show

Commit Message

Zhanghailiang Feb. 12, 2015, 3:16 a.m. UTC
Add a new migration state, MIG_STATE_COLO, which is entered after the
first live migration has finished successfully.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 include/migration/migration-colo.h |  2 ++
 include/migration/migration.h      | 13 +++++++
 migration/Makefile.objs            |  1 +
 migration/colo.c                   | 72 ++++++++++++++++++++++++++++++++++++++
 migration/migration.c              | 38 +++++++++++---------
 stubs/Makefile.objs                |  1 +
 stubs/migration-colo.c             | 17 +++++++++
 7 files changed, 128 insertions(+), 16 deletions(-)
 create mode 100644 migration/colo.c
 create mode 100644 stubs/migration-colo.c

Comments

Eric Blake Feb. 16, 2015, 11:27 p.m. UTC | #1
On 02/11/2015 08:16 PM, zhanghailiang wrote:
> Add a migrate state: MIG_STATE_COLO, enter this migration state
> after the first live migration successfully finished.
> 
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
>  include/migration/migration-colo.h |  2 ++
>  include/migration/migration.h      | 13 +++++++
>  migration/Makefile.objs            |  1 +
>  migration/colo.c                   | 72 ++++++++++++++++++++++++++++++++++++++
>  migration/migration.c              | 38 +++++++++++---------
>  stubs/Makefile.objs                |  1 +
>  stubs/migration-colo.c             | 17 +++++++++
>  7 files changed, 128 insertions(+), 16 deletions(-)
>  create mode 100644 migration/colo.c
>  create mode 100644 stubs/migration-colo.c
> 

> +++ b/include/migration/migration.h
> @@ -65,6 +65,19 @@ struct MigrationState
>      int64_t dirty_sync_count;
>  };
>  
> +enum {
> +    MIG_STATE_ERROR = -1,
> +    MIG_STATE_NONE,
> +    MIG_STATE_SETUP,
> +    MIG_STATE_CANCELLING,
> +    MIG_STATE_CANCELLED,
> +    MIG_STATE_ACTIVE,
> +    MIG_STATE_COLO,
> +    MIG_STATE_COMPLETED,
> +};

Is the new state intended to be user-visible?  If so, wouldn't it be
better to expose this enum via qapi-schema.json?


> +
> +/* #define DEBUG_COLO */
> +
> +#ifdef DEBUG_COLO
> +#define DPRINTF(fmt, ...) \
> +do { fprintf(stdout, "colo: " fmt , ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) do {} while (0)
> +#endif
> +

Same comment as in 3/27 about avoiding bit-rotting debug statements.  Or
even better,...

> +static QEMUBH *colo_bh;
> +
> +static void *colo_thread(void *opaque)
> +{
> +    MigrationState *s = opaque;
> +
> +    qemu_mutex_lock_iothread();
> +    vm_start();
> +    qemu_mutex_unlock_iothread();
> +    DPRINTF("vm resume to run\n");

...why not add tracepoints instead of using DPRINTF?


> @@ -227,6 +218,11 @@ MigrationInfo *qmp_query_migrate(Error **errp)
>  
>          get_xbzrle_cache_stats(info);
>          break;
> +    case MIG_STATE_COLO:
> +        info->has_status = true;
> +        info->status = g_strdup("colo");
> +        /* TODO: display COLO specific informations(checkpoint info etc.),*/
> +        break;

Uggh.  We REALLY need to fix MigrationInfo to convert 'status' to use an
enum type, instead of an open-coded 'str' (such a conversion is
backwards compatible, and better documented).  Then it would be more
obvious that you are adding an enum value.  Doing the conversion would
be a good prerequisite patch.

s/informations(checkpoint info etc.),/information (checkpoint info etc.)/
Zhanghailiang Feb. 25, 2015, 6:43 a.m. UTC | #2
On 2015/2/17 7:27, Eric Blake wrote:
> On 02/11/2015 08:16 PM, zhanghailiang wrote:
>> Add a migrate state: MIG_STATE_COLO, enter this migration state
>> after the first live migration successfully finished.
>>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
>> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
>> ---
>>   include/migration/migration-colo.h |  2 ++
>>   include/migration/migration.h      | 13 +++++++
>>   migration/Makefile.objs            |  1 +
>>   migration/colo.c                   | 72 ++++++++++++++++++++++++++++++++++++++
>>   migration/migration.c              | 38 +++++++++++---------
>>   stubs/Makefile.objs                |  1 +
>>   stubs/migration-colo.c             | 17 +++++++++
>>   7 files changed, 128 insertions(+), 16 deletions(-)
>>   create mode 100644 migration/colo.c
>>   create mode 100644 stubs/migration-colo.c
>>
>
>> +++ b/include/migration/migration.h
>> @@ -65,6 +65,19 @@ struct MigrationState
>>       int64_t dirty_sync_count;
>>   };
>>
>> +enum {
>> +    MIG_STATE_ERROR = -1,
>> +    MIG_STATE_NONE,
>> +    MIG_STATE_SETUP,
>> +    MIG_STATE_CANCELLING,
>> +    MIG_STATE_CANCELLED,
>> +    MIG_STATE_ACTIVE,
>> +    MIG_STATE_COLO,
>> +    MIG_STATE_COMPLETED,
>> +};
>
> Is the new state intended to be user-visible?  If so, wouldn't it be
> better to expose this enum via qapi-schema.json?
>

No, for now it is only used internally.

>
>> +
>> +/* #define DEBUG_COLO */
>> +
>> +#ifdef DEBUG_COLO
>> +#define DPRINTF(fmt, ...) \
>> +do { fprintf(stdout, "colo: " fmt , ## __VA_ARGS__); } while (0)
>> +#else
>> +#define DPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>
> Same comment as in 3/27 about avoiding bit-rotting debug statements.  Or
> even better,...
>

OK, will fix it.

>> +static QEMUBH *colo_bh;
>> +
>> +static void *colo_thread(void *opaque)
>> +{
>> +    MigrationState *s = opaque;
>> +
>> +    qemu_mutex_lock_iothread();
>> +    vm_start();
>> +    qemu_mutex_unlock_iothread();
>> +    DPRINTF("vm resume to run\n");
>
> ...why not add tracepoints instead of using DPRINTF?
>

Hmm, we will switch to tracepoints; for now, we use DPRINTF just for convenience.

>
>> @@ -227,6 +218,11 @@ MigrationInfo *qmp_query_migrate(Error **errp)
>>
>>           get_xbzrle_cache_stats(info);
>>           break;
>> +    case MIG_STATE_COLO:
>> +        info->has_status = true;
>> +        info->status = g_strdup("colo");
>> +        /* TODO: display COLO specific informations(checkpoint info etc.),*/
>> +        break;
>
> Uggh.  We REALLY need to fix MigrationInfo to convert 'status' to use an
> enum type, instead of an open-coded 'str' (such a conversion is
> backwards compatible, and better documented).  Then it would be more
> obvious that you are adding an enum value.  Doing the conversion would
> be a good prerequisite patch.
>

Good idea, I will do this and send a patch for that. ;)

> s/informations(checkpoint info etc.),/information (checkpoint info etc.)/
>

Will fix it, thanks.
diff mbox

Patch

diff --git a/include/migration/migration-colo.h b/include/migration/migration-colo.h
index d52ebd0..b72662c 100644
--- a/include/migration/migration-colo.h
+++ b/include/migration/migration-colo.h
@@ -18,4 +18,6 @@ 
 
 void colo_info_mig_init(void);
 
+void colo_init_checkpointer(MigrationState *s);
+
 #endif
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 3f5c705..c4c98d2 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -65,6 +65,19 @@  struct MigrationState
     int64_t dirty_sync_count;
 };
 
+enum {
+    MIG_STATE_ERROR = -1,
+    MIG_STATE_NONE,
+    MIG_STATE_SETUP,
+    MIG_STATE_CANCELLING,
+    MIG_STATE_CANCELLED,
+    MIG_STATE_ACTIVE,
+    MIG_STATE_COLO,
+    MIG_STATE_COMPLETED,
+};
+
+void migrate_set_state(MigrationState *s, int old_state, int new_state);
+
 void process_incoming_migration(QEMUFile *f);
 
 void qemu_start_incoming_migration(const char *uri, Error **errp);
diff --git a/migration/Makefile.objs b/migration/Makefile.objs
index 97b72ad..895583e 100644
--- a/migration/Makefile.objs
+++ b/migration/Makefile.objs
@@ -1,5 +1,6 @@ 
 common-obj-y += migration.o tcp.o
 common-obj-y += colo-comm.o
+common-obj-$(CONFIG_COLO) += colo.o
 common-obj-y += vmstate.o
 common-obj-y += qemu-file.o qemu-file-buf.o qemu-file-unix.o qemu-file-stdio.o
 common-obj-y += xbzrle.o
diff --git a/migration/colo.c b/migration/colo.c
new file mode 100644
index 0000000..f40b0d8
--- /dev/null
+++ b/migration/colo.c
@@ -0,0 +1,72 @@ 
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "sysemu/sysemu.h"
+#include "migration/migration-colo.h"
+#include "qemu/error-report.h"
+
+/* #define DEBUG_COLO */
+
+#ifdef DEBUG_COLO
+#define DPRINTF(fmt, ...) \
+do { fprintf(stdout, "colo: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) do {} while (0)
+#endif
+
+static QEMUBH *colo_bh;
+
+static void *colo_thread(void *opaque)
+{
+    MigrationState *s = opaque;
+
+    qemu_mutex_lock_iothread();
+    vm_start();
+    qemu_mutex_unlock_iothread();
+    DPRINTF("vm resume to run\n");
+
+
+    /*TODO: COLO checkpoint savevm loop*/
+
+    migrate_set_state(s, MIG_STATE_COLO, MIG_STATE_COMPLETED);
+
+    qemu_mutex_lock_iothread();
+    qemu_bh_schedule(s->cleanup_bh);
+    qemu_mutex_unlock_iothread();
+
+    return NULL;
+}
+
+static void colo_start_checkpointer(void *opaque)
+{
+    MigrationState *s = opaque;
+
+    if (colo_bh) {
+        qemu_bh_delete(colo_bh);
+        colo_bh = NULL;
+    }
+
+    qemu_mutex_unlock_iothread();
+    qemu_thread_join(&s->thread);
+    qemu_mutex_lock_iothread();
+
+    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COLO);
+
+    qemu_thread_create(&s->thread, "colo", colo_thread, s,
+                       QEMU_THREAD_JOINABLE);
+}
+
+void colo_init_checkpointer(MigrationState *s)
+{
+    colo_bh = qemu_bh_new(colo_start_checkpointer, s);
+    qemu_bh_schedule(colo_bh);
+}
diff --git a/migration/migration.c b/migration/migration.c
index 8403c8a..536ba01e 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -25,16 +25,7 @@ 
 #include "qemu/thread.h"
 #include "qmp-commands.h"
 #include "trace.h"
-
-enum {
-    MIG_STATE_ERROR = -1,
-    MIG_STATE_NONE,
-    MIG_STATE_SETUP,
-    MIG_STATE_CANCELLING,
-    MIG_STATE_CANCELLED,
-    MIG_STATE_ACTIVE,
-    MIG_STATE_COMPLETED,
-};
+#include "migration/migration-colo.h"
 
 #define MAX_THROTTLE  (32 << 20)      /* Migration speed throttling */
 
@@ -227,6 +218,11 @@  MigrationInfo *qmp_query_migrate(Error **errp)
 
         get_xbzrle_cache_stats(info);
         break;
+    case MIG_STATE_COLO:
+        info->has_status = true;
+        info->status = g_strdup("colo");
+        /* TODO: display COLO specific information (checkpoint info etc.) */
+        break;
     case MIG_STATE_COMPLETED:
         get_xbzrle_cache_stats(info);
 
@@ -270,7 +266,8 @@  void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
     MigrationState *s = migrate_get_current();
     MigrationCapabilityStatusList *cap;
 
-    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP) {
+    if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
+        s->state == MIG_STATE_COLO) {
         error_set(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
@@ -291,7 +288,7 @@  void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 
 /* shared migration helpers */
 
-static void migrate_set_state(MigrationState *s, int old_state, int new_state)
+void migrate_set_state(MigrationState *s, int old_state, int new_state)
 {
     if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
         trace_migrate_set_state(new_state);
@@ -437,7 +434,7 @@  void qmp_migrate(const char *uri, bool has_blk, bool blk,
     params.shared = has_inc && inc;
 
     if (s->state == MIG_STATE_ACTIVE || s->state == MIG_STATE_SETUP ||
-        s->state == MIG_STATE_CANCELLING) {
+        s->state == MIG_STATE_CANCELLING || s->state == MIG_STATE_COLO) {
         error_set(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
@@ -611,6 +608,7 @@  static void *migration_thread(void *opaque)
     int64_t max_size = 0;
     int64_t start_time = initial_time;
     bool old_vm_running = false;
+    bool enable_colo = migrate_enable_colo();
 
     qemu_savevm_state_begin(s->file, &s->params);
 
@@ -647,7 +645,10 @@  static void *migration_thread(void *opaque)
                 }
 
                 if (!qemu_file_get_error(s->file)) {
-                    migrate_set_state(s, MIG_STATE_ACTIVE, MIG_STATE_COMPLETED);
+                    if (!enable_colo) {
+                        migrate_set_state(s, MIG_STATE_ACTIVE,
+                                          MIG_STATE_COMPLETED);
+                    }
                     break;
                 }
             }
@@ -697,11 +698,16 @@  static void *migration_thread(void *opaque)
         }
         runstate_set(RUN_STATE_POSTMIGRATE);
     } else {
-        if (old_vm_running) {
+        if (s->state == MIG_STATE_ACTIVE && enable_colo) {
+            colo_init_checkpointer(s);
+        } else if (old_vm_running) {
             vm_start();
         }
     }
-    qemu_bh_schedule(s->cleanup_bh);
+
+    if (!enable_colo) {
+        qemu_bh_schedule(s->cleanup_bh);
+    }
     qemu_mutex_unlock_iothread();
 
     return NULL;
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
index 5e347d0..9fe6b4c 100644
--- a/stubs/Makefile.objs
+++ b/stubs/Makefile.objs
@@ -40,3 +40,4 @@  stub-obj-$(CONFIG_WIN32) += fd-register.o
 stub-obj-y += cpus.o
 stub-obj-y += kvm.o
 stub-obj-y += qmp_pc_dimm_device_list.o
+stub-obj-y += migration-colo.o
diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c
new file mode 100644
index 0000000..b2cff9c
--- /dev/null
+++ b/stubs/migration-colo.c
@@ -0,0 +1,17 @@ 
+/*
+ * COarse-grain LOck-stepping Virtual Machines for Non-stop Service (COLO)
+ * (a.k.a. Fault Tolerance or Continuous Replication)
+ *
+ * Copyright (c) 2015 HUAWEI TECHNOLOGIES CO.,LTD.
+ * Copyright (c) 2015 FUJITSU LIMITED
+ * Copyright (c) 2015 Intel Corporation
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "migration/migration-colo.h"
+
+void colo_init_checkpointer(MigrationState *s)
+{
+}