
[PULL,03/18] migration: Enter into COLO mode after migration if COLO is enabled

Message ID 1477824430-1460-4-git-send-email-amit.shah@redhat.com
State New

Commit Message

Amit Shah Oct. 30, 2016, 10:46 a.m. UTC
From: zhanghailiang <zhang.zhanghailiang@huawei.com>

Add a new migration state: MIGRATION_STATUS_COLO. The migration source side
enters this state after the first live migration has finished successfully,
if COLO has been enabled with the command 'migrate_set_capability x-colo on'.

We reuse the migration thread, so checkpointing will also be handled in the
migration thread.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Amit Shah <amit@amitshah.net>
---
 include/migration/colo.h |  3 +++
 migration/colo.c         | 29 +++++++++++++++++++++++++++++
 migration/migration.c    | 38 +++++++++++++++++++++++++++++++++-----
 migration/trace-events   |  3 +++
 qapi-schema.json         |  4 +++-
 stubs/migration-colo.c   |  9 +++++++++
 6 files changed, 80 insertions(+), 6 deletions(-)
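
To illustrate the flow the commit message describes: enable the capability on
the source before migrating, and after the first live migration completes the
source stays in the new 'colo' state instead of 'completed'. A rough usage
sketch (the HMP command is taken from the commit message; the destination URI
is a placeholder, and the QMP form of enabling the capability is shown for
tools such as libvirt):

    (qemu) migrate_set_capability x-colo on
    (qemu) migrate tcp:<destination>:<port>
    (qemu) info migrate    # once the initial migration finishes, the
                           # status should read "colo" rather than "completed"

    # QMP equivalent of enabling the capability:
    { "execute": "migrate-set-capabilities",
      "arguments": { "capabilities": [
        { "capability": "x-colo", "state": true } ] } }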

Comments

Eric Blake Oct. 31, 2016, 10:27 p.m. UTC | #1
On 10/30/2016 05:46 AM, Amit Shah wrote:
> From: zhanghailiang <zhang.zhanghailiang@huawei.com>
> 
> Add a new migration state: MIGRATION_STATUS_COLO. Migration source side
> enters this state after the first live migration successfully finished
> if COLO is enabled by command 'migrate_set_capability x-colo on'.
> 
> We reuse migration thread, so the process of checkpointing will be handled
> in migration thread.
> 

> +++ b/qapi-schema.json
> @@ -459,12 +459,14 @@
>  #
>  # @failed: some error occurred during migration process.
>  #
> +# @colo: VM is in the process of fault tolerance. (since 2.8)
> +#

The commit message mentions it, but the public documentation does not,
so it might be worth a followup patch that clarifies in the schema that
the 'colo' state is not possible without using the 'x-colo' migration
property (so that applications like libvirt know that they don't have to
worry about handling an unexpected state, at least not in 2.8 or until
the 'x-colo' is promoted to plain 'colo').

Zhanghailiang Nov. 1, 2016, 3:39 a.m. UTC | #2
On 2016/11/1 6:27, Eric Blake wrote:
> On 10/30/2016 05:46 AM, Amit Shah wrote:
>> From: zhanghailiang <zhang.zhanghailiang@huawei.com>
>>
>> Add a new migration state: MIGRATION_STATUS_COLO. Migration source side
>> enters this state after the first live migration successfully finished
>> if COLO is enabled by command 'migrate_set_capability x-colo on'.
>>
>> We reuse migration thread, so the process of checkpointing will be handled
>> in migration thread.
>>
>
>> +++ b/qapi-schema.json
>> @@ -459,12 +459,14 @@
>>   #
>>   # @failed: some error occurred during migration process.
>>   #
>> +# @colo: VM is in the process of fault tolerance. (since 2.8)
>> +#
>
> The commit message mentions it, but the public documentation does not,
> so it might be worth a followup patch that clarifies in the schema that
> the 'colo' state is not possible without using the 'x-colo' migration
> property (so that applications like libvirt know that they don't have to
> worry about handling an unexpected state, at least not in 2.8 or until
> the 'x-colo' is promoted to plain 'colo').
>

Sounds reasonable, I'll send a patch to add such clarification.

Thanks,
Hailiang
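
For reference, a minimal sketch of what such a schema clarification could look
like (the wording here is hypothetical, not the actual followup patch):

    # @colo: VM is in the process of fault tolerance. (since 2.8)
    #        (This state is only reachable when the 'x-colo' migration
    #        capability has been enabled.)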

Patch

diff --git a/include/migration/colo.h b/include/migration/colo.h
index 1c899a0..bf84b99 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -19,4 +19,7 @@ 
 bool colo_supported(void);
 void colo_info_init(void);
 
+void migrate_start_colo_process(MigrationState *s);
+bool migration_in_colo_state(void);
+
 #endif
diff --git a/migration/colo.c b/migration/colo.c
index d215057..f480431 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -11,9 +11,38 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "sysemu/sysemu.h"
 #include "migration/colo.h"
+#include "trace.h"
 
 bool colo_supported(void)
 {
     return false;
 }
+
+bool migration_in_colo_state(void)
+{
+    MigrationState *s = migrate_get_current();
+
+    return (s->state == MIGRATION_STATUS_COLO);
+}
+
+static void colo_process_checkpoint(MigrationState *s)
+{
+    qemu_mutex_lock_iothread();
+    vm_start();
+    qemu_mutex_unlock_iothread();
+    trace_colo_vm_state_change("stop", "run");
+
+    /* TODO: COLO checkpoint savevm loop */
+
+}
+
+void migrate_start_colo_process(MigrationState *s)
+{
+    qemu_mutex_unlock_iothread();
+    migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
+                      MIGRATION_STATUS_COLO);
+    colo_process_checkpoint(s);
+    qemu_mutex_lock_iothread();
+}
diff --git a/migration/migration.c b/migration/migration.c
index c7869e5..aff3eb4 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -695,6 +695,10 @@  MigrationInfo *qmp_query_migrate(Error **errp)
 
         get_xbzrle_cache_stats(info);
         break;
+    case MIGRATION_STATUS_COLO:
+        info->has_status = true;
+        /* TODO: display COLO specific information (checkpoint info etc.) */
+        break;
     case MIGRATION_STATUS_COMPLETED:
         get_xbzrle_cache_stats(info);
 
@@ -1113,7 +1117,8 @@  void qmp_migrate(const char *uri, bool has_blk, bool blk,
     params.shared = has_inc && inc;
 
     if (migration_is_setup_or_active(s->state) ||
-        s->state == MIGRATION_STATUS_CANCELLING) {
+        s->state == MIGRATION_STATUS_CANCELLING ||
+        s->state == MIGRATION_STATUS_COLO) {
         error_setg(errp, QERR_MIGRATION_ACTIVE);
         return;
     }
@@ -1661,7 +1666,11 @@  static void migration_completion(MigrationState *s, int current_active_state,
 
         if (!ret) {
             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
-            if (ret >= 0) {
+            /*
+             * Don't mark the image with the BDRV_O_INACTIVE flag if
+             * we will enter the COLO stage later.
+             */
+            if (ret >= 0 && !migrate_colo_enabled()) {
                 ret = bdrv_inactivate_all();
             }
             if (ret >= 0) {
@@ -1703,8 +1712,11 @@  static void migration_completion(MigrationState *s, int current_active_state,
         goto fail_invalidate;
     }
 
-    migrate_set_state(&s->state, current_active_state,
-                      MIGRATION_STATUS_COMPLETED);
+    if (!migrate_colo_enabled()) {
+        migrate_set_state(&s->state, current_active_state,
+                          MIGRATION_STATUS_COMPLETED);
+    }
+
     return;
 
 fail_invalidate:
@@ -1749,6 +1761,7 @@  static void *migration_thread(void *opaque)
     bool entered_postcopy = false;
     /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
     enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
+    bool enable_colo = migrate_colo_enabled();
 
     rcu_register_thread();
 
@@ -1857,7 +1870,13 @@  static void *migration_thread(void *opaque)
     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 
     qemu_mutex_lock_iothread();
-    qemu_savevm_state_cleanup();
+    /*
+     * The resources allocated by migration will be reused in the COLO
+     * process, so don't release them here.
+     */
+    if (!enable_colo) {
+        qemu_savevm_state_cleanup();
+    }
     if (s->state == MIGRATION_STATUS_COMPLETED) {
         uint64_t transferred_bytes = qemu_ftell(s->to_dst_file);
         s->total_time = end_time - s->total_time;
@@ -1870,6 +1889,15 @@  static void *migration_thread(void *opaque)
         }
         runstate_set(RUN_STATE_POSTMIGRATE);
     } else {
+        if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) {
+            migrate_start_colo_process(s);
+            qemu_savevm_state_cleanup();
+            /*
+             * FIXME: we will run the VM in COLO no matter what its old
+             * running state was; after exiting COLO, it keeps running.
+             */
+            old_vm_running = true;
+        }
         if (old_vm_running && !entered_postcopy) {
             vm_start();
         } else {
diff --git a/migration/trace-events b/migration/trace-events
index dfee75a..a29e5a0 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -207,3 +207,6 @@  migration_tls_outgoing_handshake_complete(void) ""
 migration_tls_incoming_handshake_start(void) ""
 migration_tls_incoming_handshake_error(const char *err) "err=%s"
 migration_tls_incoming_handshake_complete(void) ""
+
+# migration/colo.c
+colo_vm_state_change(const char *old, const char *new) "Change '%s' => '%s'"
diff --git a/qapi-schema.json b/qapi-schema.json
index 0fb4d7e..8b21215 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -459,12 +459,14 @@ 
 #
 # @failed: some error occurred during migration process.
 #
+# @colo: VM is in the process of fault tolerance. (since 2.8)
+#
 # Since: 2.3
 #
 ##
 { 'enum': 'MigrationStatus',
   'data': [ 'none', 'setup', 'cancelling', 'cancelled',
-            'active', 'postcopy-active', 'completed', 'failed' ] }
+            'active', 'postcopy-active', 'completed', 'failed', 'colo' ] }
 
 ##
 # @MigrationInfo
diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c
index d215057..0c8eef4 100644
--- a/stubs/migration-colo.c
+++ b/stubs/migration-colo.c
@@ -17,3 +17,12 @@  bool colo_supported(void)
 {
     return false;
 }
+
+bool migration_in_colo_state(void)
+{
+    return false;
+}
+
+void migrate_start_colo_process(MigrationState *s)
+{
+}