diff mbox

[COLO-Frame,v12,27/38] COLO failover: Don't do failover during loading VM's state

Message ID 1450167779-9960-28-git-send-email-zhang.zhanghailiang@huawei.com
State New
Headers show

Commit Message

Zhanghailiang Dec. 15, 2015, 8:22 a.m. UTC
We should not do failover work while the main thread is loading the
VM's state; otherwise it will destroy the consistency of the VM's memory
and device state.

Here we add a new failover status, 'RELAUNCH', which means that the
failover process should be relaunched.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
 include/migration/failover.h |  2 ++
 migration/colo.c             | 25 +++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

Comments

Dr. David Alan Gilbert Dec. 15, 2015, 10:21 a.m. UTC | #1
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> We should not do failover work while the main thread is loading
> VM's state, otherwise it will destroy the consistent of VM's memory and
> device state.
> 
> Here we add a new failover status 'RELAUNCH' which means we should
> relaunch the process of failover.
> 
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> ---
>  include/migration/failover.h |  2 ++
>  migration/colo.c             | 25 +++++++++++++++++++++++++
>  2 files changed, 27 insertions(+)
> 
> diff --git a/include/migration/failover.h b/include/migration/failover.h
> index fba3931..e115d25 100644
> --- a/include/migration/failover.h
> +++ b/include/migration/failover.h
> @@ -20,6 +20,8 @@ typedef enum COLOFailoverStatus {
>      FAILOVER_STATUS_REQUEST = 1, /* Request but not handled */
>      FAILOVER_STATUS_HANDLING = 2, /* In the process of handling failover */
>      FAILOVER_STATUS_COMPLETED = 3, /* Finish the failover process */
> +    /* Optional, Relaunch the failover process, again 'NONE' -> 'COMPLETED' */
> +    FAILOVER_STATUS_RELAUNCH = 4,
>  } COLOFailoverStatus;
>  
>  void failover_init_state(void);
> diff --git a/migration/colo.c b/migration/colo.c
> index 58531e7..f4bb661 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -20,6 +20,8 @@
>  #include "migration/failover.h"
>  #include "qapi-event.h"
>  
> +static bool vmstate_loading;
> +
>  /* colo buffer */
>  #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>  
> @@ -52,6 +54,19 @@ static void secondary_vm_do_failover(void)
>      int old_state;
>      MigrationIncomingState *mis = migration_incoming_get_current();
>  
> +    /* Can not do failover during the process of VM's loading VMstate, Or
> +      * it will break the secondary VM.
> +      */
> +    if (vmstate_loading) {
> +        old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
> +                                       FAILOVER_STATUS_RELAUNCH);
> +        if (old_state != FAILOVER_STATUS_HANDLING) {
> +            error_report("Unknow error while do failover for secondary VM,"
> +                         "old_state: %d", old_state);

Typo: 'Unknow' should be 'Unknown'; it would also be good for the message to say that this happened during vmstate_loading.

The state is being loaded from the qemu buffer, not the real file descriptor,
so we're guaranteed that the vmstate will finish loading; so yes, this is OK.

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>


> +        }
> +        return;
> +    }
> +
>      migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
>                        MIGRATION_STATUS_COMPLETED);
>  
> @@ -535,13 +550,23 @@ void *colo_process_incoming_thread(void *opaque)
>  
>          qemu_mutex_lock_iothread();
>          qemu_system_reset(VMRESET_SILENT);
> +        vmstate_loading = true;
>          if (qemu_loadvm_state(fb) < 0) {
>              error_report("COLO: loadvm failed");
> +            vmstate_loading = false;
>              qemu_mutex_unlock_iothread();
>              goto out;
>          }
> +
> +        vmstate_loading = false;
>          qemu_mutex_unlock_iothread();
>  
> +        if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
> +            failover_set_state(FAILOVER_STATUS_RELAUNCH, FAILOVER_STATUS_NONE);
> +            failover_request_active(NULL);
> +            goto out;
> +        }
> +
>          ret = colo_put_cmd(mis->to_src_file, COLO_COMMAND_VMSTATE_LOADED);
>          if (ret < 0) {
>              goto out;
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang Dec. 25, 2015, 1:02 a.m. UTC | #2
On 2015/12/15 18:21, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> We should not do failover work while the main thread is loading
>> VM's state, otherwise it will destroy the consistent of VM's memory and
>> device state.
>>
>> Here we add a new failover status 'RELAUNCH' which means we should
>> relaunch the process of failover.
>>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> ---
>>   include/migration/failover.h |  2 ++
>>   migration/colo.c             | 25 +++++++++++++++++++++++++
>>   2 files changed, 27 insertions(+)
>>
>> diff --git a/include/migration/failover.h b/include/migration/failover.h
>> index fba3931..e115d25 100644
>> --- a/include/migration/failover.h
>> +++ b/include/migration/failover.h
>> @@ -20,6 +20,8 @@ typedef enum COLOFailoverStatus {
>>       FAILOVER_STATUS_REQUEST = 1, /* Request but not handled */
>>       FAILOVER_STATUS_HANDLING = 2, /* In the process of handling failover */
>>       FAILOVER_STATUS_COMPLETED = 3, /* Finish the failover process */
>> +    /* Optional, Relaunch the failover process, again 'NONE' -> 'COMPLETED' */
>> +    FAILOVER_STATUS_RELAUNCH = 4,
>>   } COLOFailoverStatus;
>>
>>   void failover_init_state(void);
>> diff --git a/migration/colo.c b/migration/colo.c
>> index 58531e7..f4bb661 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -20,6 +20,8 @@
>>   #include "migration/failover.h"
>>   #include "qapi-event.h"
>>
>> +static bool vmstate_loading;
>> +
>>   /* colo buffer */
>>   #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>>
>> @@ -52,6 +54,19 @@ static void secondary_vm_do_failover(void)
>>       int old_state;
>>       MigrationIncomingState *mis = migration_incoming_get_current();
>>
>> +    /* Can not do failover during the process of VM's loading VMstate, Or
>> +      * it will break the secondary VM.
>> +      */
>> +    if (vmstate_loading) {
>> +        old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
>> +                                       FAILOVER_STATUS_RELAUNCH);
>> +        if (old_state != FAILOVER_STATUS_HANDLING) {
>> +            error_report("Unknow error while do failover for secondary VM,"
>> +                         "old_state: %d", old_state);
>
> Typo: 'Unknown' and it would be good to say it was during vmstate_loading.
>
> The state is being loaded from the qemu buffer, not the real file descriptor,
> so we're guaranteed that the vmstate will finish loading; so yes, this is OK.
>

I will fix it in the next version.

Thanks.
Hailiang
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>
>
>> +        }
>> +        return;
>> +    }
>> +
>>       migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
>>                         MIGRATION_STATUS_COMPLETED);
>>
>> @@ -535,13 +550,23 @@ void *colo_process_incoming_thread(void *opaque)
>>
>>           qemu_mutex_lock_iothread();
>>           qemu_system_reset(VMRESET_SILENT);
>> +        vmstate_loading = true;
>>           if (qemu_loadvm_state(fb) < 0) {
>>               error_report("COLO: loadvm failed");
>> +            vmstate_loading = false;
>>               qemu_mutex_unlock_iothread();
>>               goto out;
>>           }
>> +
>> +        vmstate_loading = false;
>>           qemu_mutex_unlock_iothread();
>>
>> +        if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
>> +            failover_set_state(FAILOVER_STATUS_RELAUNCH, FAILOVER_STATUS_NONE);
>> +            failover_request_active(NULL);
>> +            goto out;
>> +        }
>> +
>>           ret = colo_put_cmd(mis->to_src_file, COLO_COMMAND_VMSTATE_LOADED);
>>           if (ret < 0) {
>>               goto out;
>> --
>> 1.8.3.1
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
diff mbox

Patch

diff --git a/include/migration/failover.h b/include/migration/failover.h
index fba3931..e115d25 100644
--- a/include/migration/failover.h
+++ b/include/migration/failover.h
@@ -20,6 +20,8 @@  typedef enum COLOFailoverStatus {
     FAILOVER_STATUS_REQUEST = 1, /* Request but not handled */
     FAILOVER_STATUS_HANDLING = 2, /* In the process of handling failover */
     FAILOVER_STATUS_COMPLETED = 3, /* Finish the failover process */
+    /* Optional, Relaunch the failover process, again 'NONE' -> 'COMPLETED' */
+    FAILOVER_STATUS_RELAUNCH = 4,
 } COLOFailoverStatus;
 
 void failover_init_state(void);
diff --git a/migration/colo.c b/migration/colo.c
index 58531e7..f4bb661 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -20,6 +20,8 @@ 
 #include "migration/failover.h"
 #include "qapi-event.h"
 
+static bool vmstate_loading;
+
 /* colo buffer */
 #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
 
@@ -52,6 +54,19 @@  static void secondary_vm_do_failover(void)
     int old_state;
     MigrationIncomingState *mis = migration_incoming_get_current();
 
+    /* Can not do failover during the process of VM's loading VMstate, Or
+      * it will break the secondary VM.
+      */
+    if (vmstate_loading) {
+        old_state = failover_set_state(FAILOVER_STATUS_HANDLING,
+                                       FAILOVER_STATUS_RELAUNCH);
+        if (old_state != FAILOVER_STATUS_HANDLING) {
+            error_report("Unknow error while do failover for secondary VM,"
+                         "old_state: %d", old_state);
+        }
+        return;
+    }
+
     migrate_set_state(&mis->state, MIGRATION_STATUS_COLO,
                       MIGRATION_STATUS_COMPLETED);
 
@@ -535,13 +550,23 @@  void *colo_process_incoming_thread(void *opaque)
 
         qemu_mutex_lock_iothread();
         qemu_system_reset(VMRESET_SILENT);
+        vmstate_loading = true;
         if (qemu_loadvm_state(fb) < 0) {
             error_report("COLO: loadvm failed");
+            vmstate_loading = false;
             qemu_mutex_unlock_iothread();
             goto out;
         }
+
+        vmstate_loading = false;
         qemu_mutex_unlock_iothread();
 
+        if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) {
+            failover_set_state(FAILOVER_STATUS_RELAUNCH, FAILOVER_STATUS_NONE);
+            failover_request_active(NULL);
+            goto out;
+        }
+
         ret = colo_put_cmd(mis->to_src_file, COLO_COMMAND_VMSTATE_LOADED);
         if (ret < 0) {
             goto out;