diff mbox

[COLO-Frame,v10,14/38] COLO: Load VMState into qsb before restore it

Message ID 1446551816-15768-15-git-send-email-zhang.zhanghailiang@huawei.com
State New
Headers show

Commit Message

Zhanghailiang Nov. 3, 2015, 11:56 a.m. UTC
We should not destroy the state of the SVM (Secondary VM) until we have received the
whole state from the PVM (Primary VM), in case the primary fails in the middle of sending
the state. So, here we cache the device state in the Secondary before restoring it.

Besides, we should call qemu_system_reset() before loading the VM state,
which ensures the data is intact.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
---
 migration/colo.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

Comments

Dr. David Alan Gilbert Nov. 13, 2015, 4:02 p.m. UTC | #1
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> We should not destroy the state of SVM (Secondary VM) until we receive the whole
> state from the PVM (Primary VM), in case the primary fails in the middle of sending
> the state, so, here we cache the device state in Secondary before restore it.
> 
> Besides, we should call qemu_system_reset() before load VM state,
> which can ensure the data is intact.
> 
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> ---
>  migration/colo.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index 25f85b2..1339774 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -287,6 +287,9 @@ static int colo_wait_handle_cmd(QEMUFile *f, int *checkpoint_request)
>  void *colo_process_incoming_thread(void *opaque)
>  {
>      MigrationIncomingState *mis = opaque;
> +    QEMUFile *fb = NULL;
> +    QEMUSizedBuffer *buffer = NULL; /* Cache incoming device state */
> +    int  total_size;
>      int fd, ret = 0;
>  
>      migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
> @@ -310,6 +313,12 @@ void *colo_process_incoming_thread(void *opaque)
>          goto out;
>      }
>  
> +    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
> +    if (buffer == NULL) {
> +        error_report("Failed to allocate colo buffer!");
> +        goto out;
> +    }
> +
>      ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_CHECKPOINT_READY, 0);
>      if (ret < 0) {
>          goto out;
> @@ -337,19 +346,50 @@ void *colo_process_incoming_thread(void *opaque)
>              goto out;
>          }
>  
> -        /* TODO Load VM state */
> +        /* read the VM state total size first */
> +        total_size = colo_ctl_get(mis->from_src_file,
> +                                  COLO_COMMAND_VMSTATE_SIZE);
> +        if (total_size <= 0) {

Error message?

> +            goto out;
> +        }

OK, and when you fix up the colo_ctl_get in the previous patch to
take a separate pointer for value, you can make total_size a size_t.


Other than those, it looks good.

Dave

> +        /* read vm device state into colo buffer */
> +        ret = qsb_fill_buffer(buffer, mis->from_src_file, total_size);
> +        if (ret != total_size) {
> +            error_report("can't get all migration data");
> +            goto out;
> +        }
>  
>          ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_RECEIVED, 0);
>          if (ret < 0) {
>              goto out;
>          }
>  
> +        /* open colo buffer for read */
> +        fb = qemu_bufopen("r", buffer);
> +        if (!fb) {
> +            error_report("can't open colo buffer for read");
> +            goto out;
> +        }
> +
> +        qemu_mutex_lock_iothread();
> +        qemu_system_reset(VMRESET_SILENT);
> +        if (qemu_loadvm_state(fb) < 0) {
> +            error_report("COLO: loadvm failed");
> +            qemu_mutex_unlock_iothread();
> +            goto out;
> +        }
> +        qemu_mutex_unlock_iothread();
> +
>          /* TODO: flush vm state */
>  
>          ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_LOADED, 0);
>          if (ret < 0) {
>              goto out;
>          }
> +
> +        qemu_fclose(fb);
> +        fb = NULL;
>      }
>  
>  out:
> @@ -358,6 +398,11 @@ out:
>                       strerror(-ret));
>      }
>  
> +    if (fb) {
> +        qemu_fclose(fb);
> +    }
> +    qsb_free(buffer);
> +
>      qemu_mutex_lock_iothread();
>      colo_release_ram_cache();
>      qemu_mutex_unlock_iothread();
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang Nov. 16, 2015, 8:46 a.m. UTC | #2
On 2015/11/14 0:02, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> We should not destroy the state of SVM (Secondary VM) until we receive the whole
>> state from the PVM (Primary VM), in case the primary fails in the middle of sending
>> the state, so, here we cache the device state in Secondary before restore it.
>>
>> Besides, we should call qemu_system_reset() before load VM state,
>> which can ensure the data is intact.
>>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> ---
>>   migration/colo.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 46 insertions(+), 1 deletion(-)
>>
>> diff --git a/migration/colo.c b/migration/colo.c
>> index 25f85b2..1339774 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -287,6 +287,9 @@ static int colo_wait_handle_cmd(QEMUFile *f, int *checkpoint_request)
>>   void *colo_process_incoming_thread(void *opaque)
>>   {
>>       MigrationIncomingState *mis = opaque;
>> +    QEMUFile *fb = NULL;
>> +    QEMUSizedBuffer *buffer = NULL; /* Cache incoming device state */
>> +    int  total_size;
>>       int fd, ret = 0;
>>
>>       migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
>> @@ -310,6 +313,12 @@ void *colo_process_incoming_thread(void *opaque)
>>           goto out;
>>       }
>>
>> +    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
>> +    if (buffer == NULL) {
>> +        error_report("Failed to allocate colo buffer!");
>> +        goto out;
>> +    }
>> +
>>       ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_CHECKPOINT_READY, 0);
>>       if (ret < 0) {
>>           goto out;
>> @@ -337,19 +346,50 @@ void *colo_process_incoming_thread(void *opaque)
>>               goto out;
>>           }
>>
>> -        /* TODO Load VM state */
>> +        /* read the VM state total size first */
>> +        total_size = colo_ctl_get(mis->from_src_file,
>> +                                  COLO_COMMAND_VMSTATE_SIZE);
>> +        if (total_size <= 0) {
>
> Error message?
>

OK, we need one.

>> +            goto out;
>> +        }
>
> OK, and when you fix up the colo_ctl_get in the previous patch to
> take a separate pointer for value, you can make total_size a size_t.
>

Yes, I have updated it after addressing your review comment on patch 11.

>
> Other than those, it looks good.
>

Thanks.

> Dave
>
>> +        /* read vm device state into colo buffer */
>> +        ret = qsb_fill_buffer(buffer, mis->from_src_file, total_size);
>> +        if (ret != total_size) {
>> +            error_report("can't get all migration data");
>> +            goto out;
>> +        }
>>
>>           ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_RECEIVED, 0);
>>           if (ret < 0) {
>>               goto out;
>>           }
>>
>> +        /* open colo buffer for read */
>> +        fb = qemu_bufopen("r", buffer);
>> +        if (!fb) {
>> +            error_report("can't open colo buffer for read");
>> +            goto out;
>> +        }
>> +
>> +        qemu_mutex_lock_iothread();
>> +        qemu_system_reset(VMRESET_SILENT);
>> +        if (qemu_loadvm_state(fb) < 0) {
>> +            error_report("COLO: loadvm failed");
>> +            qemu_mutex_unlock_iothread();
>> +            goto out;
>> +        }
>> +        qemu_mutex_unlock_iothread();
>> +
>>           /* TODO: flush vm state */
>>
>>           ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_LOADED, 0);
>>           if (ret < 0) {
>>               goto out;
>>           }
>> +
>> +        qemu_fclose(fb);
>> +        fb = NULL;
>>       }
>>
>>   out:
>> @@ -358,6 +398,11 @@ out:
>>                        strerror(-ret));
>>       }
>>
>> +    if (fb) {
>> +        qemu_fclose(fb);
>> +    }
>> +    qsb_free(buffer);
>> +
>>       qemu_mutex_lock_iothread();
>>       colo_release_ram_cache();
>>       qemu_mutex_unlock_iothread();
>> --
>> 1.8.3.1
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
diff mbox

Patch

diff --git a/migration/colo.c b/migration/colo.c
index 25f85b2..1339774 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -287,6 +287,9 @@  static int colo_wait_handle_cmd(QEMUFile *f, int *checkpoint_request)
 void *colo_process_incoming_thread(void *opaque)
 {
     MigrationIncomingState *mis = opaque;
+    QEMUFile *fb = NULL;
+    QEMUSizedBuffer *buffer = NULL; /* Cache incoming device state */
+    int  total_size;
     int fd, ret = 0;
 
     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
@@ -310,6 +313,12 @@  void *colo_process_incoming_thread(void *opaque)
         goto out;
     }
 
+    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
+    if (buffer == NULL) {
+        error_report("Failed to allocate colo buffer!");
+        goto out;
+    }
+
     ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_CHECKPOINT_READY, 0);
     if (ret < 0) {
         goto out;
@@ -337,19 +346,50 @@  void *colo_process_incoming_thread(void *opaque)
             goto out;
         }
 
-        /* TODO Load VM state */
+        /* read the VM state total size first */
+        total_size = colo_ctl_get(mis->from_src_file,
+                                  COLO_COMMAND_VMSTATE_SIZE);
+        if (total_size <= 0) {
+            goto out;
+        }
+
+        /* read vm device state into colo buffer */
+        ret = qsb_fill_buffer(buffer, mis->from_src_file, total_size);
+        if (ret != total_size) {
+            error_report("can't get all migration data");
+            goto out;
+        }
 
         ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_RECEIVED, 0);
         if (ret < 0) {
             goto out;
         }
 
+        /* open colo buffer for read */
+        fb = qemu_bufopen("r", buffer);
+        if (!fb) {
+            error_report("can't open colo buffer for read");
+            goto out;
+        }
+
+        qemu_mutex_lock_iothread();
+        qemu_system_reset(VMRESET_SILENT);
+        if (qemu_loadvm_state(fb) < 0) {
+            error_report("COLO: loadvm failed");
+            qemu_mutex_unlock_iothread();
+            goto out;
+        }
+        qemu_mutex_unlock_iothread();
+
         /* TODO: flush vm state */
 
         ret = colo_ctl_put(mis->to_src_file, COLO_COMMAND_VMSTATE_LOADED, 0);
         if (ret < 0) {
             goto out;
         }
+
+        qemu_fclose(fb);
+        fb = NULL;
     }
 
 out:
@@ -358,6 +398,11 @@  out:
                      strerror(-ret));
     }
 
+    if (fb) {
+        qemu_fclose(fb);
+    }
+    qsb_free(buffer);
+
     qemu_mutex_lock_iothread();
     colo_release_ram_cache();
     qemu_mutex_unlock_iothread();