diff mbox

[COLO-Frame,v10,12/38] COLO: Save PVM state to secondary side when do checkpoint

Message ID 1446551816-15768-13-git-send-email-zhang.zhanghailiang@huawei.com
State New
Headers show

Commit Message

Zhanghailiang Nov. 3, 2015, 11:56 a.m. UTC
The main process of checkpoint is to synchronize SVM with PVM.
VM's state includes ram and device state. So we will migrate PVM's
state to SVM when do checkpoint, just like migration does.

We will cache PVM's state in slave, we use QEMUSizedBuffer
to store the data, we need to know the size of VM state, so in master,
we use qsb to store VM state temporarily, get the data size by call qsb_get_length()
and then migrate the data to the qsb in the secondary side.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
 migration/colo.c   | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 migration/ram.c    | 47 +++++++++++++++++++++++++++++--------
 migration/savevm.c |  2 +-
 3 files changed, 101 insertions(+), 16 deletions(-)

Comments

Dr. David Alan Gilbert Nov. 6, 2015, 6:59 p.m. UTC | #1
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> The main process of checkpoint is to synchronize SVM with PVM.
> VM's state includes ram and device state. So we will migrate PVM's
> state to SVM when do checkpoint, just like migration does.
> 
> We will cache PVM's state in slave, we use QEMUSizedBuffer
> to store the data, we need to know the size of VM state, so in master,
> we use qsb to store VM state temporarily, get the data size by call qsb_get_length()
> and then migrate the data to the qsb in the secondary side.
> 
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> ---
>  migration/colo.c   | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++----
>  migration/ram.c    | 47 +++++++++++++++++++++++++++++--------
>  migration/savevm.c |  2 +-
>  3 files changed, 101 insertions(+), 16 deletions(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index 2510762..b865513 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -17,6 +17,9 @@
>  #include "qemu/error-report.h"
>  #include "qemu/sockets.h"
>  
> +/* colo buffer */
> +#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
> +
>  bool colo_supported(void)
>  {
>      return true;
> @@ -94,9 +97,12 @@ static int colo_ctl_get(QEMUFile *f, uint32_t require)
>      return value;
>  }
>  
> -static int colo_do_checkpoint_transaction(MigrationState *s)
> +static int colo_do_checkpoint_transaction(MigrationState *s,
> +                                          QEMUSizedBuffer *buffer)
>  {
>      int ret;
> +    size_t size;
> +    QEMUFile *trans = NULL;
>  
>      ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0);
>      if (ret < 0) {
> @@ -107,15 +113,47 @@ static int colo_do_checkpoint_transaction(MigrationState *s)
>      if (ret < 0) {
>          goto out;
>      }
> +    /* Reset colo buffer and open it for write */
> +    qsb_set_length(buffer, 0);
> +    trans = qemu_bufopen("w", buffer);
> +    if (!trans) {
> +        error_report("Open colo buffer for write failed");
> +        goto out;
> +    }
>  
> -    /* TODO: suspend and save vm state to colo buffer */
> +    qemu_mutex_lock_iothread();
> +    vm_stop_force_state(RUN_STATE_COLO);
> +    qemu_mutex_unlock_iothread();
> +    trace_colo_vm_state_change("run", "stop");
> +
> +    /* Disable block migration */
> +    s->params.blk = 0;
> +    s->params.shared = 0;
> +    qemu_savevm_state_header(trans);
> +    qemu_savevm_state_begin(trans, &s->params);
> +    qemu_mutex_lock_iothread();
> +    qemu_savevm_state_complete(trans);
> +    qemu_mutex_unlock_iothread();
> +
> +    qemu_fflush(trans);
>  
>      ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0);
>      if (ret < 0) {
>          goto out;
>      }
> +    /* we send the total size of the vmstate first */
> +    size = qsb_get_length(buffer);
> +    ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SIZE, size);
> +    if (ret < 0) {
> +        goto out;
> +    }
>  
> -    /* TODO: send vmstate to Secondary */
> +    qsb_put_buffer(s->to_dst_file, buffer, size);
> +    qemu_fflush(s->to_dst_file);
> +    ret = qemu_file_get_error(s->to_dst_file);
> +    if (ret < 0) {
> +        goto out;
> +    }
>  
>      ret = colo_ctl_get(s->from_dst_file, COLO_COMMAND_VMSTATE_RECEIVED);
>      if (ret < 0) {
> @@ -127,14 +165,24 @@ static int colo_do_checkpoint_transaction(MigrationState *s)
>          goto out;
>      }
>  
> -    /* TODO: resume Primary */
> +    ret = 0;
> +    /* resume master */
> +    qemu_mutex_lock_iothread();
> +    vm_start();
> +    qemu_mutex_unlock_iothread();
> +    trace_colo_vm_state_change("stop", "run");
>  
>  out:
> +    if (trans) {
> +        qemu_fclose(trans);
> +    }
> +
>      return ret;
>  }
>  
>  static void colo_process_checkpoint(MigrationState *s)
>  {
> +    QEMUSizedBuffer *buffer = NULL;
>      int fd, ret = 0;
>  
>      /* Dup the fd of to_dst_file */
> @@ -159,6 +207,13 @@ static void colo_process_checkpoint(MigrationState *s)
>          goto out;
>      }
>  
> +    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
> +    if (buffer == NULL) {
> +        ret = -ENOMEM;
> +        error_report("Failed to allocate buffer!");

Please say 'Failed to allocate colo buffer'; QEMU has lots and lots of buffers.

> +        goto out;
> +    }
> +
>      qemu_mutex_lock_iothread();
>      vm_start();
>      qemu_mutex_unlock_iothread();
> @@ -166,7 +221,7 @@ static void colo_process_checkpoint(MigrationState *s)
>  
>      while (s->state == MIGRATION_STATUS_COLO) {
>          /* start a colo checkpoint */
> -        ret = colo_do_checkpoint_transaction(s);
> +        ret = colo_do_checkpoint_transaction(s, buffer);
>          if (ret < 0) {
>              goto out;
>          }
> @@ -179,6 +234,9 @@ out:
>      migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
>                        MIGRATION_STATUS_COMPLETED);
>  
> +    qsb_free(buffer);
> +    buffer = NULL;
> +
>      if (s->from_dst_file) {
>          qemu_fclose(s->from_dst_file);
>      }
> diff --git a/migration/ram.c b/migration/ram.c
> index a25bcc7..5784c15 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -38,6 +38,7 @@
>  #include "trace.h"
>  #include "exec/ram_addr.h"
>  #include "qemu/rcu_queue.h"
> +#include "migration/colo.h"
>  
>  #ifdef DEBUG_MIGRATION_RAM
>  #define DPRINTF(fmt, ...) \
> @@ -1165,15 +1166,8 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
>      }
>  }
>  
> -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
> - * long-running RCU critical section.  When rcu-reclaims in the code
> - * start to become numerous it will be necessary to reduce the
> - * granularity of these critical sections.
> - */
> -
> -static int ram_save_setup(QEMUFile *f, void *opaque)
> +static int ram_save_init_globals(void)
>  {
> -    RAMBlock *block;
>      int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
>  
>      dirty_rate_high_cnt = 0;
> @@ -1233,6 +1227,31 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>      migration_bitmap_sync();
>      qemu_mutex_unlock_ramlist();
>      qemu_mutex_unlock_iothread();
> +    rcu_read_unlock();
> +
> +    return 0;
> +}

It surprises me you want migration_bitmap_sync in ram_save_init_globals(),
but I guess you want the first sync at the start.

> +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
> + * long-running RCU critical section.  When rcu-reclaims in the code
> + * start to become numerous it will be necessary to reduce the
> + * granularity of these critical sections.
> + */
> +
> +static int ram_save_setup(QEMUFile *f, void *opaque)
> +{
> +    RAMBlock *block;
> +
> +    /*
> +     * migration has already setup the bitmap, reuse it.
> +     */
> +    if (!migration_in_colo_state()) {
> +        if (ram_save_init_globals() < 0) {
> +            return -1;
> +         }
> +    }
> +
> +    rcu_read_lock();
>  
>      qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>  
> @@ -1332,7 +1351,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>      while (true) {
>          int pages;
>  
> -        pages = ram_find_and_save_block(f, true, &bytes_transferred);
> +        pages = ram_find_and_save_block(f, !migration_in_colo_state(),
> +                                        &bytes_transferred);
>          /* no more blocks to sent */
>          if (pages == 0) {
>              break;
> @@ -1343,8 +1363,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>      ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>  
>      rcu_read_unlock();
> +    /*
> +     * Since we need to reuse dirty bitmap in colo,
> +     * don't cleanup the bitmap.
> +     */
> +    if (!migrate_colo_enabled() ||
> +        migration_has_failed(migrate_get_current())) {
> +        migration_end();
> +    }
>  
> -    migration_end();
>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>  
>      return 0;
> diff --git a/migration/savevm.c b/migration/savevm.c
> index dbcc39a..0faf12b 100644
> --- a/migration/savevm.c
> +++ b/migration/savevm.c
> @@ -48,7 +48,7 @@
>  #include "qemu/iov.h"
>  #include "block/snapshot.h"
>  #include "block/qapi.h"
> -
> +#include "migration/colo.h"
>  
>  #ifndef ETH_P_RARP
>  #define ETH_P_RARP 0x8035

Wrong patch?


So other than those minor things:

Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>

but watch out for the recent changes to migrate_end that went in
a few days ago.

Dave

> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang Nov. 9, 2015, 9:17 a.m. UTC | #2
On 2015/11/7 2:59, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> The main process of checkpoint is to synchronize SVM with PVM.
>> VM's state includes ram and device state. So we will migrate PVM's
>> state to SVM when do checkpoint, just like migration does.
>>
>> We will cache PVM's state in slave, we use QEMUSizedBuffer
>> to store the data, we need to know the size of VM state, so in master,
>> we use qsb to store VM state temporarily, get the data size by call qsb_get_length()
>> and then migrate the data to the qsb in the secondary side.
>>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> ---
>>   migration/colo.c   | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++----
>>   migration/ram.c    | 47 +++++++++++++++++++++++++++++--------
>>   migration/savevm.c |  2 +-
>>   3 files changed, 101 insertions(+), 16 deletions(-)
>>
>> diff --git a/migration/colo.c b/migration/colo.c
>> index 2510762..b865513 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -17,6 +17,9 @@
>>   #include "qemu/error-report.h"
>>   #include "qemu/sockets.h"
>>
>> +/* colo buffer */
>> +#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>> +
>>   bool colo_supported(void)
>>   {
>>       return true;
>> @@ -94,9 +97,12 @@ static int colo_ctl_get(QEMUFile *f, uint32_t require)
>>       return value;
>>   }
>>
>> -static int colo_do_checkpoint_transaction(MigrationState *s)
>> +static int colo_do_checkpoint_transaction(MigrationState *s,
>> +                                          QEMUSizedBuffer *buffer)
>>   {
>>       int ret;
>> +    size_t size;
>> +    QEMUFile *trans = NULL;
>>
>>       ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0);
>>       if (ret < 0) {
>> @@ -107,15 +113,47 @@ static int colo_do_checkpoint_transaction(MigrationState *s)
>>       if (ret < 0) {
>>           goto out;
>>       }
>> +    /* Reset colo buffer and open it for write */
>> +    qsb_set_length(buffer, 0);
>> +    trans = qemu_bufopen("w", buffer);
>> +    if (!trans) {
>> +        error_report("Open colo buffer for write failed");
>> +        goto out;
>> +    }
>>
>> -    /* TODO: suspend and save vm state to colo buffer */
>> +    qemu_mutex_lock_iothread();
>> +    vm_stop_force_state(RUN_STATE_COLO);
>> +    qemu_mutex_unlock_iothread();
>> +    trace_colo_vm_state_change("run", "stop");
>> +
>> +    /* Disable block migration */
>> +    s->params.blk = 0;
>> +    s->params.shared = 0;
>> +    qemu_savevm_state_header(trans);
>> +    qemu_savevm_state_begin(trans, &s->params);
>> +    qemu_mutex_lock_iothread();
>> +    qemu_savevm_state_complete(trans);
>> +    qemu_mutex_unlock_iothread();
>> +
>> +    qemu_fflush(trans);
>>
>>       ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0);
>>       if (ret < 0) {
>>           goto out;
>>       }
>> +    /* we send the total size of the vmstate first */
>> +    size = qsb_get_length(buffer);
>> +    ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SIZE, size);
>> +    if (ret < 0) {
>> +        goto out;
>> +    }
>>
>> -    /* TODO: send vmstate to Secondary */
>> +    qsb_put_buffer(s->to_dst_file, buffer, size);
>> +    qemu_fflush(s->to_dst_file);
>> +    ret = qemu_file_get_error(s->to_dst_file);
>> +    if (ret < 0) {
>> +        goto out;
>> +    }
>>
>>       ret = colo_ctl_get(s->from_dst_file, COLO_COMMAND_VMSTATE_RECEIVED);
>>       if (ret < 0) {
>> @@ -127,14 +165,24 @@ static int colo_do_checkpoint_transaction(MigrationState *s)
>>           goto out;
>>       }
>>
>> -    /* TODO: resume Primary */
>> +    ret = 0;
>> +    /* resume master */
>> +    qemu_mutex_lock_iothread();
>> +    vm_start();
>> +    qemu_mutex_unlock_iothread();
>> +    trace_colo_vm_state_change("stop", "run");
>>
>>   out:
>> +    if (trans) {
>> +        qemu_fclose(trans);
>> +    }
>> +
>>       return ret;
>>   }
>>
>>   static void colo_process_checkpoint(MigrationState *s)
>>   {
>> +    QEMUSizedBuffer *buffer = NULL;
>>       int fd, ret = 0;
>>
>>       /* Dup the fd of to_dst_file */
>> @@ -159,6 +207,13 @@ static void colo_process_checkpoint(MigrationState *s)
>>           goto out;
>>       }
>>
>> +    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
>> +    if (buffer == NULL) {
>> +        ret = -ENOMEM;
>> +        error_report("Failed to allocate buffer!");
>
> Please say 'Failed to allocate colo buffer'; QEMU has lots and lots of buffers.
>

OK, will fix it in next version.

>> +        goto out;
>> +    }
>> +
>>       qemu_mutex_lock_iothread();
>>       vm_start();
>>       qemu_mutex_unlock_iothread();
>> @@ -166,7 +221,7 @@ static void colo_process_checkpoint(MigrationState *s)
>>
>>       while (s->state == MIGRATION_STATUS_COLO) {
>>           /* start a colo checkpoint */
>> -        ret = colo_do_checkpoint_transaction(s);
>> +        ret = colo_do_checkpoint_transaction(s, buffer);
>>           if (ret < 0) {
>>               goto out;
>>           }
>> @@ -179,6 +234,9 @@ out:
>>       migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
>>                         MIGRATION_STATUS_COMPLETED);
>>
>> +    qsb_free(buffer);
>> +    buffer = NULL;
>> +
>>       if (s->from_dst_file) {
>>           qemu_fclose(s->from_dst_file);
>>       }
>> diff --git a/migration/ram.c b/migration/ram.c
>> index a25bcc7..5784c15 100644
>> --- a/migration/ram.c
>> +++ b/migration/ram.c
>> @@ -38,6 +38,7 @@
>>   #include "trace.h"
>>   #include "exec/ram_addr.h"
>>   #include "qemu/rcu_queue.h"
>> +#include "migration/colo.h"
>>
>>   #ifdef DEBUG_MIGRATION_RAM
>>   #define DPRINTF(fmt, ...) \
>> @@ -1165,15 +1166,8 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
>>       }
>>   }
>>
>> -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
>> - * long-running RCU critical section.  When rcu-reclaims in the code
>> - * start to become numerous it will be necessary to reduce the
>> - * granularity of these critical sections.
>> - */
>> -
>> -static int ram_save_setup(QEMUFile *f, void *opaque)
>> +static int ram_save_init_globals(void)
>>   {
>> -    RAMBlock *block;
>>       int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
>>
>>       dirty_rate_high_cnt = 0;
>> @@ -1233,6 +1227,31 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>>       migration_bitmap_sync();
>>       qemu_mutex_unlock_ramlist();
>>       qemu_mutex_unlock_iothread();
>> +    rcu_read_unlock();
>> +
>> +    return 0;
>> +}
>
> It surprises me you want migration_bitmap_sync in ram_save_init_globals(),
> but I guess you want the first sync at the start.
>

Er, sorry,i don't quite understand.
Here. I just split part codes of ram_save_setup()
into a helper function ram_save_init_global(), to make it more clear.
We can't do initial work for twice. Is there any thing wrong ?


>> +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
>> + * long-running RCU critical section.  When rcu-reclaims in the code
>> + * start to become numerous it will be necessary to reduce the
>> + * granularity of these critical sections.
>> + */
>> +
>> +static int ram_save_setup(QEMUFile *f, void *opaque)
>> +{
>> +    RAMBlock *block;
>> +
>> +    /*
>> +     * migration has already setup the bitmap, reuse it.
>> +     */
>> +    if (!migration_in_colo_state()) {
>> +        if (ram_save_init_globals() < 0) {
>> +            return -1;
>> +         }
>> +    }
>> +
>> +    rcu_read_lock();
>>
>>       qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>>
>> @@ -1332,7 +1351,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>>       while (true) {
>>           int pages;
>>
>> -        pages = ram_find_and_save_block(f, true, &bytes_transferred);
>> +        pages = ram_find_and_save_block(f, !migration_in_colo_state(),
>> +                                        &bytes_transferred);
>>           /* no more blocks to sent */
>>           if (pages == 0) {
>>               break;
>> @@ -1343,8 +1363,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>>       ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>>
>>       rcu_read_unlock();
>> +    /*
>> +     * Since we need to reuse dirty bitmap in colo,
>> +     * don't cleanup the bitmap.
>> +     */
>> +    if (!migrate_colo_enabled() ||
>> +        migration_has_failed(migrate_get_current())) {
>> +        migration_end();
>> +    }
>>
>> -    migration_end();
>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>
>>       return 0;
>> diff --git a/migration/savevm.c b/migration/savevm.c
>> index dbcc39a..0faf12b 100644
>> --- a/migration/savevm.c
>> +++ b/migration/savevm.c
>> @@ -48,7 +48,7 @@
>>   #include "qemu/iov.h"
>>   #include "block/snapshot.h"
>>   #include "block/qapi.h"
>> -
>> +#include "migration/colo.h"
>>
>>   #ifndef ETH_P_RARP
>>   #define ETH_P_RARP 0x8035
>
> Wrong patch?
>

No, we have call migration_in_colo_state() in qemu_savevm_state_begin().
So we have to include "migration/colo.h"

>
> So other than those minor things:
>
> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>
> but watch out for the recent changes to migrate_end that went in
> a few days ago.
>

Thanks for reminding me, i have rebased that. ;)

zhanghailiang

> Dave
>
>> --
>> 1.8.3.1
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
Dr. David Alan Gilbert Nov. 13, 2015, 6:53 p.m. UTC | #3
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> On 2015/11/7 2:59, Dr. David Alan Gilbert wrote:
> >* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> >>The main process of checkpoint is to synchronize SVM with PVM.
> >>VM's state includes ram and device state. So we will migrate PVM's
> >>state to SVM when do checkpoint, just like migration does.
> >>
> >>We will cache PVM's state in slave, we use QEMUSizedBuffer
> >>to store the data, we need to know the size of VM state, so in master,
> >>we use qsb to store VM state temporarily, get the data size by call qsb_get_length()
> >>and then migrate the data to the qsb in the secondary side.
> >>
> >>Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> >>Signed-off-by: Gonglei <arei.gonglei@huawei.com>
> >>Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> >>---
> >>  migration/colo.c   | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++----
> >>  migration/ram.c    | 47 +++++++++++++++++++++++++++++--------
> >>  migration/savevm.c |  2 +-
> >>  3 files changed, 101 insertions(+), 16 deletions(-)
> >>
> >>diff --git a/migration/colo.c b/migration/colo.c
> >>index 2510762..b865513 100644
> >>--- a/migration/colo.c
> >>+++ b/migration/colo.c
> >>@@ -17,6 +17,9 @@
> >>  #include "qemu/error-report.h"
> >>  #include "qemu/sockets.h"
> >>
> >>+/* colo buffer */
> >>+#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
> >>+
> >>  bool colo_supported(void)
> >>  {
> >>      return true;
> >>@@ -94,9 +97,12 @@ static int colo_ctl_get(QEMUFile *f, uint32_t require)
> >>      return value;
> >>  }
> >>
> >>-static int colo_do_checkpoint_transaction(MigrationState *s)
> >>+static int colo_do_checkpoint_transaction(MigrationState *s,
> >>+                                          QEMUSizedBuffer *buffer)
> >>  {
> >>      int ret;
> >>+    size_t size;
> >>+    QEMUFile *trans = NULL;
> >>
> >>      ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0);
> >>      if (ret < 0) {
> >>@@ -107,15 +113,47 @@ static int colo_do_checkpoint_transaction(MigrationState *s)
> >>      if (ret < 0) {
> >>          goto out;
> >>      }
> >>+    /* Reset colo buffer and open it for write */
> >>+    qsb_set_length(buffer, 0);
> >>+    trans = qemu_bufopen("w", buffer);
> >>+    if (!trans) {
> >>+        error_report("Open colo buffer for write failed");
> >>+        goto out;
> >>+    }
> >>
> >>-    /* TODO: suspend and save vm state to colo buffer */
> >>+    qemu_mutex_lock_iothread();
> >>+    vm_stop_force_state(RUN_STATE_COLO);
> >>+    qemu_mutex_unlock_iothread();
> >>+    trace_colo_vm_state_change("run", "stop");
> >>+
> >>+    /* Disable block migration */
> >>+    s->params.blk = 0;
> >>+    s->params.shared = 0;
> >>+    qemu_savevm_state_header(trans);
> >>+    qemu_savevm_state_begin(trans, &s->params);
> >>+    qemu_mutex_lock_iothread();
> >>+    qemu_savevm_state_complete(trans);
> >>+    qemu_mutex_unlock_iothread();
> >>+
> >>+    qemu_fflush(trans);
> >>
> >>      ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0);
> >>      if (ret < 0) {
> >>          goto out;
> >>      }
> >>+    /* we send the total size of the vmstate first */
> >>+    size = qsb_get_length(buffer);
> >>+    ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SIZE, size);
> >>+    if (ret < 0) {
> >>+        goto out;
> >>+    }
> >>
> >>-    /* TODO: send vmstate to Secondary */
> >>+    qsb_put_buffer(s->to_dst_file, buffer, size);
> >>+    qemu_fflush(s->to_dst_file);
> >>+    ret = qemu_file_get_error(s->to_dst_file);
> >>+    if (ret < 0) {
> >>+        goto out;
> >>+    }
> >>
> >>      ret = colo_ctl_get(s->from_dst_file, COLO_COMMAND_VMSTATE_RECEIVED);
> >>      if (ret < 0) {
> >>@@ -127,14 +165,24 @@ static int colo_do_checkpoint_transaction(MigrationState *s)
> >>          goto out;
> >>      }
> >>
> >>-    /* TODO: resume Primary */
> >>+    ret = 0;
> >>+    /* resume master */
> >>+    qemu_mutex_lock_iothread();
> >>+    vm_start();
> >>+    qemu_mutex_unlock_iothread();
> >>+    trace_colo_vm_state_change("stop", "run");
> >>
> >>  out:
> >>+    if (trans) {
> >>+        qemu_fclose(trans);
> >>+    }
> >>+
> >>      return ret;
> >>  }
> >>
> >>  static void colo_process_checkpoint(MigrationState *s)
> >>  {
> >>+    QEMUSizedBuffer *buffer = NULL;
> >>      int fd, ret = 0;
> >>
> >>      /* Dup the fd of to_dst_file */
> >>@@ -159,6 +207,13 @@ static void colo_process_checkpoint(MigrationState *s)
> >>          goto out;
> >>      }
> >>
> >>+    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
> >>+    if (buffer == NULL) {
> >>+        ret = -ENOMEM;
> >>+        error_report("Failed to allocate buffer!");
> >
> >Please say 'Failed to allocate colo buffer'; QEMU has lots and lots of buffers.
> >
> 
> OK, will fix it in next version.
> 
> >>+        goto out;
> >>+    }
> >>+
> >>      qemu_mutex_lock_iothread();
> >>      vm_start();
> >>      qemu_mutex_unlock_iothread();
> >>@@ -166,7 +221,7 @@ static void colo_process_checkpoint(MigrationState *s)
> >>
> >>      while (s->state == MIGRATION_STATUS_COLO) {
> >>          /* start a colo checkpoint */
> >>-        ret = colo_do_checkpoint_transaction(s);
> >>+        ret = colo_do_checkpoint_transaction(s, buffer);
> >>          if (ret < 0) {
> >>              goto out;
> >>          }
> >>@@ -179,6 +234,9 @@ out:
> >>      migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
> >>                        MIGRATION_STATUS_COMPLETED);
> >>
> >>+    qsb_free(buffer);
> >>+    buffer = NULL;
> >>+
> >>      if (s->from_dst_file) {
> >>          qemu_fclose(s->from_dst_file);
> >>      }
> >>diff --git a/migration/ram.c b/migration/ram.c
> >>index a25bcc7..5784c15 100644
> >>--- a/migration/ram.c
> >>+++ b/migration/ram.c
> >>@@ -38,6 +38,7 @@
> >>  #include "trace.h"
> >>  #include "exec/ram_addr.h"
> >>  #include "qemu/rcu_queue.h"
> >>+#include "migration/colo.h"
> >>
> >>  #ifdef DEBUG_MIGRATION_RAM
> >>  #define DPRINTF(fmt, ...) \
> >>@@ -1165,15 +1166,8 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
> >>      }
> >>  }
> >>
> >>-/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
> >>- * long-running RCU critical section.  When rcu-reclaims in the code
> >>- * start to become numerous it will be necessary to reduce the
> >>- * granularity of these critical sections.
> >>- */
> >>-
> >>-static int ram_save_setup(QEMUFile *f, void *opaque)
> >>+static int ram_save_init_globals(void)
> >>  {
> >>-    RAMBlock *block;
> >>      int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
> >>
> >>      dirty_rate_high_cnt = 0;
> >>@@ -1233,6 +1227,31 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
> >>      migration_bitmap_sync();
> >>      qemu_mutex_unlock_ramlist();
> >>      qemu_mutex_unlock_iothread();
> >>+    rcu_read_unlock();
> >>+
> >>+    return 0;
> >>+}
> >
> >It surprises me you want migration_bitmap_sync in ram_save_init_globals(),
> >but I guess you want the first sync at the start.
> >
> 
> Er, sorry,i don't quite understand.
> Here. I just split part codes of ram_save_setup()
> into a helper function ram_save_init_global(), to make it more clear.
> We can't do initial work for twice. Is there any thing wrong ?

No, that's OK - it just seemed odd for a function like 'init_globals'
to do such a big side effect of doing the sync; but yes, it makes sense
since it's just a split.

> >>+/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
> >>+ * long-running RCU critical section.  When rcu-reclaims in the code
> >>+ * start to become numerous it will be necessary to reduce the
> >>+ * granularity of these critical sections.
> >>+ */
> >>+
> >>+static int ram_save_setup(QEMUFile *f, void *opaque)
> >>+{
> >>+    RAMBlock *block;
> >>+
> >>+    /*
> >>+     * migration has already setup the bitmap, reuse it.
> >>+     */
> >>+    if (!migration_in_colo_state()) {
> >>+        if (ram_save_init_globals() < 0) {
> >>+            return -1;
> >>+         }
> >>+    }
> >>+
> >>+    rcu_read_lock();
> >>
> >>      qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
> >>
> >>@@ -1332,7 +1351,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
> >>      while (true) {
> >>          int pages;
> >>
> >>-        pages = ram_find_and_save_block(f, true, &bytes_transferred);
> >>+        pages = ram_find_and_save_block(f, !migration_in_colo_state(),
> >>+                                        &bytes_transferred);
> >>          /* no more blocks to sent */
> >>          if (pages == 0) {
> >>              break;
> >>@@ -1343,8 +1363,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
> >>      ram_control_after_iterate(f, RAM_CONTROL_FINISH);
> >>
> >>      rcu_read_unlock();
> >>+    /*
> >>+     * Since we need to reuse dirty bitmap in colo,
> >>+     * don't cleanup the bitmap.
> >>+     */
> >>+    if (!migrate_colo_enabled() ||
> >>+        migration_has_failed(migrate_get_current())) {
> >>+        migration_end();
> >>+    }
> >>
> >>-    migration_end();
> >>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> >>
> >>      return 0;
> >>diff --git a/migration/savevm.c b/migration/savevm.c
> >>index dbcc39a..0faf12b 100644
> >>--- a/migration/savevm.c
> >>+++ b/migration/savevm.c
> >>@@ -48,7 +48,7 @@
> >>  #include "qemu/iov.h"
> >>  #include "block/snapshot.h"
> >>  #include "block/qapi.h"
> >>-
> >>+#include "migration/colo.h"
> >>
> >>  #ifndef ETH_P_RARP
> >>  #define ETH_P_RARP 0x8035
> >
> >Wrong patch?
> >
> 
> No, we have call migration_in_colo_state() in qemu_savevm_state_begin().
> So we have to include "migration/colo.h"

I don't think you use it in savevm.c until patch 30, so you can add
the #include in patch 30 (or whichever is the patch that first needs it).

Dave


> 
> >
> >So other than those minor things:
> >
> >Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> >
> >but watch out for the recent changes to migrate_end that went in
> >a few days ago.
> >
> 
> Thanks for reminding me, i have rebased that. ;)
> 
> zhanghailiang
> 
> >Dave
> >
> >>--
> >>1.8.3.1
> >>
> >>
> >--
> >Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> >
> >.
> >
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang Nov. 17, 2015, 10:20 a.m. UTC | #4
On 2015/11/14 2:53, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> On 2015/11/7 2:59, Dr. David Alan Gilbert wrote:
>>> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>>>> The main process of checkpoint is to synchronize SVM with PVM.
>>>> VM's state includes ram and device state. So we will migrate PVM's
>>>> state to SVM when do checkpoint, just like migration does.
>>>>
>>>> We will cache PVM's state in slave, we use QEMUSizedBuffer
>>>> to store the data, we need to know the size of VM state, so in master,
>>>> we use qsb to store VM state temporarily, get the data size by call qsb_get_length()
>>>> and then migrate the data to the qsb in the secondary side.
>>>>
>>>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>>>> Signed-off-by: Gonglei <arei.gonglei@huawei.com>
>>>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>>>> ---
>>>>   migration/colo.c   | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++----
>>>>   migration/ram.c    | 47 +++++++++++++++++++++++++++++--------
>>>>   migration/savevm.c |  2 +-
>>>>   3 files changed, 101 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/migration/colo.c b/migration/colo.c
>>>> index 2510762..b865513 100644
>>>> --- a/migration/colo.c
>>>> +++ b/migration/colo.c
>>>> @@ -17,6 +17,9 @@
>>>>   #include "qemu/error-report.h"
>>>>   #include "qemu/sockets.h"
>>>>
>>>> +/* colo buffer */
>>>> +#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>>>> +
>>>>   bool colo_supported(void)
>>>>   {
>>>>       return true;
>>>> @@ -94,9 +97,12 @@ static int colo_ctl_get(QEMUFile *f, uint32_t require)
>>>>       return value;
>>>>   }
>>>>
>>>> -static int colo_do_checkpoint_transaction(MigrationState *s)
>>>> +static int colo_do_checkpoint_transaction(MigrationState *s,
>>>> +                                          QEMUSizedBuffer *buffer)
>>>>   {
>>>>       int ret;
>>>> +    size_t size;
>>>> +    QEMUFile *trans = NULL;
>>>>
>>>>       ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0);
>>>>       if (ret < 0) {
>>>> @@ -107,15 +113,47 @@ static int colo_do_checkpoint_transaction(MigrationState *s)
>>>>       if (ret < 0) {
>>>>           goto out;
>>>>       }
>>>> +    /* Reset colo buffer and open it for write */
>>>> +    qsb_set_length(buffer, 0);
>>>> +    trans = qemu_bufopen("w", buffer);
>>>> +    if (!trans) {
>>>> +        error_report("Open colo buffer for write failed");
>>>> +        goto out;
>>>> +    }
>>>>
>>>> -    /* TODO: suspend and save vm state to colo buffer */
>>>> +    qemu_mutex_lock_iothread();
>>>> +    vm_stop_force_state(RUN_STATE_COLO);
>>>> +    qemu_mutex_unlock_iothread();
>>>> +    trace_colo_vm_state_change("run", "stop");
>>>> +
>>>> +    /* Disable block migration */
>>>> +    s->params.blk = 0;
>>>> +    s->params.shared = 0;
>>>> +    qemu_savevm_state_header(trans);
>>>> +    qemu_savevm_state_begin(trans, &s->params);
>>>> +    qemu_mutex_lock_iothread();
>>>> +    qemu_savevm_state_complete(trans);
>>>> +    qemu_mutex_unlock_iothread();
>>>> +
>>>> +    qemu_fflush(trans);
>>>>
>>>>       ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0);
>>>>       if (ret < 0) {
>>>>           goto out;
>>>>       }
>>>> +    /* we send the total size of the vmstate first */
>>>> +    size = qsb_get_length(buffer);
>>>> +    ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SIZE, size);
>>>> +    if (ret < 0) {
>>>> +        goto out;
>>>> +    }
>>>>
>>>> -    /* TODO: send vmstate to Secondary */
>>>> +    qsb_put_buffer(s->to_dst_file, buffer, size);
>>>> +    qemu_fflush(s->to_dst_file);
>>>> +    ret = qemu_file_get_error(s->to_dst_file);
>>>> +    if (ret < 0) {
>>>> +        goto out;
>>>> +    }
>>>>
>>>>       ret = colo_ctl_get(s->from_dst_file, COLO_COMMAND_VMSTATE_RECEIVED);
>>>>       if (ret < 0) {
>>>> @@ -127,14 +165,24 @@ static int colo_do_checkpoint_transaction(MigrationState *s)
>>>>           goto out;
>>>>       }
>>>>
>>>> -    /* TODO: resume Primary */
>>>> +    ret = 0;
>>>> +    /* resume master */
>>>> +    qemu_mutex_lock_iothread();
>>>> +    vm_start();
>>>> +    qemu_mutex_unlock_iothread();
>>>> +    trace_colo_vm_state_change("stop", "run");
>>>>
>>>>   out:
>>>> +    if (trans) {
>>>> +        qemu_fclose(trans);
>>>> +    }
>>>> +
>>>>       return ret;
>>>>   }
>>>>
>>>>   static void colo_process_checkpoint(MigrationState *s)
>>>>   {
>>>> +    QEMUSizedBuffer *buffer = NULL;
>>>>       int fd, ret = 0;
>>>>
>>>>       /* Dup the fd of to_dst_file */
>>>> @@ -159,6 +207,13 @@ static void colo_process_checkpoint(MigrationState *s)
>>>>           goto out;
>>>>       }
>>>>
>>>> +    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
>>>> +    if (buffer == NULL) {
>>>> +        ret = -ENOMEM;
>>>> +        error_report("Failed to allocate buffer!");
>>>
>>> Please say 'Failed to allocate colo buffer'; QEMU has lots and lots of buffers.
>>>
>>
>> OK, will fix it in next version.
>>
>>>> +        goto out;
>>>> +    }
>>>> +
>>>>       qemu_mutex_lock_iothread();
>>>>       vm_start();
>>>>       qemu_mutex_unlock_iothread();
>>>> @@ -166,7 +221,7 @@ static void colo_process_checkpoint(MigrationState *s)
>>>>
>>>>       while (s->state == MIGRATION_STATUS_COLO) {
>>>>           /* start a colo checkpoint */
>>>> -        ret = colo_do_checkpoint_transaction(s);
>>>> +        ret = colo_do_checkpoint_transaction(s, buffer);
>>>>           if (ret < 0) {
>>>>               goto out;
>>>>           }
>>>> @@ -179,6 +234,9 @@ out:
>>>>       migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
>>>>                         MIGRATION_STATUS_COMPLETED);
>>>>
>>>> +    qsb_free(buffer);
>>>> +    buffer = NULL;
>>>> +
>>>>       if (s->from_dst_file) {
>>>>           qemu_fclose(s->from_dst_file);
>>>>       }
>>>> diff --git a/migration/ram.c b/migration/ram.c
>>>> index a25bcc7..5784c15 100644
>>>> --- a/migration/ram.c
>>>> +++ b/migration/ram.c
>>>> @@ -38,6 +38,7 @@
>>>>   #include "trace.h"
>>>>   #include "exec/ram_addr.h"
>>>>   #include "qemu/rcu_queue.h"
>>>> +#include "migration/colo.h"
>>>>
>>>>   #ifdef DEBUG_MIGRATION_RAM
>>>>   #define DPRINTF(fmt, ...) \
>>>> @@ -1165,15 +1166,8 @@ void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
>>>>       }
>>>>   }
>>>>
>>>> -/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
>>>> - * long-running RCU critical section.  When rcu-reclaims in the code
>>>> - * start to become numerous it will be necessary to reduce the
>>>> - * granularity of these critical sections.
>>>> - */
>>>> -
>>>> -static int ram_save_setup(QEMUFile *f, void *opaque)
>>>> +static int ram_save_init_globals(void)
>>>>   {
>>>> -    RAMBlock *block;
>>>>       int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
>>>>
>>>>       dirty_rate_high_cnt = 0;
>>>> @@ -1233,6 +1227,31 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
>>>>       migration_bitmap_sync();
>>>>       qemu_mutex_unlock_ramlist();
>>>>       qemu_mutex_unlock_iothread();
>>>> +    rcu_read_unlock();
>>>> +
>>>> +    return 0;
>>>> +}
>>>
>>> It surprises me you want migration_bitmap_sync in ram_save_init_globals(),
>>> but I guess you want the first sync at the start.
>>>
>>
>> Er, sorry,i don't quite understand.
>> Here. I just split part codes of ram_save_setup()
>> into a helper function ram_save_init_global(), to make it more clear.
>> We can't do initial work for twice. Is there any thing wrong ?
>
> No, that's OK - it just seemed odd for a function like 'init_globals'
> to do such a big side effect of doing the sync; but yes, it makes sense
> since it's just a split.
>
>>>> +/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
>>>> + * long-running RCU critical section.  When rcu-reclaims in the code
>>>> + * start to become numerous it will be necessary to reduce the
>>>> + * granularity of these critical sections.
>>>> + */
>>>> +
>>>> +static int ram_save_setup(QEMUFile *f, void *opaque)
>>>> +{
>>>> +    RAMBlock *block;
>>>> +
>>>> +    /*
>>>> +     * migration has already setup the bitmap, reuse it.
>>>> +     */
>>>> +    if (!migration_in_colo_state()) {
>>>> +        if (ram_save_init_globals() < 0) {
>>>> +            return -1;
>>>> +         }
>>>> +    }
>>>> +
>>>> +    rcu_read_lock();
>>>>
>>>>       qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
>>>>
>>>> @@ -1332,7 +1351,8 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>>>>       while (true) {
>>>>           int pages;
>>>>
>>>> -        pages = ram_find_and_save_block(f, true, &bytes_transferred);
>>>> +        pages = ram_find_and_save_block(f, !migration_in_colo_state(),
>>>> +                                        &bytes_transferred);
>>>>           /* no more blocks to sent */
>>>>           if (pages == 0) {
>>>>               break;
>>>> @@ -1343,8 +1363,15 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
>>>>       ram_control_after_iterate(f, RAM_CONTROL_FINISH);
>>>>
>>>>       rcu_read_unlock();
>>>> +    /*
>>>> +     * Since we need to reuse dirty bitmap in colo,
>>>> +     * don't cleanup the bitmap.
>>>> +     */
>>>> +    if (!migrate_colo_enabled() ||
>>>> +        migration_has_failed(migrate_get_current())) {
>>>> +        migration_end();
>>>> +    }
>>>>
>>>> -    migration_end();
>>>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>>>
>>>>       return 0;
>>>> diff --git a/migration/savevm.c b/migration/savevm.c
>>>> index dbcc39a..0faf12b 100644
>>>> --- a/migration/savevm.c
>>>> +++ b/migration/savevm.c
>>>> @@ -48,7 +48,7 @@
>>>>   #include "qemu/iov.h"
>>>>   #include "block/snapshot.h"
>>>>   #include "block/qapi.h"
>>>> -
>>>> +#include "migration/colo.h"
>>>>
>>>>   #ifndef ETH_P_RARP
>>>>   #define ETH_P_RARP 0x8035
>>>
>>> Wrong patch?
>>>
>>
>> No, we have call migration_in_colo_state() in qemu_savevm_state_begin().
>> So we have to include "migration/colo.h"
>
> I don't think you use it in savevm.c until patch 30, so you can add
> the #include in patch 30 (or whichever is the patch that first needs it).
>

Ha, I know what you mean. And yes, you are right,
we shouldn't call migration_in_colo_state() in qemu_savevm_state_begin() here.
Good catch. I will fix it in next version. Thanks.


> Dave
>
>
>>
>>>
>>> So other than those minor things:
>>>
>>> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>
>>> but watch out for the recent changes to migrate_end that went in
>>> a few days ago.
>>>
>>
>> Thanks for reminding me, i have rebased that. ;)
>>
>> zhanghailiang
>>
>>> Dave
>>>
>>>> --
>>>> 1.8.3.1
>>>>
>>>>
>>> --
>>> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>>>
>>> .
>>>
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
diff mbox

Patch

diff --git a/migration/colo.c b/migration/colo.c
index 2510762..b865513 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -17,6 +17,9 @@ 
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
 
+/* colo buffer */
+#define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
+
 bool colo_supported(void)
 {
     return true;
@@ -94,9 +97,12 @@  static int colo_ctl_get(QEMUFile *f, uint32_t require)
     return value;
 }
 
-static int colo_do_checkpoint_transaction(MigrationState *s)
+static int colo_do_checkpoint_transaction(MigrationState *s,
+                                          QEMUSizedBuffer *buffer)
 {
     int ret;
+    size_t size;
+    QEMUFile *trans = NULL;
 
     ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_CHECKPOINT_REQUEST, 0);
     if (ret < 0) {
@@ -107,15 +113,47 @@  static int colo_do_checkpoint_transaction(MigrationState *s)
     if (ret < 0) {
         goto out;
     }
+    /* Reset colo buffer and open it for write */
+    qsb_set_length(buffer, 0);
+    trans = qemu_bufopen("w", buffer);
+    if (!trans) {
+        error_report("Open colo buffer for write failed");
+        goto out;
+    }
 
-    /* TODO: suspend and save vm state to colo buffer */
+    qemu_mutex_lock_iothread();
+    vm_stop_force_state(RUN_STATE_COLO);
+    qemu_mutex_unlock_iothread();
+    trace_colo_vm_state_change("run", "stop");
+
+    /* Disable block migration */
+    s->params.blk = 0;
+    s->params.shared = 0;
+    qemu_savevm_state_header(trans);
+    qemu_savevm_state_begin(trans, &s->params);
+    qemu_mutex_lock_iothread();
+    qemu_savevm_state_complete(trans);
+    qemu_mutex_unlock_iothread();
+
+    qemu_fflush(trans);
 
     ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SEND, 0);
     if (ret < 0) {
         goto out;
     }
+    /* we send the total size of the vmstate first */
+    size = qsb_get_length(buffer);
+    ret = colo_ctl_put(s->to_dst_file, COLO_COMMAND_VMSTATE_SIZE, size);
+    if (ret < 0) {
+        goto out;
+    }
 
-    /* TODO: send vmstate to Secondary */
+    qsb_put_buffer(s->to_dst_file, buffer, size);
+    qemu_fflush(s->to_dst_file);
+    ret = qemu_file_get_error(s->to_dst_file);
+    if (ret < 0) {
+        goto out;
+    }
 
     ret = colo_ctl_get(s->from_dst_file, COLO_COMMAND_VMSTATE_RECEIVED);
     if (ret < 0) {
@@ -127,14 +165,24 @@  static int colo_do_checkpoint_transaction(MigrationState *s)
         goto out;
     }
 
-    /* TODO: resume Primary */
+    ret = 0;
+    /* resume master */
+    qemu_mutex_lock_iothread();
+    vm_start();
+    qemu_mutex_unlock_iothread();
+    trace_colo_vm_state_change("stop", "run");
 
 out:
+    if (trans) {
+        qemu_fclose(trans);
+    }
+
     return ret;
 }
 
 static void colo_process_checkpoint(MigrationState *s)
 {
+    QEMUSizedBuffer *buffer = NULL;
     int fd, ret = 0;
 
     /* Dup the fd of to_dst_file */
@@ -159,6 +207,13 @@  static void colo_process_checkpoint(MigrationState *s)
         goto out;
     }
 
+    buffer = qsb_create(NULL, COLO_BUFFER_BASE_SIZE);
+    if (buffer == NULL) {
+        ret = -ENOMEM;
+        error_report("Failed to allocate buffer!");
+        goto out;
+    }
+
     qemu_mutex_lock_iothread();
     vm_start();
     qemu_mutex_unlock_iothread();
@@ -166,7 +221,7 @@  static void colo_process_checkpoint(MigrationState *s)
 
     while (s->state == MIGRATION_STATUS_COLO) {
         /* start a colo checkpoint */
-        ret = colo_do_checkpoint_transaction(s);
+        ret = colo_do_checkpoint_transaction(s, buffer);
         if (ret < 0) {
             goto out;
         }
@@ -179,6 +234,9 @@  out:
     migrate_set_state(&s->state, MIGRATION_STATUS_COLO,
                       MIGRATION_STATUS_COMPLETED);
 
+    qsb_free(buffer);
+    buffer = NULL;
+
     if (s->from_dst_file) {
         qemu_fclose(s->from_dst_file);
     }
diff --git a/migration/ram.c b/migration/ram.c
index a25bcc7..5784c15 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -38,6 +38,7 @@ 
 #include "trace.h"
 #include "exec/ram_addr.h"
 #include "qemu/rcu_queue.h"
+#include "migration/colo.h"
 
 #ifdef DEBUG_MIGRATION_RAM
 #define DPRINTF(fmt, ...) \
@@ -1165,15 +1166,8 @@  void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
     }
 }
 
-/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
- * long-running RCU critical section.  When rcu-reclaims in the code
- * start to become numerous it will be necessary to reduce the
- * granularity of these critical sections.
- */
-
-static int ram_save_setup(QEMUFile *f, void *opaque)
+static int ram_save_init_globals(void)
 {
-    RAMBlock *block;
     int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
 
     dirty_rate_high_cnt = 0;
@@ -1233,6 +1227,31 @@  static int ram_save_setup(QEMUFile *f, void *opaque)
     migration_bitmap_sync();
     qemu_mutex_unlock_ramlist();
     qemu_mutex_unlock_iothread();
+    rcu_read_unlock();
+
+    return 0;
+}
+
+/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
+ * long-running RCU critical section.  When rcu-reclaims in the code
+ * start to become numerous it will be necessary to reduce the
+ * granularity of these critical sections.
+ */
+
+static int ram_save_setup(QEMUFile *f, void *opaque)
+{
+    RAMBlock *block;
+
+    /*
+     * migration has already setup the bitmap, reuse it.
+     */
+    if (!migration_in_colo_state()) {
+        if (ram_save_init_globals() < 0) {
+            return -1;
+         }
+    }
+
+    rcu_read_lock();
 
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
@@ -1332,7 +1351,8 @@  static int ram_save_complete(QEMUFile *f, void *opaque)
     while (true) {
         int pages;
 
-        pages = ram_find_and_save_block(f, true, &bytes_transferred);
+        pages = ram_find_and_save_block(f, !migration_in_colo_state(),
+                                        &bytes_transferred);
         /* no more blocks to sent */
         if (pages == 0) {
             break;
@@ -1343,8 +1363,15 @@  static int ram_save_complete(QEMUFile *f, void *opaque)
     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
 
     rcu_read_unlock();
+    /*
+     * Since we need to reuse dirty bitmap in colo,
+     * don't cleanup the bitmap.
+     */
+    if (!migrate_colo_enabled() ||
+        migration_has_failed(migrate_get_current())) {
+        migration_end();
+    }
 
-    migration_end();
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
     return 0;
diff --git a/migration/savevm.c b/migration/savevm.c
index dbcc39a..0faf12b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -48,7 +48,7 @@ 
 #include "qemu/iov.h"
 #include "block/snapshot.h"
 #include "block/qapi.h"
-
+#include "migration/colo.h"
 
 #ifndef ETH_P_RARP
 #define ETH_P_RARP 0x8035