diff mbox

[COLO-Frame,v5,25/29] COLO: Add colo-set-checkpoint-period command

Message ID 1432196001-10352-26-git-send-email-zhang.zhanghailiang@huawei.com
State New
Headers show

Commit Message

Zhanghailiang May 21, 2015, 8:13 a.m. UTC
With this command, we can control the period of checkpoint, if
there is no comparison of net packets.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
 hmp-commands.hx        | 15 +++++++++++++++
 hmp.c                  |  7 +++++++
 hmp.h                  |  1 +
 migration/colo.c       | 11 ++++++++++-
 qapi-schema.json       | 13 +++++++++++++
 qmp-commands.hx        | 22 ++++++++++++++++++++++
 stubs/migration-colo.c |  4 ++++
 7 files changed, 72 insertions(+), 1 deletion(-)

Comments

Dr. David Alan Gilbert June 5, 2015, 6:45 p.m. UTC | #1
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> With this command, we can control the period of checkpoint, if
> there is no comparison of net packets.

This should use the MigrationParameter stuff that's gone into qemu recently;
in my local copy of your code I've got this, and your COLO_MIN period, and the 
delay after a miscompare and your live-ram size threshold all wired in as
MigrationParameters; makes it a lot easier to play with the values.

Dave

> 
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> ---
>  hmp-commands.hx        | 15 +++++++++++++++
>  hmp.c                  |  7 +++++++
>  hmp.h                  |  1 +
>  migration/colo.c       | 11 ++++++++++-
>  qapi-schema.json       | 13 +++++++++++++
>  qmp-commands.hx        | 22 ++++++++++++++++++++++
>  stubs/migration-colo.c |  4 ++++
>  7 files changed, 72 insertions(+), 1 deletion(-)
> 
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index be3e398..32cd548 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1023,6 +1023,21 @@ Tell COLO that heartbeat is lost, a failover or takeover is needed.
>  ETEXI
>  
>      {
> +        .name       = "colo_set_checkpoint_period",
> +        .args_type  = "value:i",
> +        .params     = "value",
> +        .help       = "set checkpoint period (in ms) for colo. "
> +        "Defaults to 100ms",
> +        .mhandler.cmd = hmp_colo_set_checkpoint_period,
> +    },
> +
> +STEXI
> +@item migrate_set_checkpoint_period @var{value}
> +@findex migrate_set_checkpoint_period
> +Set checkpoint period to @var{value} (in ms) for colo.
> +ETEXI
> +
> +    {
>          .name       = "client_migrate_info",
>          .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
>          .params     = "protocol hostname port tls-port cert-subject",
> diff --git a/hmp.c b/hmp.c
> index f87fa37..f727686 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -1257,6 +1257,13 @@ void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
>      hmp_handle_error(mon, &err);
>  }
>  
> +void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict)
> +{
> +    int64_t value = qdict_get_int(qdict, "value");
> +
> +    qmp_colo_set_checkpoint_period(value, NULL);
> +}
> +
>  void hmp_set_password(Monitor *mon, const QDict *qdict)
>  {
>      const char *protocol  = qdict_get_str(qdict, "protocol");
> diff --git a/hmp.h b/hmp.h
> index b6549f8..9570345 100644
> --- a/hmp.h
> +++ b/hmp.h
> @@ -68,6 +68,7 @@ void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
>  void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
>  void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
>  void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict);
> +void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict);
>  void hmp_set_password(Monitor *mon, const QDict *qdict);
>  void hmp_expire_password(Monitor *mon, const QDict *qdict);
>  void hmp_eject(Monitor *mon, const QDict *qdict);
> diff --git a/migration/colo.c b/migration/colo.c
> index 195973a..f5fc79c 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -17,6 +17,7 @@
>  #include "qemu/error-report.h"
>  #include "migration/migration-failover.h"
>  #include "net/colo-nic.h"
> +#include "qmp-commands.h"
>  
>  /*
>  * We should not do checkpoint one after another without any time interval,
> @@ -70,6 +71,9 @@ enum {
>  static QEMUBH *colo_bh;
>  static bool vmstate_loading;
>  static Coroutine *colo;
> +
> +int64_t colo_checkpoint_period = CHECKPOINT_MAX_PEROID;
> +
>  /* colo buffer */
>  #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>  QEMUSizedBuffer *colo_buffer;
> @@ -85,6 +89,11 @@ bool migrate_in_colo_state(void)
>      return (s->state == MIGRATION_STATUS_COLO);
>  }
>  
> +void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
> +{
> +    colo_checkpoint_period = value;
> +}
> +
>  static bool colo_runstate_is_stopped(void)
>  {
>      return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
> @@ -361,7 +370,7 @@ static void *colo_thread(void *opaque)
>           * and then check if we need checkpoint again.
>           */
>          current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> -        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
> +        if (current_time - checkpoint_time < colo_checkpoint_period) {
>              g_usleep(100000);
>              continue;
>          }
> diff --git a/qapi-schema.json b/qapi-schema.json
> index dc0ee07..62b5cfd 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -653,6 +653,19 @@
>  { 'command': 'colo-lost-heartbeat' }
>  
>  ##
> +# @colo-set-checkpoint-period
> +#
> +# Set colo checkpoint period
> +#
> +# @value: period of colo checkpoint in ms
> +#
> +# Returns: nothing on success
> +#
> +# Since: 2.4
> +##
> +{ 'command': 'colo-set-checkpoint-period', 'data': {'value': 'int'} }
> +
> +##
>  # @MouseInfo:
>  #
>  # Information about a mouse device.
> diff --git a/qmp-commands.hx b/qmp-commands.hx
> index 3813f66..4b16044 100644
> --- a/qmp-commands.hx
> +++ b/qmp-commands.hx
> @@ -800,6 +800,28 @@ Example:
>  EQMP
>  
>      {
> +         .name       = "colo-set-checkpoint-period",
> +         .args_type  = "value:i",
> +         .mhandler.cmd_new = qmp_marshal_input_colo_set_checkpoint_period,
> +    },
> +
> +SQMP
> +colo-set-checkpoint-period
> +--------------------------
> +
> +set checkpoint period
> +
> +Arguments:
> +- "value": checkpoint period
> +
> +Example:
> +
> +-> { "execute": "colo-set-checkpoint-period", "arguments": { "value": "1000" } }
> +<- { "return": {} }
> +
> +EQMP
> +
> +    {
>          .name       = "client_migrate_info",
>          .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
>          .params     = "protocol hostname port tls-port cert-subject",
> diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c
> index 03a395b..d3c9dc4 100644
> --- a/stubs/migration-colo.c
> +++ b/stubs/migration-colo.c
> @@ -52,3 +52,7 @@ void qmp_colo_lost_heartbeat(Error **errp)
>                       " with --enable-colo option in order to support"
>                       " COLO feature");
>  }
> +
> +void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
> +{
> +}
> -- 
> 1.7.12.4
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang June 9, 2015, 3:28 a.m. UTC | #2
On 2015/6/6 2:45, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> With this command, we can control the period of checkpoint, if
>> there is no comparison of net packets.
>
> This should use the MigrationParameter stuff that's gone into qemu recently;
> in my local copy of your code I've got this, and your COLO_MIN period, and the
> delay after a miscompare and your live-ram size threshold all wired in as
> MigrationParameters; makes it a lot easier to play with the values.

Yes, it is a good idea to use the new command 'migrate-set-parameters' to set all the parameters of COLO
related, but i noticed that this new command was custom-built for compress, not good designed for extension.
The qmp api is defined like bellow:
void qmp_migrate_set_parameters(bool has_compress_level,
                                 int64_t compress_level,
                                 bool has_compress_threads,
                                 int64_t compress_threads,
                                 bool has_decompress_threads,
                                 int64_t decompress_threads, Error **errp)
Maybe we should change it like:

void qmp_migrate_set_parameters(bool has_compress_info,
                                 struct compress_info compress,
				Error **errp)

I will try to fix it like that, and then use it in the COLO.

Thanks,
zhanghailiang

>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> ---
>>   hmp-commands.hx        | 15 +++++++++++++++
>>   hmp.c                  |  7 +++++++
>>   hmp.h                  |  1 +
>>   migration/colo.c       | 11 ++++++++++-
>>   qapi-schema.json       | 13 +++++++++++++
>>   qmp-commands.hx        | 22 ++++++++++++++++++++++
>>   stubs/migration-colo.c |  4 ++++
>>   7 files changed, 72 insertions(+), 1 deletion(-)
>>
>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>> index be3e398..32cd548 100644
>> --- a/hmp-commands.hx
>> +++ b/hmp-commands.hx
>> @@ -1023,6 +1023,21 @@ Tell COLO that heartbeat is lost, a failover or takeover is needed.
>>   ETEXI
>>
>>       {
>> +        .name       = "colo_set_checkpoint_period",
>> +        .args_type  = "value:i",
>> +        .params     = "value",
>> +        .help       = "set checkpoint period (in ms) for colo. "
>> +        "Defaults to 100ms",
>> +        .mhandler.cmd = hmp_colo_set_checkpoint_period,
>> +    },
>> +
>> +STEXI
>> +@item migrate_set_checkpoint_period @var{value}
>> +@findex migrate_set_checkpoint_period
>> +Set checkpoint period to @var{value} (in ms) for colo.
>> +ETEXI
>> +
>> +    {
>>           .name       = "client_migrate_info",
>>           .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
>>           .params     = "protocol hostname port tls-port cert-subject",
>> diff --git a/hmp.c b/hmp.c
>> index f87fa37..f727686 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -1257,6 +1257,13 @@ void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
>>       hmp_handle_error(mon, &err);
>>   }
>>
>> +void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict)
>> +{
>> +    int64_t value = qdict_get_int(qdict, "value");
>> +
>> +    qmp_colo_set_checkpoint_period(value, NULL);
>> +}
>> +
>>   void hmp_set_password(Monitor *mon, const QDict *qdict)
>>   {
>>       const char *protocol  = qdict_get_str(qdict, "protocol");
>> diff --git a/hmp.h b/hmp.h
>> index b6549f8..9570345 100644
>> --- a/hmp.h
>> +++ b/hmp.h
>> @@ -68,6 +68,7 @@ void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
>>   void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
>>   void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
>>   void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict);
>> +void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict);
>>   void hmp_set_password(Monitor *mon, const QDict *qdict);
>>   void hmp_expire_password(Monitor *mon, const QDict *qdict);
>>   void hmp_eject(Monitor *mon, const QDict *qdict);
>> diff --git a/migration/colo.c b/migration/colo.c
>> index 195973a..f5fc79c 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -17,6 +17,7 @@
>>   #include "qemu/error-report.h"
>>   #include "migration/migration-failover.h"
>>   #include "net/colo-nic.h"
>> +#include "qmp-commands.h"
>>
>>   /*
>>   * We should not do checkpoint one after another without any time interval,
>> @@ -70,6 +71,9 @@ enum {
>>   static QEMUBH *colo_bh;
>>   static bool vmstate_loading;
>>   static Coroutine *colo;
>> +
>> +int64_t colo_checkpoint_period = CHECKPOINT_MAX_PEROID;
>> +
>>   /* colo buffer */
>>   #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>>   QEMUSizedBuffer *colo_buffer;
>> @@ -85,6 +89,11 @@ bool migrate_in_colo_state(void)
>>       return (s->state == MIGRATION_STATUS_COLO);
>>   }
>>
>> +void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
>> +{
>> +    colo_checkpoint_period = value;
>> +}
>> +
>>   static bool colo_runstate_is_stopped(void)
>>   {
>>       return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
>> @@ -361,7 +370,7 @@ static void *colo_thread(void *opaque)
>>            * and then check if we need checkpoint again.
>>            */
>>           current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>> -        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
>> +        if (current_time - checkpoint_time < colo_checkpoint_period) {
>>               g_usleep(100000);
>>               continue;
>>           }
>> diff --git a/qapi-schema.json b/qapi-schema.json
>> index dc0ee07..62b5cfd 100644
>> --- a/qapi-schema.json
>> +++ b/qapi-schema.json
>> @@ -653,6 +653,19 @@
>>   { 'command': 'colo-lost-heartbeat' }
>>
>>   ##
>> +# @colo-set-checkpoint-period
>> +#
>> +# Set colo checkpoint period
>> +#
>> +# @value: period of colo checkpoint in ms
>> +#
>> +# Returns: nothing on success
>> +#
>> +# Since: 2.4
>> +##
>> +{ 'command': 'colo-set-checkpoint-period', 'data': {'value': 'int'} }
>> +
>> +##
>>   # @MouseInfo:
>>   #
>>   # Information about a mouse device.
>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>> index 3813f66..4b16044 100644
>> --- a/qmp-commands.hx
>> +++ b/qmp-commands.hx
>> @@ -800,6 +800,28 @@ Example:
>>   EQMP
>>
>>       {
>> +         .name       = "colo-set-checkpoint-period",
>> +         .args_type  = "value:i",
>> +         .mhandler.cmd_new = qmp_marshal_input_colo_set_checkpoint_period,
>> +    },
>> +
>> +SQMP
>> +colo-set-checkpoint-period
>> +--------------------------
>> +
>> +set checkpoint period
>> +
>> +Arguments:
>> +- "value": checkpoint period
>> +
>> +Example:
>> +
>> +-> { "execute": "colo-set-checkpoint-period", "arguments": { "value": "1000" } }
>> +<- { "return": {} }
>> +
>> +EQMP
>> +
>> +    {
>>           .name       = "client_migrate_info",
>>           .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
>>           .params     = "protocol hostname port tls-port cert-subject",
>> diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c
>> index 03a395b..d3c9dc4 100644
>> --- a/stubs/migration-colo.c
>> +++ b/stubs/migration-colo.c
>> @@ -52,3 +52,7 @@ void qmp_colo_lost_heartbeat(Error **errp)
>>                        " with --enable-colo option in order to support"
>>                        " COLO feature");
>>   }
>> +
>> +void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
>> +{
>> +}
>> --
>> 1.7.12.4
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
Dr. David Alan Gilbert June 9, 2015, 8:01 a.m. UTC | #3
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> On 2015/6/6 2:45, Dr. David Alan Gilbert wrote:
> >* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> >>With this command, we can control the period of checkpoint, if
> >>there is no comparison of net packets.
> >
> >This should use the MigrationParameter stuff that's gone into qemu recently;
> >in my local copy of your code I've got this, and your COLO_MIN period, and the
> >delay after a miscompare and your live-ram size threshold all wired in as
> >MigrationParameters; makes it a lot easier to play with the values.
> 
> Yes, it is a good idea to use the new command 'migrate-set-parameters' to set all the parameters of COLO
> related, but i noticed that this new command was custom-built for compress, not good designed for extension.
> The qmp api is defined like bellow:
> void qmp_migrate_set_parameters(bool has_compress_level,
>                                 int64_t compress_level,
>                                 bool has_compress_threads,
>                                 int64_t compress_threads,
>                                 bool has_decompress_threads,
>                                 int64_t decompress_threads, Error **errp)

Yes, I don't like it.

> Maybe we should change it like:
> 
> void qmp_migrate_set_parameters(bool has_compress_info,
>                                 struct compress_info compress,
> 				Error **errp)
> 
> I will try to fix it like that, and then use it in the COLO.

See my thread with Markus and Eric here:
https://lists.nongnu.org/archive/html/qemu-devel/2015-06/msg01709.html

Dave

> 
> Thanks,
> zhanghailiang
> 
> >>Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> >>Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> >>---
> >>  hmp-commands.hx        | 15 +++++++++++++++
> >>  hmp.c                  |  7 +++++++
> >>  hmp.h                  |  1 +
> >>  migration/colo.c       | 11 ++++++++++-
> >>  qapi-schema.json       | 13 +++++++++++++
> >>  qmp-commands.hx        | 22 ++++++++++++++++++++++
> >>  stubs/migration-colo.c |  4 ++++
> >>  7 files changed, 72 insertions(+), 1 deletion(-)
> >>
> >>diff --git a/hmp-commands.hx b/hmp-commands.hx
> >>index be3e398..32cd548 100644
> >>--- a/hmp-commands.hx
> >>+++ b/hmp-commands.hx
> >>@@ -1023,6 +1023,21 @@ Tell COLO that heartbeat is lost, a failover or takeover is needed.
> >>  ETEXI
> >>
> >>      {
> >>+        .name       = "colo_set_checkpoint_period",
> >>+        .args_type  = "value:i",
> >>+        .params     = "value",
> >>+        .help       = "set checkpoint period (in ms) for colo. "
> >>+        "Defaults to 100ms",
> >>+        .mhandler.cmd = hmp_colo_set_checkpoint_period,
> >>+    },
> >>+
> >>+STEXI
> >>+@item migrate_set_checkpoint_period @var{value}
> >>+@findex migrate_set_checkpoint_period
> >>+Set checkpoint period to @var{value} (in ms) for colo.
> >>+ETEXI
> >>+
> >>+    {
> >>          .name       = "client_migrate_info",
> >>          .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
> >>          .params     = "protocol hostname port tls-port cert-subject",
> >>diff --git a/hmp.c b/hmp.c
> >>index f87fa37..f727686 100644
> >>--- a/hmp.c
> >>+++ b/hmp.c
> >>@@ -1257,6 +1257,13 @@ void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
> >>      hmp_handle_error(mon, &err);
> >>  }
> >>
> >>+void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict)
> >>+{
> >>+    int64_t value = qdict_get_int(qdict, "value");
> >>+
> >>+    qmp_colo_set_checkpoint_period(value, NULL);
> >>+}
> >>+
> >>  void hmp_set_password(Monitor *mon, const QDict *qdict)
> >>  {
> >>      const char *protocol  = qdict_get_str(qdict, "protocol");
> >>diff --git a/hmp.h b/hmp.h
> >>index b6549f8..9570345 100644
> >>--- a/hmp.h
> >>+++ b/hmp.h
> >>@@ -68,6 +68,7 @@ void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
> >>  void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
> >>  void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
> >>  void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict);
> >>+void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict);
> >>  void hmp_set_password(Monitor *mon, const QDict *qdict);
> >>  void hmp_expire_password(Monitor *mon, const QDict *qdict);
> >>  void hmp_eject(Monitor *mon, const QDict *qdict);
> >>diff --git a/migration/colo.c b/migration/colo.c
> >>index 195973a..f5fc79c 100644
> >>--- a/migration/colo.c
> >>+++ b/migration/colo.c
> >>@@ -17,6 +17,7 @@
> >>  #include "qemu/error-report.h"
> >>  #include "migration/migration-failover.h"
> >>  #include "net/colo-nic.h"
> >>+#include "qmp-commands.h"
> >>
> >>  /*
> >>  * We should not do checkpoint one after another without any time interval,
> >>@@ -70,6 +71,9 @@ enum {
> >>  static QEMUBH *colo_bh;
> >>  static bool vmstate_loading;
> >>  static Coroutine *colo;
> >>+
> >>+int64_t colo_checkpoint_period = CHECKPOINT_MAX_PEROID;
> >>+
> >>  /* colo buffer */
> >>  #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
> >>  QEMUSizedBuffer *colo_buffer;
> >>@@ -85,6 +89,11 @@ bool migrate_in_colo_state(void)
> >>      return (s->state == MIGRATION_STATUS_COLO);
> >>  }
> >>
> >>+void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
> >>+{
> >>+    colo_checkpoint_period = value;
> >>+}
> >>+
> >>  static bool colo_runstate_is_stopped(void)
> >>  {
> >>      return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
> >>@@ -361,7 +370,7 @@ static void *colo_thread(void *opaque)
> >>           * and then check if we need checkpoint again.
> >>           */
> >>          current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> >>-        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
> >>+        if (current_time - checkpoint_time < colo_checkpoint_period) {
> >>              g_usleep(100000);
> >>              continue;
> >>          }
> >>diff --git a/qapi-schema.json b/qapi-schema.json
> >>index dc0ee07..62b5cfd 100644
> >>--- a/qapi-schema.json
> >>+++ b/qapi-schema.json
> >>@@ -653,6 +653,19 @@
> >>  { 'command': 'colo-lost-heartbeat' }
> >>
> >>  ##
> >>+# @colo-set-checkpoint-period
> >>+#
> >>+# Set colo checkpoint period
> >>+#
> >>+# @value: period of colo checkpoint in ms
> >>+#
> >>+# Returns: nothing on success
> >>+#
> >>+# Since: 2.4
> >>+##
> >>+{ 'command': 'colo-set-checkpoint-period', 'data': {'value': 'int'} }
> >>+
> >>+##
> >>  # @MouseInfo:
> >>  #
> >>  # Information about a mouse device.
> >>diff --git a/qmp-commands.hx b/qmp-commands.hx
> >>index 3813f66..4b16044 100644
> >>--- a/qmp-commands.hx
> >>+++ b/qmp-commands.hx
> >>@@ -800,6 +800,28 @@ Example:
> >>  EQMP
> >>
> >>      {
> >>+         .name       = "colo-set-checkpoint-period",
> >>+         .args_type  = "value:i",
> >>+         .mhandler.cmd_new = qmp_marshal_input_colo_set_checkpoint_period,
> >>+    },
> >>+
> >>+SQMP
> >>+colo-set-checkpoint-period
> >>+--------------------------
> >>+
> >>+set checkpoint period
> >>+
> >>+Arguments:
> >>+- "value": checkpoint period
> >>+
> >>+Example:
> >>+
> >>+-> { "execute": "colo-set-checkpoint-period", "arguments": { "value": "1000" } }
> >>+<- { "return": {} }
> >>+
> >>+EQMP
> >>+
> >>+    {
> >>          .name       = "client_migrate_info",
> >>          .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
> >>          .params     = "protocol hostname port tls-port cert-subject",
> >>diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c
> >>index 03a395b..d3c9dc4 100644
> >>--- a/stubs/migration-colo.c
> >>+++ b/stubs/migration-colo.c
> >>@@ -52,3 +52,7 @@ void qmp_colo_lost_heartbeat(Error **errp)
> >>                       " with --enable-colo option in order to support"
> >>                       " COLO feature");
> >>  }
> >>+
> >>+void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
> >>+{
> >>+}
> >>--
> >>1.7.12.4
> >>
> >>
> >--
> >Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> >
> >.
> >
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang June 9, 2015, 10:14 a.m. UTC | #4
On 2015/6/9 16:01, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> On 2015/6/6 2:45, Dr. David Alan Gilbert wrote:
>>> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>>>> With this command, we can control the period of checkpoint, if
>>>> there is no comparison of net packets.
>>>
>>> This should use the MigrationParameter stuff that's gone into qemu recently;
>>> in my local copy of your code I've got this, and your COLO_MIN period, and the
>>> delay after a miscompare and your live-ram size threshold all wired in as
>>> MigrationParameters; makes it a lot easier to play with the values.
>>
>> Yes, it is a good idea to use the new command 'migrate-set-parameters' to set all the parameters of COLO
>> related, but i noticed that this new command was custom-built for compress, not good designed for extension.
>> The qmp api is defined like bellow:
>> void qmp_migrate_set_parameters(bool has_compress_level,
>>                                  int64_t compress_level,
>>                                  bool has_compress_threads,
>>                                  int64_t compress_threads,
>>                                  bool has_decompress_threads,
>>                                  int64_t decompress_threads, Error **errp)
>
> Yes, I don't like it.
>
>> Maybe we should change it like:
>>
>> void qmp_migrate_set_parameters(bool has_compress_info,
>>                                  struct compress_info compress,
>> 				Error **errp)
>>
>> I will try to fix it like that, and then use it in the COLO.
>
> See my thread with Markus and Eric here:
> https://lists.nongnu.org/archive/html/qemu-devel/2015-06/msg01709.html

Er, i didn't notice this discussion, and have sent a RFC patch to community,
please ignore. Thanks, :)

>>
>> Thanks,
>> zhanghailiang
>>
>>>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>>>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>>>> ---
>>>>   hmp-commands.hx        | 15 +++++++++++++++
>>>>   hmp.c                  |  7 +++++++
>>>>   hmp.h                  |  1 +
>>>>   migration/colo.c       | 11 ++++++++++-
>>>>   qapi-schema.json       | 13 +++++++++++++
>>>>   qmp-commands.hx        | 22 ++++++++++++++++++++++
>>>>   stubs/migration-colo.c |  4 ++++
>>>>   7 files changed, 72 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/hmp-commands.hx b/hmp-commands.hx
>>>> index be3e398..32cd548 100644
>>>> --- a/hmp-commands.hx
>>>> +++ b/hmp-commands.hx
>>>> @@ -1023,6 +1023,21 @@ Tell COLO that heartbeat is lost, a failover or takeover is needed.
>>>>   ETEXI
>>>>
>>>>       {
>>>> +        .name       = "colo_set_checkpoint_period",
>>>> +        .args_type  = "value:i",
>>>> +        .params     = "value",
>>>> +        .help       = "set checkpoint period (in ms) for colo. "
>>>> +        "Defaults to 100ms",
>>>> +        .mhandler.cmd = hmp_colo_set_checkpoint_period,
>>>> +    },
>>>> +
>>>> +STEXI
>>>> +@item migrate_set_checkpoint_period @var{value}
>>>> +@findex migrate_set_checkpoint_period
>>>> +Set checkpoint period to @var{value} (in ms) for colo.
>>>> +ETEXI
>>>> +
>>>> +    {
>>>>           .name       = "client_migrate_info",
>>>>           .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
>>>>           .params     = "protocol hostname port tls-port cert-subject",
>>>> diff --git a/hmp.c b/hmp.c
>>>> index f87fa37..f727686 100644
>>>> --- a/hmp.c
>>>> +++ b/hmp.c
>>>> @@ -1257,6 +1257,13 @@ void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
>>>>       hmp_handle_error(mon, &err);
>>>>   }
>>>>
>>>> +void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict)
>>>> +{
>>>> +    int64_t value = qdict_get_int(qdict, "value");
>>>> +
>>>> +    qmp_colo_set_checkpoint_period(value, NULL);
>>>> +}
>>>> +
>>>>   void hmp_set_password(Monitor *mon, const QDict *qdict)
>>>>   {
>>>>       const char *protocol  = qdict_get_str(qdict, "protocol");
>>>> diff --git a/hmp.h b/hmp.h
>>>> index b6549f8..9570345 100644
>>>> --- a/hmp.h
>>>> +++ b/hmp.h
>>>> @@ -68,6 +68,7 @@ void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
>>>>   void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
>>>>   void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
>>>>   void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict);
>>>> +void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict);
>>>>   void hmp_set_password(Monitor *mon, const QDict *qdict);
>>>>   void hmp_expire_password(Monitor *mon, const QDict *qdict);
>>>>   void hmp_eject(Monitor *mon, const QDict *qdict);
>>>> diff --git a/migration/colo.c b/migration/colo.c
>>>> index 195973a..f5fc79c 100644
>>>> --- a/migration/colo.c
>>>> +++ b/migration/colo.c
>>>> @@ -17,6 +17,7 @@
>>>>   #include "qemu/error-report.h"
>>>>   #include "migration/migration-failover.h"
>>>>   #include "net/colo-nic.h"
>>>> +#include "qmp-commands.h"
>>>>
>>>>   /*
>>>>   * We should not do checkpoint one after another without any time interval,
>>>> @@ -70,6 +71,9 @@ enum {
>>>>   static QEMUBH *colo_bh;
>>>>   static bool vmstate_loading;
>>>>   static Coroutine *colo;
>>>> +
>>>> +int64_t colo_checkpoint_period = CHECKPOINT_MAX_PEROID;
>>>> +
>>>>   /* colo buffer */
>>>>   #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>>>>   QEMUSizedBuffer *colo_buffer;
>>>> @@ -85,6 +89,11 @@ bool migrate_in_colo_state(void)
>>>>       return (s->state == MIGRATION_STATUS_COLO);
>>>>   }
>>>>
>>>> +void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
>>>> +{
>>>> +    colo_checkpoint_period = value;
>>>> +}
>>>> +
>>>>   static bool colo_runstate_is_stopped(void)
>>>>   {
>>>>       return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
>>>> @@ -361,7 +370,7 @@ static void *colo_thread(void *opaque)
>>>>            * and then check if we need checkpoint again.
>>>>            */
>>>>           current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>> -        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
>>>> +        if (current_time - checkpoint_time < colo_checkpoint_period) {
>>>>               g_usleep(100000);
>>>>               continue;
>>>>           }
>>>> diff --git a/qapi-schema.json b/qapi-schema.json
>>>> index dc0ee07..62b5cfd 100644
>>>> --- a/qapi-schema.json
>>>> +++ b/qapi-schema.json
>>>> @@ -653,6 +653,19 @@
>>>>   { 'command': 'colo-lost-heartbeat' }
>>>>
>>>>   ##
>>>> +# @colo-set-checkpoint-period
>>>> +#
>>>> +# Set colo checkpoint period
>>>> +#
>>>> +# @value: period of colo checkpoint in ms
>>>> +#
>>>> +# Returns: nothing on success
>>>> +#
>>>> +# Since: 2.4
>>>> +##
>>>> +{ 'command': 'colo-set-checkpoint-period', 'data': {'value': 'int'} }
>>>> +
>>>> +##
>>>>   # @MouseInfo:
>>>>   #
>>>>   # Information about a mouse device.
>>>> diff --git a/qmp-commands.hx b/qmp-commands.hx
>>>> index 3813f66..4b16044 100644
>>>> --- a/qmp-commands.hx
>>>> +++ b/qmp-commands.hx
>>>> @@ -800,6 +800,28 @@ Example:
>>>>   EQMP
>>>>
>>>>       {
>>>> +         .name       = "colo-set-checkpoint-period",
>>>> +         .args_type  = "value:i",
>>>> +         .mhandler.cmd_new = qmp_marshal_input_colo_set_checkpoint_period,
>>>> +    },
>>>> +
>>>> +SQMP
>>>> +colo-set-checkpoint-period
>>>> +--------------------------
>>>> +
>>>> +set checkpoint period
>>>> +
>>>> +Arguments:
>>>> +- "value": checkpoint period
>>>> +
>>>> +Example:
>>>> +
>>>> +-> { "execute": "colo-set-checkpoint-period", "arguments": { "value": "1000" } }
>>>> +<- { "return": {} }
>>>> +
>>>> +EQMP
>>>> +
>>>> +    {
>>>>           .name       = "client_migrate_info",
>>>>           .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
>>>>           .params     = "protocol hostname port tls-port cert-subject",
>>>> diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c
>>>> index 03a395b..d3c9dc4 100644
>>>> --- a/stubs/migration-colo.c
>>>> +++ b/stubs/migration-colo.c
>>>> @@ -52,3 +52,7 @@ void qmp_colo_lost_heartbeat(Error **errp)
>>>>                        " with --enable-colo option in order to support"
>>>>                        " COLO feature");
>>>>   }
>>>> +
>>>> +void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
>>>> +{
>>>> +}
>>>> --
>>>> 1.7.12.4
>>>>
>>>>
>>> --
>>> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>>>
>>> .
>>>
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
diff mbox

Patch

diff --git a/hmp-commands.hx b/hmp-commands.hx
index be3e398..32cd548 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1023,6 +1023,21 @@  Tell COLO that heartbeat is lost, a failover or takeover is needed.
 ETEXI
 
     {
+        .name       = "colo_set_checkpoint_period",
+        .args_type  = "value:i",
+        .params     = "value",
+        .help       = "set checkpoint period (in ms) for colo. "
+        "Defaults to 100ms",
+        .mhandler.cmd = hmp_colo_set_checkpoint_period,
+    },
+
+STEXI
+@item migrate_set_checkpoint_period @var{value}
+@findex migrate_set_checkpoint_period
+Set checkpoint period to @var{value} (in ms) for colo.
+ETEXI
+
+    {
         .name       = "client_migrate_info",
         .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
         .params     = "protocol hostname port tls-port cert-subject",
diff --git a/hmp.c b/hmp.c
index f87fa37..f727686 100644
--- a/hmp.c
+++ b/hmp.c
@@ -1257,6 +1257,13 @@  void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, &err);
 }
 
+void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict)
+{
+    int64_t value = qdict_get_int(qdict, "value");
+
+    qmp_colo_set_checkpoint_period(value, NULL);
+}
+
 void hmp_set_password(Monitor *mon, const QDict *qdict)
 {
     const char *protocol  = qdict_get_str(qdict, "protocol");
diff --git a/hmp.h b/hmp.h
index b6549f8..9570345 100644
--- a/hmp.h
+++ b/hmp.h
@@ -68,6 +68,7 @@  void hmp_migrate_set_capability(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict);
 void hmp_migrate_set_cache_size(Monitor *mon, const QDict *qdict);
 void hmp_colo_lost_heartbeat(Monitor *mon, const QDict *qdict);
+void hmp_colo_set_checkpoint_period(Monitor *mon, const QDict *qdict);
 void hmp_set_password(Monitor *mon, const QDict *qdict);
 void hmp_expire_password(Monitor *mon, const QDict *qdict);
 void hmp_eject(Monitor *mon, const QDict *qdict);
diff --git a/migration/colo.c b/migration/colo.c
index 195973a..f5fc79c 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -17,6 +17,7 @@ 
 #include "qemu/error-report.h"
 #include "migration/migration-failover.h"
 #include "net/colo-nic.h"
+#include "qmp-commands.h"
 
 /*
 * We should not do checkpoint one after another without any time interval,
@@ -70,6 +71,9 @@  enum {
 static QEMUBH *colo_bh;
 static bool vmstate_loading;
 static Coroutine *colo;
+
+int64_t colo_checkpoint_period = CHECKPOINT_MAX_PEROID;
+
 /* colo buffer */
 #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
 QEMUSizedBuffer *colo_buffer;
@@ -85,6 +89,11 @@  bool migrate_in_colo_state(void)
     return (s->state == MIGRATION_STATUS_COLO);
 }
 
+void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
+{
+    colo_checkpoint_period = value;
+}
+
 static bool colo_runstate_is_stopped(void)
 {
     return runstate_check(RUN_STATE_COLO) || !runstate_is_running();
@@ -361,7 +370,7 @@  static void *colo_thread(void *opaque)
          * and then check if we need checkpoint again.
          */
         current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
-        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
+        if (current_time - checkpoint_time < colo_checkpoint_period) {
             g_usleep(100000);
             continue;
         }
diff --git a/qapi-schema.json b/qapi-schema.json
index dc0ee07..62b5cfd 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -653,6 +653,19 @@ 
 { 'command': 'colo-lost-heartbeat' }
 
 ##
+# @colo-set-checkpoint-period
+#
+# Set colo checkpoint period
+#
+# @value: period of colo checkpoint in ms
+#
+# Returns: nothing on success
+#
+# Since: 2.4
+##
+{ 'command': 'colo-set-checkpoint-period', 'data': {'value': 'int'} }
+
+##
 # @MouseInfo:
 #
 # Information about a mouse device.
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 3813f66..4b16044 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -800,6 +800,28 @@  Example:
 EQMP
 
     {
+         .name       = "colo-set-checkpoint-period",
+         .args_type  = "value:i",
+         .mhandler.cmd_new = qmp_marshal_input_colo_set_checkpoint_period,
+    },
+
+SQMP
+colo-set-checkpoint-period
+--------------------------
+
+set checkpoint period
+
+Arguments:
+- "value": checkpoint period
+
+Example:
+
+-> { "execute": "colo-set-checkpoint-period", "arguments": { "value": "1000" } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "client_migrate_info",
         .args_type  = "protocol:s,hostname:s,port:i?,tls-port:i?,cert-subject:s?",
         .params     = "protocol hostname port tls-port cert-subject",
diff --git a/stubs/migration-colo.c b/stubs/migration-colo.c
index 03a395b..d3c9dc4 100644
--- a/stubs/migration-colo.c
+++ b/stubs/migration-colo.c
@@ -52,3 +52,7 @@  void qmp_colo_lost_heartbeat(Error **errp)
                      " with --enable-colo option in order to support"
                      " COLO feature");
 }
+
+void qmp_colo_set_checkpoint_period(int64_t value, Error **errp)
+{
+}