diff mbox

[COLO-Frame,v10,17/38] COLO: synchronize PVM's state to SVM periodically

Message ID 1446551816-15768-18-git-send-email-zhang.zhanghailiang@huawei.com
State New
Headers show

Commit Message

Zhanghailiang Nov. 3, 2015, 11:56 a.m. UTC
Do checkpoint periodically, the default interval is 200ms.

Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
---
 migration/colo.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

Comments

Dr. David Alan Gilbert Nov. 13, 2015, 6:34 p.m. UTC | #1
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> Do checkpoint periodically, the default interval is 200ms.
> 
> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> ---
>  migration/colo.c | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/migration/colo.c b/migration/colo.c
> index 0efab21..a6791f4 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -11,12 +11,19 @@
>   */
>  
>  #include <unistd.h>
> +#include "qemu/timer.h"
>  #include "sysemu/sysemu.h"
>  #include "migration/colo.h"
>  #include "trace.h"
>  #include "qemu/error-report.h"
>  #include "qemu/sockets.h"
>  
> +/*
> + * checkpoint interval: unit ms
> + * Note: Please change this default value to 10000 when we support hybrid mode.
> + */
> +#define CHECKPOINT_MAX_PEROID 200

Why not put the patch that makes this a configurable parameter before this,
and then we can use it straight away?

>  /* colo buffer */
>  #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>  
> @@ -183,6 +190,7 @@ out:
>  static void colo_process_checkpoint(MigrationState *s)
>  {
>      QEMUSizedBuffer *buffer = NULL;
> +    int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>      int fd, ret = 0;
>  
>      /* Dup the fd of to_dst_file */
> @@ -220,11 +228,17 @@ static void colo_process_checkpoint(MigrationState *s)
>      trace_colo_vm_state_change("stop", "run");
>  
>      while (s->state == MIGRATION_STATUS_COLO) {
> +        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> +        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
> +            g_usleep(100000);
> +            continue;
> +        }

I'm a bit concerned at the 100ms wait, when the period is 200ms; 
depending how the times work out, couldn't we end up waiting for just
under 300ms? - that's a big error - and it's even more weird when
we make it configurable later.

I don't think we've got a sleep-until, which is a shame; but how
about something like:

   if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
       int64_t delay_ms;
       delay_ms = CHECKPOINT_MAX_PERIOD - (current_time - checkpoint_time);
       g_usleep (delay_ms * 1000);
   }

Dave

>          /* start a colo checkpoint */
>          ret = colo_do_checkpoint_transaction(s, buffer);
>          if (ret < 0) {
>              goto out;
>          }
> +        checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>      }
>  
>  out:
> -- 
> 1.8.3.1
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang Nov. 17, 2015, 9:11 a.m. UTC | #2
On 2015/11/14 2:34, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> Do checkpoint periodically, the default interval is 200ms.
>>
>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>> ---
>>   migration/colo.c | 14 ++++++++++++++
>>   1 file changed, 14 insertions(+)
>>
>> diff --git a/migration/colo.c b/migration/colo.c
>> index 0efab21..a6791f4 100644
>> --- a/migration/colo.c
>> +++ b/migration/colo.c
>> @@ -11,12 +11,19 @@
>>    */
>>
>>   #include <unistd.h>
>> +#include "qemu/timer.h"
>>   #include "sysemu/sysemu.h"
>>   #include "migration/colo.h"
>>   #include "trace.h"
>>   #include "qemu/error-report.h"
>>   #include "qemu/sockets.h"
>>
>> +/*
>> + * checkpoint interval: unit ms
>> + * Note: Please change this default value to 10000 when we support hybrid mode.
>> + */
>> +#define CHECKPOINT_MAX_PEROID 200
>
> Why not put the patch that makes this a configurable parameter before this,
> and then we can use it straight away?
>

Do you mean setting this value by command  "migrate_set_parameter" ?
I have realized it in patch 26.

>>   /* colo buffer */
>>   #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>>
>> @@ -183,6 +190,7 @@ out:
>>   static void colo_process_checkpoint(MigrationState *s)
>>   {
>>       QEMUSizedBuffer *buffer = NULL;
>> +    int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>       int fd, ret = 0;
>>
>>       /* Dup the fd of to_dst_file */
>> @@ -220,11 +228,17 @@ static void colo_process_checkpoint(MigrationState *s)
>>       trace_colo_vm_state_change("stop", "run");
>>
>>       while (s->state == MIGRATION_STATUS_COLO) {
>> +        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>> +        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
>> +            g_usleep(100000);
>> +            continue;
>> +        }
>
> I'm a bit concerned at the 100ms wait, when the period is 200ms;
> depending how the times work out, couldn't we end up waiting for just
> under 300ms? - that's a big error - and it's even more weird when
> we make it configurable later.
>

Agreed.

> I don't think we've got a sleep-until, which is a shame; but how
> about something like:
>
>     if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
>         int64_t delay_ms;
>         delay_ms = CHECKPOINT_MAX_PERIOD - (current_time - checkpoint_time);
>         g_usleep (delay_ms * 1000);
>     }
>

That's a reasonable modification. I will fix it like that in next version.

Thanks,
zhanghailiang

> Dave
>
>>           /* start a colo checkpoint */
>>           ret = colo_do_checkpoint_transaction(s, buffer);
>>           if (ret < 0) {
>>               goto out;
>>           }
>> +        checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>       }
>>
>>   out:
>> --
>> 1.8.3.1
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
Dr. David Alan Gilbert Nov. 17, 2015, 10:08 a.m. UTC | #3
* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> On 2015/11/14 2:34, Dr. David Alan Gilbert wrote:
> >* zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
> >>Do checkpoint periodically, the default interval is 200ms.
> >>
> >>Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
> >>Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
> >>---
> >>  migration/colo.c | 14 ++++++++++++++
> >>  1 file changed, 14 insertions(+)
> >>
> >>diff --git a/migration/colo.c b/migration/colo.c
> >>index 0efab21..a6791f4 100644
> >>--- a/migration/colo.c
> >>+++ b/migration/colo.c
> >>@@ -11,12 +11,19 @@
> >>   */
> >>
> >>  #include <unistd.h>
> >>+#include "qemu/timer.h"
> >>  #include "sysemu/sysemu.h"
> >>  #include "migration/colo.h"
> >>  #include "trace.h"
> >>  #include "qemu/error-report.h"
> >>  #include "qemu/sockets.h"
> >>
> >>+/*
> >>+ * checkpoint interval: unit ms
> >>+ * Note: Please change this default value to 10000 when we support hybrid mode.
> >>+ */
> >>+#define CHECKPOINT_MAX_PEROID 200
> >
> >Why not put the patch that makes this a configurable parameter before this,
> >and then we can use it straight away?
> >
> 
> Do you mean setting this value by command  "migrate_set_parameter" ?
> I have realized it in patch 26.

Yes, I mean reorder the patch series; put the migrate_set_parameter addition
before this patch, and then use it straight away.

Dave

> >>  /* colo buffer */
> >>  #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
> >>
> >>@@ -183,6 +190,7 @@ out:
> >>  static void colo_process_checkpoint(MigrationState *s)
> >>  {
> >>      QEMUSizedBuffer *buffer = NULL;
> >>+    int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> >>      int fd, ret = 0;
> >>
> >>      /* Dup the fd of to_dst_file */
> >>@@ -220,11 +228,17 @@ static void colo_process_checkpoint(MigrationState *s)
> >>      trace_colo_vm_state_change("stop", "run");
> >>
> >>      while (s->state == MIGRATION_STATUS_COLO) {
> >>+        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> >>+        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
> >>+            g_usleep(100000);
> >>+            continue;
> >>+        }
> >
> >I'm a bit concerned at the 100ms wait, when the period is 200ms;
> >depending how the times work out, couldn't we end up waiting for just
> >under 300ms? - that's a big error - and it's even more weird when
> >we make it configurable later.
> >
> 
> Agreed.
> 
> >I don't think we've got a sleep-until, which is a shame; but how
> >about something like:
> >
> >    if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
> >        int64_t delay_ms;
> >        delay_ms = CHECKPOINT_MAX_PERIOD - (current_time - checkpoint_time);
> >        g_usleep (delay_ms * 1000);
> >    }
> >
> 
> That's a reasonable modification. I will fix it like that in next version.
> 
> Thanks,
> zhanghailiang
> 
> >Dave
> >
> >>          /* start a colo checkpoint */
> >>          ret = colo_do_checkpoint_transaction(s, buffer);
> >>          if (ret < 0) {
> >>              goto out;
> >>          }
> >>+        checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
> >>      }
> >>
> >>  out:
> >>--
> >>1.8.3.1
> >>
> >>
> >--
> >Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
> >
> >.
> >
> 
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Zhanghailiang Nov. 17, 2015, 10:29 a.m. UTC | #4
On 2015/11/17 18:08, Dr. David Alan Gilbert wrote:
> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>> On 2015/11/14 2:34, Dr. David Alan Gilbert wrote:
>>> * zhanghailiang (zhang.zhanghailiang@huawei.com) wrote:
>>>> Do checkpoint periodically, the default interval is 200ms.
>>>>
>>>> Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
>>>> Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
>>>> ---
>>>>   migration/colo.c | 14 ++++++++++++++
>>>>   1 file changed, 14 insertions(+)
>>>>
>>>> diff --git a/migration/colo.c b/migration/colo.c
>>>> index 0efab21..a6791f4 100644
>>>> --- a/migration/colo.c
>>>> +++ b/migration/colo.c
>>>> @@ -11,12 +11,19 @@
>>>>    */
>>>>
>>>>   #include <unistd.h>
>>>> +#include "qemu/timer.h"
>>>>   #include "sysemu/sysemu.h"
>>>>   #include "migration/colo.h"
>>>>   #include "trace.h"
>>>>   #include "qemu/error-report.h"
>>>>   #include "qemu/sockets.h"
>>>>
>>>> +/*
>>>> + * checkpoint interval: unit ms
>>>> + * Note: Please change this default value to 10000 when we support hybrid mode.
>>>> + */
>>>> +#define CHECKPOINT_MAX_PEROID 200
>>>
>>> Why not put the patch that makes this a configurable parameter before this,
>>> and then we can use it straight away?
>>>
>>
>> Do you mean setting this value by command  "migrate_set_parameter" ?
>> I have realized it in patch 26.
>
> Yes, I mean reorder the patch series; put the migrate_set_parameter addition
> before this patch, and then use it straight away.

OK, i will reorder them, thanks.

>
>>>>   /* colo buffer */
>>>>   #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
>>>>
>>>> @@ -183,6 +190,7 @@ out:
>>>>   static void colo_process_checkpoint(MigrationState *s)
>>>>   {
>>>>       QEMUSizedBuffer *buffer = NULL;
>>>> +    int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>       int fd, ret = 0;
>>>>
>>>>       /* Dup the fd of to_dst_file */
>>>> @@ -220,11 +228,17 @@ static void colo_process_checkpoint(MigrationState *s)
>>>>       trace_colo_vm_state_change("stop", "run");
>>>>
>>>>       while (s->state == MIGRATION_STATUS_COLO) {
>>>> +        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>> +        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
>>>> +            g_usleep(100000);
>>>> +            continue;
>>>> +        }
>>>
>>> I'm a bit concerned at the 100ms wait, when the period is 200ms;
>>> depending how the times work out, couldn't we end up waiting for just
>>> under 300ms? - that's a big error - and it's even more weird when
>>> we make it configurable later.
>>>
>>
>> Agreed.
>>
>>> I don't think we've got a sleep-until, which is a shame; but how
>>> about something like:
>>>
>>>     if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
>>>         int64_t delay_ms;
>>>         delay_ms = CHECKPOINT_MAX_PERIOD - (current_time - checkpoint_time);
>>>         g_usleep (delay_ms * 1000);
>>>     }
>>>
>>
>> That's a reasonable modification. I will fix it like that in next version.
>>
>> Thanks,
>> zhanghailiang
>>
>>> Dave
>>>
>>>>           /* start a colo checkpoint */
>>>>           ret = colo_do_checkpoint_transaction(s, buffer);
>>>>           if (ret < 0) {
>>>>               goto out;
>>>>           }
>>>> +        checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
>>>>       }
>>>>
>>>>   out:
>>>> --
>>>> 1.8.3.1
>>>>
>>>>
>>> --
>>> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>>>
>>> .
>>>
>>
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
> .
>
diff mbox

Patch

diff --git a/migration/colo.c b/migration/colo.c
index 0efab21..a6791f4 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -11,12 +11,19 @@ 
  */
 
 #include <unistd.h>
+#include "qemu/timer.h"
 #include "sysemu/sysemu.h"
 #include "migration/colo.h"
 #include "trace.h"
 #include "qemu/error-report.h"
 #include "qemu/sockets.h"
 
+/*
+ * checkpoint interval: unit ms
+ * Note: Please change this default value to 10000 when we support hybrid mode.
+ */
+#define CHECKPOINT_MAX_PEROID 200
+
 /* colo buffer */
 #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024)
 
@@ -183,6 +190,7 @@  out:
 static void colo_process_checkpoint(MigrationState *s)
 {
     QEMUSizedBuffer *buffer = NULL;
+    int64_t current_time, checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     int fd, ret = 0;
 
     /* Dup the fd of to_dst_file */
@@ -220,11 +228,17 @@  static void colo_process_checkpoint(MigrationState *s)
     trace_colo_vm_state_change("stop", "run");
 
     while (s->state == MIGRATION_STATUS_COLO) {
+        current_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
+        if (current_time - checkpoint_time < CHECKPOINT_MAX_PEROID) {
+            g_usleep(100000);
+            continue;
+        }
         /* start a colo checkpoint */
         ret = colo_do_checkpoint_transaction(s, buffer);
         if (ret < 0) {
             goto out;
         }
+        checkpoint_time = qemu_clock_get_ms(QEMU_CLOCK_HOST);
     }
 
 out: