[v1,1/1] COLO: only flush dirty ram pages from colo cache

Message ID 20200910104650.1182807-2-dereksu@qnap.com
State New
Series COLO: only flush dirty ram pages from colo cache

Commit Message

Derek Su Sept. 10, 2020, 10:46 a.m. UTC
On the secondary side, colo_flush_ram_cache() calls
migration_bitmap_find_dirty() to find the dirty pages and
flush them to the host. But ram_state's ram_bulk_stage flag is always
enabled on the secondary side, which causes all RAM pages to be
copied instead of only the dirty ones.

Here, ram_bulk_stage is disabled on the secondary side during the
preparation of the COLO incoming process, so that only the dirty
RAM pages are flushed.

Signed-off-by: Derek Su <dereksu@qnap.com>
---
 migration/colo.c |  6 +++++-
 migration/ram.c  | 10 ++++++++++
 migration/ram.h  |  3 +++
 3 files changed, 18 insertions(+), 1 deletion(-)
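
For background on why the enabled flag forces a full copy: while ram_bulk_stage
is set, the dirty-page lookup in migration_bitmap_find_dirty() takes a
bulk-stage shortcut and reports the following page as dirty instead of
consulting the dirty bitmap, so the flush loop in colo_flush_ram_cache() ends
up copying essentially the whole RAM cache. The snippet below is a minimal,
self-contained sketch of that effect, not the QEMU code itself; find_dirty(),
run_flush() and struct ram_state_sketch are stand-ins for
migration_bitmap_find_dirty(), colo_flush_ram_cache() and RAMState.

#include <stdbool.h>
#include <stdio.h>

#define NPAGES 8

struct ram_state_sketch {
    bool ram_bulk_stage;            /* stand-in for RAMState::ram_bulk_stage */
};

/* Stand-in for migration_bitmap_find_dirty(): in the bulk stage the next
 * page is assumed dirty; otherwise the dirty bitmap is consulted. */
static unsigned long find_dirty(const struct ram_state_sketch *rs,
                                const bool *dirty, unsigned long start)
{
    if (rs->ram_bulk_stage && start > 0) {
        return start + 1;
    }
    for (unsigned long i = start; i < NPAGES; i++) {
        if (dirty[i]) {
            return i;
        }
    }
    return NPAGES;
}

/* Stand-in for the flush loop in colo_flush_ram_cache(). */
static void run_flush(bool bulk_stage)
{
    bool dirty[NPAGES] = { false, false, true, false, false, true, false, false };
    struct ram_state_sketch rs = { .ram_bulk_stage = bulk_stage };

    printf("ram_bulk_stage=%d:", bulk_stage);
    for (unsigned long p = find_dirty(&rs, dirty, 0); p < NPAGES;
         p = find_dirty(&rs, dirty, p)) {
        dirty[p] = false;           /* stand-in for clearing the dirty bit */
        printf(" flush page %lu", p);
    }
    printf("\n");
}

int main(void)
{
    run_flush(true);    /* old behaviour: pages 2..7 are all flushed */
    run_flush(false);   /* flag cleared:  only pages 2 and 5 flushed */
    return 0;
}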

Comments

Derek Su Sept. 22, 2020, 3:31 a.m. UTC | #1
Hello, all

Ping...

Regards,
Derek Su

Zhang, Chen Sept. 22, 2020, 3:41 a.m. UTC | #2
Hi Derek and Lei,

It looks the same as Lei's patch:
[PATCH 2/3] Reduce the time of checkpoint for COLO
Could you two discuss merging them into one patch?

Thanks
Zhang Chen

Derek Su Sept. 22, 2020, 3:48 a.m. UTC | #3
Hi, Chen

Sure.

BTW, I just went through Lei's patch.
ram_bulk_stage might need to be reset to true after stopping the COLO
service, as in my patch.
What's your opinion?

Thanks.

Best regards,
Derek


Lei Rao Sept. 22, 2020, 5:04 a.m. UTC | #4
Hi, Derek and Chen

ram_bulk_stage is false by default before Hailiang's patch.
For COLO, it does not seem to be used, so I think there is no need to reset it to true.

Thanks,
Lei.

Derek Su Sept. 22, 2020, 5:18 a.m. UTC | #5
Hi, Lei

Got it. Thanks.

Regards,
Derek

Zhang, Chen Sept. 22, 2020, 5:37 a.m. UTC | #6
So, Derek, will you send a new version of the patch?

Thanks
Zhang Chen

Derek Su Sept. 22, 2020, 5:46 a.m. UTC | #7
Hi, Chen and Lei

Using Lei's patch is OK with me.
Please add "Signed-off-by: Derek Su <dereksu@qnap.com>" when merging it.
Thank you. :)

Regards
Derek

>
Patch

diff --git a/migration/colo.c b/migration/colo.c
index ea7d1e9d4e..6e644db306 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -844,6 +844,8 @@  void *colo_process_incoming_thread(void *opaque)
         return NULL;
     }
 
+    colo_disable_ram_bulk_stage();
+
     failover_init_state();
 
     mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);
@@ -873,7 +875,7 @@  void *colo_process_incoming_thread(void *opaque)
         goto out;
     }
 #else
-        abort();
+    abort();
 #endif
     vm_start();
     trace_colo_vm_state_change("stop", "run");
@@ -924,6 +926,8 @@  out:
         qemu_fclose(fb);
     }
 
+    colo_enable_ram_bulk_stage();
+
     /* Hope this not to be too long to loop here */
     qemu_sem_wait(&mis->colo_incoming_sem);
     qemu_sem_destroy(&mis->colo_incoming_sem);
diff --git a/migration/ram.c b/migration/ram.c
index 76d4fee5d5..65e9b12058 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -3357,6 +3357,16 @@  static bool postcopy_is_running(void)
     return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
 }
 
+void colo_enable_ram_bulk_stage(void)
+{
+    ram_state->ram_bulk_stage = true;
+}
+
+void colo_disable_ram_bulk_stage(void)
+{
+    ram_state->ram_bulk_stage = false;
+}
+
 /*
  * Flush content of RAM cache into SVM's memory.
  * Only flush the pages that be dirtied by PVM or SVM or both.
diff --git a/migration/ram.h b/migration/ram.h
index 2eeaacfa13..c1c0ebbe0f 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -69,4 +69,7 @@  void colo_flush_ram_cache(void);
 void colo_release_ram_cache(void);
 void colo_incoming_start_dirty_log(void);
 
+void colo_enable_ram_bulk_stage(void);
+void colo_disable_ram_bulk_stage(void);
+
 #endif