diff mbox series

[v2,2/4] kvm: Dirty ring autoconverge optimization for kvm_cpu_synchronize_kick_all

Message ID ee39636fb5d2fc95b90942b13b7a65ed55aa227f.1648091540.git.wucy11@chinatelecom.cn
State New
Headers show
Series Dirty ring and auto converge optimization | expand

Commit Message

Chongyun Wu March 28, 2022, 1:32 a.m. UTC
From: Chongyun Wu <wucy11@chinatelecom.cn>

The dirty ring feature needs to call kvm_cpu_synchronize_kick_all
to flush hardware buffers into KVM slots, but when auto converge
is running, calls to kvm_cpu_synchronize_kick_all become more
and more time consuming. This will significantly reduce the
efficiency of dirty page queries, especially when memory
pressure is high and the speed limit is high.

When the CPU speed limit is high and kvm_cpu_synchronize_kick_all
is time-consuming, the rate of dirty pages generated by the VM
will also be significantly reduced, so it is not necessary to
call kvm_cpu_synchronize_kick_all at this time; it is enough to
call it once before stopping the VM. This will significantly
improve the efficiency of dirty page queries under high pressure.

Signed-off-by: Chongyun Wu <wucy11@chinatelecom.cn>
---
 accel/kvm/kvm-all.c       | 25 +++++--------------------
 include/exec/cpu-common.h |  2 ++
 migration/migration.c     | 11 +++++++++++
 softmmu/cpus.c            | 18 ++++++++++++++++++
 4 files changed, 36 insertions(+), 20 deletions(-)

Comments

Hyman Huang April 2, 2022, 7:22 a.m. UTC | #1
在 2022/3/28 9:32, wucy11@chinatelecom.cn 写道:
> From: Chongyun Wu <wucy11@chinatelecom.cn>
> 
> Dirty ring feature need call kvm_cpu_synchronize_kick_all
> to flush hardware buffers into KVMslots, but when aucoverge
> run kvm_cpu_synchronize_kick_all calling will become more
> and more time consuming. This will significantly reduce the
> efficiency of dirty page queries, especially when memory
> pressure is high and the speed limit is high.
> 
> When the CPU speed limit is high and kvm_cpu_synchronize_kick_all
> is time-consuming, the rate of dirty pages generated by the VM
> will also be significantly reduced, so it is not necessary to
> call kvm_cpu_synchronize_kick_all at this time, just call it once
> before stopping the VM. This will significantly improve the
> efficiency of dirty page queries under high pressure.
Again, I hope we could consider the migration time instead of just
guest performance.
> 
> Signed-off-by: Chongyun Wu <wucy11@chinatelecom.cn>
> ---
>   accel/kvm/kvm-all.c       | 25 +++++--------------------
>   include/exec/cpu-common.h |  2 ++
>   migration/migration.c     | 11 +++++++++++
>   softmmu/cpus.c            | 18 ++++++++++++++++++
>   4 files changed, 36 insertions(+), 20 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 65a4de8..5e02700 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -48,6 +48,8 @@
>   
>   #include "hw/boards.h"
>   
> +#include "sysemu/cpu-throttle.h"
> +
>   /* This check must be after config-host.h is included */
>   #ifdef CONFIG_EVENTFD
>   #include <sys/eventfd.h>
> @@ -839,25 +841,6 @@ static uint64_t kvm_dirty_ring_reap(KVMState *s)
>       return total;
>   }
>   
> -static void do_kvm_cpu_synchronize_kick(CPUState *cpu, run_on_cpu_data arg)
> -{
> -    /* No need to do anything */
> -}
> -
> -/*
> - * Kick all vcpus out in a synchronized way.  When returned, we
> - * guarantee that every vcpu has been kicked and at least returned to
> - * userspace once.
> - */
> -static void kvm_cpu_synchronize_kick_all(void)
> -{
> -    CPUState *cpu;
> -
> -    CPU_FOREACH(cpu) {
> -        run_on_cpu(cpu, do_kvm_cpu_synchronize_kick, RUN_ON_CPU_NULL);
> -    }
> -}
> -
>   /*
>    * Flush all the existing dirty pages to the KVM slot buffers.  When
>    * this call returns, we guarantee that all the touched dirty pages
> @@ -879,7 +862,9 @@ static void kvm_dirty_ring_flush(void)
>        * First make sure to flush the hardware buffers by kicking all
>        * vcpus out in a synchronous way.
>        */
> -    kvm_cpu_synchronize_kick_all();
> +    if (!cpu_throttle_get_percentage()) {
> +        qemu_kvm_cpu_synchronize_kick_all();
> +    }
For the code logic itself, kvm_dirty_ring_flush aims to flush all
existing dirty pages. As I reviewed in v1's first commit
"kvm,memory: Optimize dirty page collection for dirty ring", we
shouldn't consider the migration logic in this path.
>       kvm_dirty_ring_reap(kvm_state);
>       trace_kvm_dirty_ring_flush(1);
>   }
> diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
> index 50a7d29..13045b3 100644
> --- a/include/exec/cpu-common.h
> +++ b/include/exec/cpu-common.h
> @@ -160,4 +160,6 @@ extern int singlestep;
>   
>   void list_cpus(const char *optarg);
>   
> +void qemu_kvm_cpu_synchronize_kick_all(void);
> +
>   #endif /* CPU_COMMON_H */
> diff --git a/migration/migration.c b/migration/migration.c
> index 695f0f2..ca1db88 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -61,6 +61,7 @@
>   #include "sysemu/cpus.h"
>   #include "yank_functions.h"
>   #include "sysemu/qtest.h"
> +#include "sysemu/kvm.h"
>   
>   #define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */
>   
> @@ -3183,6 +3184,16 @@ static void migration_completion(MigrationState *s)
>   
>           if (!ret) {
>               bool inactivate = !migrate_colo_enabled();
> +            /*
> +             * Before stop vm do qemu_kvm_cpu_synchronize_kick_all to
> +             * fulsh hardware buffer into KVMslots for dirty ring
> +             * optmiaztion, If qemu_kvm_cpu_synchronize_kick_all is not
> +             * called when the CPU speed is limited to improve efficiency
> +             */
> +            if (kvm_dirty_ring_enabled()
> +                && cpu_throttle_get_percentage()) {
> +                qemu_kvm_cpu_synchronize_kick_all();
> +            }
>               ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
>               trace_migration_completion_vm_stop(ret);
>               if (ret >= 0) {
> diff --git a/softmmu/cpus.c b/softmmu/cpus.c
> index 7b75bb6..d028d83 100644
> --- a/softmmu/cpus.c
> +++ b/softmmu/cpus.c
> @@ -810,3 +810,21 @@ void qmp_inject_nmi(Error **errp)
>       nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp);
>   }
>   
[...]
> +static void do_kvm_cpu_synchronize_kick(CPUState *cpu, run_on_cpu_data arg)
> +{
> +    /* No need to do anything */
> +}
> +
> +/*
> + * Kick all vcpus out in a synchronized way.  When returned, we
> + * guarantee that every vcpu has been kicked and at least returned to
> + * userspace once.
> + */
> +void qemu_kvm_cpu_synchronize_kick_all(void)
> +{
> +    CPUState *cpu;
> +
> +    CPU_FOREACH(cpu) {
> +        run_on_cpu(cpu, do_kvm_cpu_synchronize_kick, RUN_ON_CPU_NULL);
> +    }
> +}
For the code style itself, IMHO, exporting helpers should be split into a
standalone commit.
Peter Xu May 13, 2022, 4:41 p.m. UTC | #2
On Sat, Apr 02, 2022 at 03:22:21PM +0800, Hyman Huang wrote:
> 在 2022/3/28 9:32, wucy11@chinatelecom.cn 写道:
> > @@ -879,7 +862,9 @@ static void kvm_dirty_ring_flush(void)
> >        * First make sure to flush the hardware buffers by kicking all
> >        * vcpus out in a synchronous way.
> >        */
> > -    kvm_cpu_synchronize_kick_all();
> > +    if (!cpu_throttle_get_percentage()) {
> > +        qemu_kvm_cpu_synchronize_kick_all();
> > +    }
> For the code logic itself, kvm_dirty_ring_flush aims to flush all existing
> dirty pages, same as i reviewed in v1's first commit "kvm,memory: Optimize
> dirty page collection for dirty ring", we shouldn't consider the migation
> logic in this path.

I agree with Yong's analysis.  I'm afraid this patch is breaking the API
here.

Say, memory_region_sync_dirty_bitmap() should be the memory API to flush
all dirty memory; we can't simply ignore the flushing just because it's
slow when we're throttling the CPUs.  Things will already start to break,
e.g., the dirty rate calculation based on dirty ring relies on all dirty
pages being accounted after the sync() call, and this patch will make the
reported value smaller than the reality.  It's not a major deal breaker in
dirty rate measurements, but it's just one example that we're potentially
breaking the memory API.

AFAIU this is probably another reason why I don't really like the
auto-converge old solution because it's kind of brutal in this case by
putting the thread into a long sleep.

One fix could be that we do periodically sleep in auto-converge code so
that the vcpu threads can still handle any cpu sync requests, though I
didn't really check the cpu code path.

It's also racy in that cpu_throttle_get_percentage() is racy itself:
imagine a case where cpu_throttle_get_percentage() always returns >0 _but_
it starts to return 0 at migration_completion() - we'll not call
qemu_kvm_cpu_synchronize_kick_all() even for completion and it can crash
the VM.
diff mbox series

Patch

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 65a4de8..5e02700 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -48,6 +48,8 @@ 
 
 #include "hw/boards.h"
 
+#include "sysemu/cpu-throttle.h"
+
 /* This check must be after config-host.h is included */
 #ifdef CONFIG_EVENTFD
 #include <sys/eventfd.h>
@@ -839,25 +841,6 @@  static uint64_t kvm_dirty_ring_reap(KVMState *s)
     return total;
 }
 
-static void do_kvm_cpu_synchronize_kick(CPUState *cpu, run_on_cpu_data arg)
-{
-    /* No need to do anything */
-}
-
-/*
- * Kick all vcpus out in a synchronized way.  When returned, we
- * guarantee that every vcpu has been kicked and at least returned to
- * userspace once.
- */
-static void kvm_cpu_synchronize_kick_all(void)
-{
-    CPUState *cpu;
-
-    CPU_FOREACH(cpu) {
-        run_on_cpu(cpu, do_kvm_cpu_synchronize_kick, RUN_ON_CPU_NULL);
-    }
-}
-
 /*
  * Flush all the existing dirty pages to the KVM slot buffers.  When
  * this call returns, we guarantee that all the touched dirty pages
@@ -879,7 +862,9 @@  static void kvm_dirty_ring_flush(void)
      * First make sure to flush the hardware buffers by kicking all
      * vcpus out in a synchronous way.
      */
-    kvm_cpu_synchronize_kick_all();
+    if (!cpu_throttle_get_percentage()) {
+        qemu_kvm_cpu_synchronize_kick_all();
+    }
     kvm_dirty_ring_reap(kvm_state);
     trace_kvm_dirty_ring_flush(1);
 }
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 50a7d29..13045b3 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -160,4 +160,6 @@  extern int singlestep;
 
 void list_cpus(const char *optarg);
 
+void qemu_kvm_cpu_synchronize_kick_all(void);
+
 #endif /* CPU_COMMON_H */
diff --git a/migration/migration.c b/migration/migration.c
index 695f0f2..ca1db88 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -61,6 +61,7 @@ 
 #include "sysemu/cpus.h"
 #include "yank_functions.h"
 #include "sysemu/qtest.h"
+#include "sysemu/kvm.h"
 
 #define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */
 
@@ -3183,6 +3184,16 @@  static void migration_completion(MigrationState *s)
 
         if (!ret) {
             bool inactivate = !migrate_colo_enabled();
+            /*
+             * Before stop vm do qemu_kvm_cpu_synchronize_kick_all to
+             * fulsh hardware buffer into KVMslots for dirty ring
+             * optmiaztion, If qemu_kvm_cpu_synchronize_kick_all is not
+             * called when the CPU speed is limited to improve efficiency
+             */
+            if (kvm_dirty_ring_enabled()
+                && cpu_throttle_get_percentage()) {
+                qemu_kvm_cpu_synchronize_kick_all();
+            }
             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
             trace_migration_completion_vm_stop(ret);
             if (ret >= 0) {
diff --git a/softmmu/cpus.c b/softmmu/cpus.c
index 7b75bb6..d028d83 100644
--- a/softmmu/cpus.c
+++ b/softmmu/cpus.c
@@ -810,3 +810,21 @@  void qmp_inject_nmi(Error **errp)
     nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp);
 }
 
+static void do_kvm_cpu_synchronize_kick(CPUState *cpu, run_on_cpu_data arg)
+{
+    /* No need to do anything */
+}
+
+/*
+ * Kick all vcpus out in a synchronized way.  When returned, we
+ * guarantee that every vcpu has been kicked and at least returned to
+ * userspace once.
+ */
+void qemu_kvm_cpu_synchronize_kick_all(void)
+{
+    CPUState *cpu;
+
+    CPU_FOREACH(cpu) {
+        run_on_cpu(cpu, do_kvm_cpu_synchronize_kick, RUN_ON_CPU_NULL);
+    }
+}