
[v5,2/3] cpus: Fix throttling during vm_stop

Message ID 20190826103726.25538-3-yury-kotov@yandex-team.ru
State New
Series High downtime with 95+ throttle pct

Commit Message

Yury Kotov Aug. 26, 2019, 10:37 a.m. UTC
The throttling thread sleeps inside the VCPU thread. For a high throttle
percentage this sleep exceeds 10ms: e.g. 15ms for 60% and 990ms for 99%.
vm_stop() kicks all VCPUs and waits for them. It is called at the end of
migration, and because of the long sleep the migration downtime can exceed
100ms even with a downtime-limit of 1ms.
Use qemu_cond_timedwait for high percentages so the sleep can be
interrupted during vm_stop().

Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
---
 cpus.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)
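
For reference, the sleep times above follow directly from the throttle ratio.
A minimal standalone sketch of the arithmetic (assuming the 10ms
CPU_THROTTLE_TIMESLICE_NS that matches the figures quoted in the commit
message; not QEMU code):

#include <inttypes.h>
#include <stdio.h>

/* 10ms timeslice; consistent with the 60% -> 15ms and 99% -> 990ms figures. */
#define CPU_THROTTLE_TIMESLICE_NS 10000000LL

int main(void)
{
    int percentages[] = { 60, 95, 99 };

    for (int i = 0; i < 3; i++) {
        double pct = percentages[i] / 100.0;
        double throttle_ratio = pct / (1 - pct);
        int64_t sleeptime_ns =
            (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);

        /* Prints 15.0, 190.0 and 990.0 ms respectively. */
        printf("%d%% -> sleep %.1f ms per 10 ms of run time\n",
               percentages[i], sleeptime_ns / 1e6);
    }
    return 0;
}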

Comments

Eric Blake Sept. 5, 2019, 7:56 p.m. UTC | #1
On 8/26/19 5:37 AM, Yury Kotov wrote:
> Throttling thread sleeps in VCPU thread. For high throttle percentage
> this sleep is more than 10ms. E.g. for 60% - 15ms, for 99% - 990ms.
> vm_stop() kicks all VCPUs and waits for them. It's called at the end of
> migration and because of the long sleep the migration downtime might be
> more than 100ms even for downtime-limit 1ms.
> Use qemu_cond_timedwait for high percentage to wake up during vm_stop.
> 
> Signed-off-by: Yury Kotov <yury-kotov@yandex-team.ru>
> ---
>  cpus.c | 25 +++++++++++++++++--------
>  1 file changed, 17 insertions(+), 8 deletions(-)
> 

> @@ -790,11 +792,20 @@ static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
>  
>      pct = (double)cpu_throttle_get_percentage()/100;
>      throttle_ratio = pct / (1 - pct);
> -    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
> -
> -    qemu_mutex_unlock_iothread();
> -    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
> -    qemu_mutex_lock_iothread();
> +    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
> +    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);

The cast to int64_t is not strictly necessary here, but doesn't hurt
(since it shows you DO know you are going from double to 64-bit int).
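
For reference, a minimal standalone sketch of the rounding error that the
"+ 1" in the hunk above compensates for: with IEEE-754 doubles, 0.6 / 0.4
evaluates to slightly less than 1.5, so a plain truncating cast yields
14999999 ns instead of 15000000 ns for 60% (not QEMU code, just the
arithmetic):

#include <inttypes.h>
#include <stdio.h>

#define CPU_THROTTLE_TIMESLICE_NS 10000000LL

int main(void)
{
    double pct = 60 / 100.0;                 /* 0.6 is not exactly representable */
    double throttle_ratio = pct / (1 - pct); /* evaluates just below 1.5 */

    int64_t plain    = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
    int64_t adjusted = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);

    /* Typically prints "plain=14999999 adjusted=15000000" with IEEE-754 doubles. */
    printf("plain=%" PRId64 " adjusted=%" PRId64 "\n", plain, adjusted);
    return 0;
}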

> +    endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
> +    while (sleeptime_ns > 0 && !cpu->stop) {
> +        if (sleeptime_ns > SCALE_MS) {
> +            qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
> +                                sleeptime_ns / SCALE_MS);
> +        } else {
> +            qemu_mutex_unlock_iothread();
> +            g_usleep(sleeptime_ns / SCALE_US);
> +            qemu_mutex_lock_iothread();
> +        }
> +        sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> +    }

Looks reasonable.

(I wonder whether an alternative approach would be any easier or more
efficient: use poll() or similar instead of g_usleep, together with a
pipe-to-self that is written to in the same scenarios where cpu->halt_cond
would be broadcast, so that the sleeping poll wakes up responsively.
But don't rewrite the patch just because of my question.)
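
For readers unfamiliar with that idea, a minimal standalone sketch of the
pipe-to-self pattern (plain POSIX, not QEMU's API; the kick_pipe,
kick_sleeper and interruptible_sleep names and the wiring into the throttle
path are hypothetical):

#include <poll.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static int kick_pipe[2];  /* [0] = read end polled by the sleeper, [1] = write end */

/* Called from the same places that would broadcast cpu->halt_cond. */
static void kick_sleeper(void)
{
    uint8_t b = 0;
    (void)write(kick_pipe[1], &b, 1);
}

/* Sleep up to timeout_ms, returning early if kick_sleeper() runs. */
static void interruptible_sleep(int timeout_ms)
{
    struct pollfd pfd = { .fd = kick_pipe[0], .events = POLLIN };
    int ret = poll(&pfd, 1, timeout_ms);

    if (ret > 0 && (pfd.revents & POLLIN)) {
        uint8_t b;
        (void)read(kick_pipe[0], &b, 1);  /* drain the kick */
    }
}

int main(void)
{
    if (pipe(kick_pipe) < 0) {
        perror("pipe");
        return 1;
    }
    interruptible_sleep(100);  /* sleeps the full 100ms unless kicked */
    return 0;
}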

Reviewed-by: Eric Blake <eblake@redhat.com>

Patch

diff --git a/cpus.c b/cpus.c
index 85cd451a86..d2c61ff155 100644
--- a/cpus.c
+++ b/cpus.c
@@ -77,6 +77,8 @@ 
 
 #endif /* CONFIG_LINUX */
 
+static QemuMutex qemu_global_mutex;
+
 int64_t max_delay;
 int64_t max_advance;
 
@@ -782,7 +784,7 @@  static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 {
     double pct;
     double throttle_ratio;
-    long sleeptime_ns;
+    int64_t sleeptime_ns, endtime_ns;
 
     if (!cpu_throttle_get_percentage()) {
         return;
@@ -790,11 +792,20 @@  static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
 
     pct = (double)cpu_throttle_get_percentage()/100;
     throttle_ratio = pct / (1 - pct);
-    sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
-
-    qemu_mutex_unlock_iothread();
-    g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
-    qemu_mutex_lock_iothread();
+    /* Add 1ns to fix double's rounding error (like 0.9999999...) */
+    sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
+    endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
+    while (sleeptime_ns > 0 && !cpu->stop) {
+        if (sleeptime_ns > SCALE_MS) {
+            qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
+                                sleeptime_ns / SCALE_MS);
+        } else {
+            qemu_mutex_unlock_iothread();
+            g_usleep(sleeptime_ns / SCALE_US);
+            qemu_mutex_lock_iothread();
+        }
+        sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    }
     atomic_set(&cpu->throttle_thread_scheduled, 0);
 }
 
@@ -1172,8 +1183,6 @@  static void qemu_init_sigbus(void)
 }
 #endif /* !CONFIG_LINUX */
 
-static QemuMutex qemu_global_mutex;
-
 static QemuThread io_thread;
 
 /* cpu creation */