Patchwork [v4,2/4] enable vm_clock to "warp" in the iothread+icount case

login
register
mail settings
Submitter Paolo Bonzini
Date April 13, 2011, 8:03 a.m.
Message ID <1302681826-10236-3-git-send-email-pbonzini@redhat.com>
Download mbox | patch
Permalink /patch/90965/
State New
Headers show

Comments

Paolo Bonzini - April 13, 2011, 8:03 a.m.
The previous patch however is not enough, because if the virtual CPU
goes to sleep waiting for a future timer interrupt to wake it up, qemu
deadlocks.  The timer interrupt never comes because time is driven by
icount, but the vCPU doesn't run any insns.

You could say that VCPUs should never go to sleep in icount
mode if there is a pending vm_clock timer; rather time should
just warp to the next vm_clock event with no sleep ever taking place.
Even better, you can sleep for some time related to the
time left until the next event, to avoid that the warps are too visible
externally; for example, you could be sending network packets continously
instead of every 100ms.

This is what this patch implements.  qemu_clock_warp is called: 1)
whenever a vm_clock timer is adjusted, to ensure the warp_timer is
synchronized; 2) at strategic points in the CPU thread, to make sure
the insn counter is synchronized before the CPU starts running.
In any case, the warp_timer is disabled while the CPU is running,
because the insn counter will then be making progress on its own.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Edgar E. Iglesias <edgar.iglesias@gmail.com>
---
 cpus.c        |    8 ++++-
 qemu-common.h |    1 +
 qemu-timer.c  |   94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 qemu-timer.h  |    1 +
 roms/seabios  |    2 +-
 5 files changed, 103 insertions(+), 3 deletions(-)

Patch

diff --git a/cpus.c b/cpus.c
index cbeac7a..6a50199 100644
--- a/cpus.c
+++ b/cpus.c
@@ -155,7 +155,7 @@  static bool cpu_thread_is_idle(CPUState *env)
     return true;
 }
 
-static bool all_cpu_threads_idle(void)
+bool all_cpu_threads_idle(void)
 {
     CPUState *env;
 
@@ -739,6 +739,9 @@  static void qemu_tcg_wait_io_event(void)
     CPUState *env;
 
     while (all_cpu_threads_idle()) {
+       /* Start accounting real time to the virtual clock if the CPUs
+          are idle.  */
+        qemu_clock_warp(vm_clock);
         qemu_cond_wait(tcg_halt_cond, &qemu_global_mutex);
     }
 
@@ -1073,6 +1076,9 @@  bool cpu_exec_all(void)
 {
     int r;
 
+    /* Account partial waits to the vm_clock.  */
+    qemu_clock_warp(vm_clock);
+
     if (next_cpu == NULL) {
         next_cpu = first_cpu;
     }
diff --git a/qemu-common.h b/qemu-common.h
index 82e27c1..4f6037b 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -298,6 +298,7 @@  void qemu_notify_event(void);
 void qemu_cpu_kick(void *env);
 void qemu_cpu_kick_self(void);
 int qemu_cpu_is_self(void *env);
+bool all_cpu_threads_idle(void);
 
 /* work queue */
 struct qemu_work_item {
diff --git a/qemu-timer.c b/qemu-timer.c
index 50f1943..4959688 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -153,6 +153,8 @@  void cpu_disable_ticks(void)
 struct QEMUClock {
     int type;
     int enabled;
+
+    QEMUTimer *warp_timer;
 };
 
 struct QEMUTimer {
@@ -386,6 +388,90 @@  void qemu_clock_enable(QEMUClock *clock, int enabled)
     clock->enabled = enabled;
 }
 
+static int64_t vm_clock_warp_start;
+
+static void icount_warp_rt(void *opaque)
+{
+    if (vm_clock_warp_start == -1) {
+        return;
+    }
+
+    if (vm_running) {
+        int64_t clock = qemu_get_clock_ns(rt_clock);
+        int64_t warp_delta = clock - vm_clock_warp_start;
+        if (use_icount == 1) {
+            qemu_icount_bias += warp_delta;
+        } else {
+            /*
+             * In adaptive mode, do not let the vm_clock run too
+             * far ahead of real time.
+             */
+            int64_t cur_time = cpu_get_clock();
+            int64_t cur_icount = qemu_get_clock_ns(vm_clock);
+            int64_t delta = cur_time - cur_icount;
+            qemu_icount_bias += MIN(warp_delta, delta);
+        }
+        if (qemu_timer_expired(active_timers[QEMU_CLOCK_VIRTUAL],
+                               qemu_get_clock_ns(vm_clock))) {
+            qemu_notify_event();
+        }
+    }
+    vm_clock_warp_start = -1;
+}
+
+void qemu_clock_warp(QEMUClock *clock)
+{
+    int64_t deadline;
+
+    if (!clock->warp_timer) {
+        return;
+    }
+
+    /*
+     * There are too many global variables to make the "warp" behavior
+     * applicable to other clocks.  But a clock argument removes the
+     * need for if statements all over the place.
+     */
+    assert(clock == vm_clock);
+
+    /*
+     * If the CPUs have been sleeping, advance the vm_clock timer now.  This
+     * ensures that the deadline for the timer is computed correctly below.
+     * This also makes sure that the insn counter is synchronized before the
+     * CPU starts running, in case the CPU is woken by an event other than
+     * the earliest vm_clock timer.
+     */
+    icount_warp_rt(NULL);
+    if (!all_cpu_threads_idle() || !active_timers[clock->type]) {
+        qemu_del_timer(clock->warp_timer);
+        return;
+    }
+
+    vm_clock_warp_start = qemu_get_clock_ns(rt_clock);
+    deadline = qemu_next_deadline();
+    if (deadline > 0) {
+        /*
+         * Ensure the vm_clock proceeds even when the virtual CPU goes to
+         * sleep.  Otherwise, the CPU might be waiting for a future timer
+         * interrupt to wake it up, but the interrupt never comes because
+         * the vCPU isn't running any insns and thus doesn't advance the
+         * vm_clock.
+         *
+         * An extreme solution for this problem would be to never let VCPUs
+         * sleep in icount mode if there is a pending vm_clock timer; rather
+         * time could just advance to the next vm_clock event.  Instead, we
+         * do stop VCPUs and only advance vm_clock after some "real" time,
+         * (related to the time left until the next event) has passed.  This
+         * rt_clock timer will do this.  This avoids that the warps are too
+         * visible externally---for example, you will not be sending network
+         * packets continously instead of every 100ms.
+         */
+        qemu_mod_timer(clock->warp_timer, vm_clock_warp_start + deadline);
+    } else {
+        qemu_notify_event();
+    }
+}
+
 QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
                           QEMUTimerCB *cb, void *opaque)
 {
@@ -454,8 +540,10 @@  static void qemu_mod_timer_ns(QEMUTimer *ts, int64_t expire_time)
             qemu_rearm_alarm_timer(alarm_timer);
         }
         /* Interrupt execution to force deadline recalculation.  */
-        if (use_icount)
+        qemu_clock_warp(ts->clock);
+        if (use_icount) {
             qemu_notify_event();
+        }
     }
 }
 
@@ -576,6 +664,10 @@  void configure_icount(const char *option)
     if (!option)
         return;
 
+#ifdef CONFIG_IOTHREAD
+    vm_clock->warp_timer = qemu_new_timer_ns(rt_clock, icount_warp_rt, NULL);
+#endif
+
     if (strcmp(option, "auto") != 0) {
         icount_time_shift = strtol(option, NULL, 0);
         use_icount = 1;
diff --git a/qemu-timer.h b/qemu-timer.h
index 75d5675..c01bcab 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -39,6 +39,7 @@  extern QEMUClock *host_clock;
 
 int64_t qemu_get_clock_ns(QEMUClock *clock);
 void qemu_clock_enable(QEMUClock *clock, int enabled);
+void qemu_clock_warp(QEMUClock *clock);
 
 QEMUTimer *qemu_new_timer(QEMUClock *clock, int scale,
                           QEMUTimerCB *cb, void *opaque);
diff --git a/roms/seabios b/roms/seabios
index cc97564..06d0bdd 160000
--- a/roms/seabios
+++ b/roms/seabios
@@ -1 +1 @@ 
-Subproject commit cc975646af69f279396d4d5e1379ac6af80ee637
+Subproject commit 06d0bdd9e2e20377b3180e4986b14c8549b393e4