diff mbox

[uq/master] kvm: i8254: Fix conversion of in-kernel to userspace state

Message ID 4FCF691A.10502@siemens.com
State New
Headers show

Commit Message

Jan Kiszka June 6, 2012, 2:28 p.m. UTC
Due to a offset between the clock used to generate the in-kernel
count_load_time (CLOCK_MONOTONIC) and the clock used for processing this
in userspace (vm_clock), reading back the output of PIT channel 2 via
port 0x61 was broken. One use cases that suffered from it was the CPU
frequency calibration of SeaBIOS, which also affected IDE/AHCI timeouts.

This fixes it by calibrating the offset between both clocks on
kvm_pit_get and adjusting the kernel value before saving it in the
userspace state. As the calibration only works while the vm_clock is
running, we cache the in-kernel state across stopped phases.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

Should go into 1.1 as well if it's fine for master.

 hw/kvm/i8254.c |   57 +++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 52 insertions(+), 5 deletions(-)

Comments

Avi Kivity June 11, 2012, 10:07 a.m. UTC | #1
On 06/06/2012 05:28 PM, Jan Kiszka wrote:
> Due to a offset between the clock used to generate the in-kernel
> count_load_time (CLOCK_MONOTONIC) and the clock used for processing this
> in userspace (vm_clock), reading back the output of PIT channel 2 via
> port 0x61 was broken. One use cases that suffered from it was the CPU
> frequency calibration of SeaBIOS, which also affected IDE/AHCI timeouts.
> 
> This fixes it by calibrating the offset between both clocks on
> kvm_pit_get and adjusting the kernel value before saving it in the
> userspace state. As the calibration only works while the vm_clock is
> running, we cache the in-kernel state across stopped phases.

Applied, thanks.

> +    clock_offset = LLONG_MAX;

INT64_MAX would me more strictly correct, but in practice it makes no
difference.
diff mbox

Patch

diff --git a/hw/kvm/i8254.c b/hw/kvm/i8254.c
index bb5fe07..04333d8 100644
--- a/hw/kvm/i8254.c
+++ b/hw/kvm/i8254.c
@@ -23,31 +23,63 @@ 
  * THE SOFTWARE.
  */
 #include "qemu-timer.h"
+#include "sysemu.h"
 #include "hw/i8254.h"
 #include "hw/i8254_internal.h"
 #include "kvm.h"
 
 #define KVM_PIT_REINJECT_BIT 0
 
+#define CALIBRATION_ROUNDS   3
+
 typedef struct KVMPITState {
     PITCommonState pit;
     LostTickPolicy lost_tick_policy;
+    bool state_valid;
 } KVMPITState;
 
-static void kvm_pit_get(PITCommonState *s)
+static int64_t abs64(int64_t v)
 {
+    return v < 0 ? -v : v;
+}
+
+static void kvm_pit_get(PITCommonState *pit)
+{
+    KVMPITState *s = DO_UPCAST(KVMPITState, pit, pit);
     struct kvm_pit_state2 kpit;
     struct kvm_pit_channel_state *kchan;
     struct PITChannelState *sc;
+    int64_t offset, clock_offset;
+    struct timespec ts;
     int i, ret;
 
+    if (s->state_valid) {
+        return;
+    }
+
+    /*
+     * Measure the delta between CLOCK_MONOTONIC, the base used for
+     * kvm_pit_channel_state::count_load_time, and vm_clock. Take the
+     * minimum of several samples to filter out scheduling noise.
+     */
+    clock_offset = LLONG_MAX;
+    for (i = 0; i < CALIBRATION_ROUNDS; i++) {
+        offset = qemu_get_clock_ns(vm_clock);
+        clock_gettime(CLOCK_MONOTONIC, &ts);
+        offset -= ts.tv_nsec;
+        offset -= (int64_t)ts.tv_sec * 1000000000;
+        if (abs64(offset) < abs64(clock_offset)) {
+            clock_offset = offset;
+        }
+    }
+
     if (kvm_has_pit_state2()) {
         ret = kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, &kpit);
         if (ret < 0) {
             fprintf(stderr, "KVM_GET_PIT2 failed: %s\n", strerror(ret));
             abort();
         }
-        s->channels[0].irq_disabled = kpit.flags & KVM_PIT_FLAGS_HPET_LEGACY;
+        pit->channels[0].irq_disabled = kpit.flags & KVM_PIT_FLAGS_HPET_LEGACY;
     } else {
         /*
          * kvm_pit_state2 is superset of kvm_pit_state struct,
@@ -61,7 +93,7 @@  static void kvm_pit_get(PITCommonState *s)
     }
     for (i = 0; i < 3; i++) {
         kchan = &kpit.channels[i];
-        sc = &s->channels[i];
+        sc = &pit->channels[i];
         sc->count = kchan->count;
         sc->latched_count = kchan->latched_count;
         sc->count_latched = kchan->count_latched;
@@ -74,10 +106,10 @@  static void kvm_pit_get(PITCommonState *s)
         sc->mode = kchan->mode;
         sc->bcd = kchan->bcd;
         sc->gate = kchan->gate;
-        sc->count_load_time = kchan->count_load_time;
+        sc->count_load_time = kchan->count_load_time + clock_offset;
     }
 
-    sc = &s->channels[0];
+    sc = &pit->channels[0];
     sc->next_transition_time =
         pit_get_next_transition_time(sc, sc->count_load_time);
 }
@@ -173,6 +205,19 @@  static void kvm_pit_irq_control(void *opaque, int n, int enable)
     kvm_pit_put(pit);
 }
 
+static void kvm_pit_vm_state_change(void *opaque, int running,
+                                    RunState state)
+{
+    KVMPITState *s = opaque;
+
+    if (running) {
+        s->state_valid = false;
+    } else {
+        kvm_pit_get(&s->pit);
+        s->state_valid = true;
+    }
+}
+
 static int kvm_pit_initfn(PITCommonState *pit)
 {
     KVMPITState *s = DO_UPCAST(KVMPITState, pit, pit);
@@ -215,6 +260,8 @@  static int kvm_pit_initfn(PITCommonState *pit)
 
     qdev_init_gpio_in(&pit->dev.qdev, kvm_pit_irq_control, 1);
 
+    qemu_add_vm_change_state_handler(kvm_pit_vm_state_change, s);
+
     return 0;
 }