diff mbox

kvmclock, Migration, and NTP clock jitter

Message ID 20150116102109.GA4404@gmail.com
State New
Headers show

Commit Message

Mohammed Gamal Jan. 16, 2015, 10:21 a.m. UTC
On Thu, Jan 15, 2015 at 06:27:54PM +0100, Paolo Bonzini wrote:
> 
> 
> On 15/01/2015 17:39, Mohammed Gamal wrote:
> > The increase in the jitter and offset values is well within the 500 ppm
> > frequency tolerance limit, and therefore are easily corrected by
> > subsequent NTP clock sync events, but some live migrations do cause much
> > higher jitter and offset jumps, which can not be corrected by NTP and
> > cause the time to go way off. Any idea why this is the case?
> 
> It might be fixed in QEMU 2.2.
> 
> See https://lists.gnu.org/archive/html/qemu-devel/2014-09/msg01239.html
> 
> Paolo

Hi Paolo,

I did try to backport these patches to QEMU 1.2. However, migrations
resulted in *higher* jitter and offset values (i.e. on the order of 100+ ppm).
I am not sure if I've done the backporting correctly though. Here are my
patches on top of the qemu 1.2 stable tree.

Comments

Mohammed Gamal Jan. 21, 2015, 10:20 a.m. UTC | #1
On Fri, Jan 16, 2015 at 11:21 AM, Mohammed Gamal <
mohammed.gamal@profitbricks.com> wrote:

> On Thu, Jan 15, 2015 at 06:27:54PM +0100, Paolo Bonzini wrote:
> >
> >
> > On 15/01/2015 17:39, Mohammed Gamal wrote:
> > > The increase in the jitter and offset values is well within the 500 ppm
> > > frequency tolerance limit, and therefore are easily corrected by
> > > subsequent NTP clock sync events, but some live migrations do cause much
> > > higher jitter and offset jumps, which can not be corrected by NTP and
> > > cause the time to go way off. Any idea why this is the case?
> >
> > It might be fixed in QEMU 2.2.
> >
> > See https://lists.gnu.org/archive/html/qemu-devel/2014-09/msg01239.html
> >
> > Paolo
>
> Hi Paolo,
>
> I did try to backport these patches to qemu 1.2. However, migrations
> resulted in *higher* jitter and offset values (i.e. in the order of 100+
> ppm).
> I am not sure if I've done the backporting correctly though. Here are my
> patches on top of the qemu 1.2 stable tree.
>

Anyone?
diff mbox

Patch

diff --git a/cpus.c b/cpus.c
index 29aced5..e079ee5 100644
--- a/cpus.c
+++ b/cpus.c
@@ -187,6 +187,15 @@  void cpu_disable_ticks(void)
     }
 }
 
+/* Run cpu_clean_state() on every CPU in the list headed by first_cpu. */
+void cpu_clean_all_dirty(void)
+{
+    CPUArchState *env;
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        cpu_clean_state(env);
+    }
+}
+
 /* Correlation between real and virtual time is always going to be
    fairly approximate, so ignore small variation.
    When the guest is idle real and virtual time will be aligned in
diff --git a/cpus.h b/cpus.h
index 3fc1a4a..1ff166b 100644
--- a/cpus.h
+++ b/cpus.h
@@ -12,6 +12,7 @@  void unplug_vcpu(void *p);
 void cpu_synchronize_all_states(void);
 void cpu_synchronize_all_post_reset(void);
 void cpu_synchronize_all_post_init(void);
+void cpu_clean_all_dirty(void);
 
 void qtest_clock_warp(int64_t dest);
 
diff --git a/hw/kvm/clock.c b/hw/kvm/clock.c
index 824b978..b2bdda4 100644
--- a/hw/kvm/clock.c
+++ b/hw/kvm/clock.c
@@ -16,6 +16,8 @@ 
 #include "qemu-common.h"
 #include "sysemu.h"
 #include "kvm.h"
+#include "host-utils.h"
+#include "cpus.h"
 #include "hw/sysbus.h"
 #include "hw/kvm/clock.h"
 
@@ -28,6 +30,46 @@  typedef struct KVMClockState {
     bool clock_valid;
 } KVMClockState;
 
+struct pvclock_vcpu_time_info {
+    uint32_t   version;
+    uint32_t   pad0;
+    uint64_t   tsc_timestamp;     /* subtracted from the CPU's TSC value */
+    uint64_t   system_time;       /* added to the scaled TSC delta */
+    uint32_t   tsc_to_system_mul; /* multiplier; product is then >> 32 */
+    int8_t     tsc_shift;         /* <0: delta >>= -shift, else delta <<= shift */
+    uint8_t    flags;
+    uint8_t    pad[2];
+} __attribute__((__packed__)); /* 32 bytes, copied from guest memory */
+
+/* Recompute the kvmclock time from the first CPU's env->tsc and the pvclock
+ * area addressed by system_time_msr; returns 0 when its bit 0 is clear. */
+static uint64_t kvmclock_current_nsec(KVMClockState *s)
+{
+    CPUArchState *env = first_cpu;
+    uint64_t msr = env->system_time_msr;
+    uint64_t tsc_now = env->tsc;
+    struct pvclock_vcpu_time_info info;
+    uint64_t ticks, lo, hi;
+
+    if ((msr & 1ULL) == 0) {
+        return 0; /* KVM clock not active */
+    }
+    /* Bit 0 is the enable flag; the other bits give the address to read. */
+    cpu_physical_memory_read(msr & ~1ULL, &info, sizeof(info));
+
+    assert(info.tsc_timestamp <= tsc_now);
+    ticks = tsc_now - info.tsc_timestamp;
+    if (info.tsc_shift >= 0) {
+        ticks <<= info.tsc_shift;
+    } else {
+        ticks >>= -info.tsc_shift;
+    }
+
+    /* (ticks * mul) >> 32, taken from the 128-bit product lo/hi halves. */
+    mulu64(&lo, &hi, ticks, info.tsc_to_system_mul);
+    return ((lo >> 32) | (hi << 32)) + info.system_time;
+}
+
 static void kvmclock_pre_save(void *opaque)
 {
     KVMClockState *s = opaque;
@@ -37,6 +79,23 @@  static void kvmclock_pre_save(void *opaque)
     if (s->clock_valid) {
         return;
     }
+
+    cpu_synchronize_all_states();
+    /* In theory, the cpu_synchronize_all_states() call above wouldn't
+     * affect the rest of the code, as the VCPU state inside CPUArchState
+     * is supposed to always match the VCPU state on the kernel side.
+     *
+     * In practice, calling cpu_synchronize_state() too soon will load the
+     * kernel-side APIC state into X86CPU.apic_state too early, APIC state
+     * won't be reloaded later because env->kvm_vcpu_dirty is still set, and
+     * outdated APIC state may be migrated to another host.
+     *
+     * The real fix would be to make sure outdated APIC state is read
+     * from the kernel again when necessary. While this is not fixed, we
+     * need the cpu_clean_all_dirty() call below.
+     */
+    cpu_clean_all_dirty();
+
     ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
     if (ret < 0) {
         fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
@@ -55,6 +114,12 @@  static int kvmclock_post_load(void *opaque, int version_id)
 {
     KVMClockState *s = opaque;
     struct kvm_clock_data data;
+    uint64_t time_at_migration = kvmclock_current_nsec(s);
+
+    /* We can't rely on the migrated clock value, just discard it */
+    if (time_at_migration) {
+        s->clock = time_at_migration;
+    }
 
     data.clock = s->clock;
     data.flags = 0;
diff --git a/kvm-all.c b/kvm-all.c
index cd2ccbe..692944e 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -1547,6 +1547,11 @@  void kvm_cpu_synchronize_post_init(CPUArchState *env)
     env->kvm_vcpu_dirty = 0;
 }
 
+void kvm_cpu_clean_state(CPUArchState *env)
+{
+    env->kvm_vcpu_dirty = 0; /* only clears the flag; no state is synced */
+}
+
 int kvm_cpu_exec(CPUArchState *env)
 {
     struct kvm_run *run = env->kvm_run;
diff --git a/kvm.h b/kvm.h
index 2a68a52..92a17d8 100644
--- a/kvm.h
+++ b/kvm.h
@@ -234,6 +234,7 @@  uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function,
 void kvm_cpu_synchronize_state(CPUArchState *env);
 void kvm_cpu_synchronize_post_reset(CPUArchState *env);
 void kvm_cpu_synchronize_post_init(CPUArchState *env);
+void kvm_cpu_clean_state(CPUArchState *env); /* clears env->kvm_vcpu_dirty */
 
 /* generic hooks - to be moved/refactored once there are more users */
 
@@ -258,6 +259,12 @@  static inline void cpu_synchronize_post_init(CPUArchState *env)
     }
 }
 
+static inline void cpu_clean_state(CPUArchState *env)
+{
+    if (kvm_enabled()) { /* does nothing unless KVM is in use */
+        kvm_cpu_clean_state(env);
+    }
+}
 
 #if !defined(CONFIG_USER_ONLY)
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,