diff mbox

[qemu,2/2] kvmclock: reduce kvmclock difference on migration

Message ID 1023a283-77a7-45e5-8877-6264e08d0658@redhat.com
State New
Headers show

Commit Message

Paolo Bonzini Nov. 28, 2016, 1:47 p.m. UTC
On 17/11/2016 13:16, Marcelo Tosatti wrote:
> What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> of whether masterclock is enabled or not... it just depends
> on KVM_GET_CLOCK being correct for the masterclock case
> (108b249c453dd7132599ab6dc7e435a7036c193f).
> 
> So a "reliable KVM_GET_CLOCK" (that does not timebackward
> when masterclock is enabled) is much simpler to userspace
> than "whether masterclock is enabled or not".
> 
> If you have a reason why that should not be the case,
> let me know.

I still cannot understand this case.

If the source has masterclock _disabled_, shouldn't it read kvmclock 
from memory?  But that's not what your patch does.  To be clear, what
I mean is:

Comments

Eduardo Habkost Nov. 28, 2016, 2:28 p.m. UTC | #1
On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/11/2016 13:16, Marcelo Tosatti wrote:
> > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> > of whether masterclock is enabled or not... it just depends
> > on KVM_GET_CLOCK being correct for the masterclock case
> > (108b249c453dd7132599ab6dc7e435a7036c193f).
> > 
> > So a "reliable KVM_GET_CLOCK" (that does not timebackward
> > when masterclock is enabled) is much simpler to userspace
> > than "whether masterclock is enabled or not".
> > 
> > If you have a reason why that should not be the case,
> > let me know.
> 
> I still cannot understand this case.
> 
> If the source has masterclock _disabled_, shouldn't it read kvmclock 
> from memory?  But that's not what your patch does.  To be clear, what
> I mean is:
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 653b0b4..6f6e2dc 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void)
>          fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
>                  abort();
>      }
> +    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;

I still don't understand the reasoning behind
kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE),
but on either case, updating src_use_reliable_get_clock inside
kvm_get_clock() looks like the right thing to do.

>      return data.clock;
>  }
>  
> @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>  
>      if (running) {
>          struct kvm_clock_data data = {};
> -        uint64_t pvclock_via_mem = 0;
>  
> -        /* local (running VM) restore */
> -        if (s->clock_valid) {
> -            /*
> -             * if host does not support reliable KVM_GET_CLOCK,
> -             * read kvmclock value from memory
> -             */
> -            if (!kvm_has_adjust_clock_stable()) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> -            }
> -        /* migration/savevm/init restore */
> -        } else {
> -            /*
> -             * use s->clock in case machine uses reliable
> -             * get clock and source host supported
> -             * reliable get clock
> -             */
> -            if (!s->src_use_reliable_get_clock) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> +        /*
> +         * if last KVM_GET_CLOCK did not retrieve a reliable value,
> +         * we can't rely on the saved clock value.  Just discard it and
> +         * read kvmclock value from memory
> +         */
> +        if (!s->src_use_reliable_get_clock) {
> +            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
> +            if (pvclock_via_mem) {
> +                s->clock = pvclock_via_mem;
>              }
>          }

This is equivalent to the logic I suggested on my reply to v3.
Much simpler.

>  
> -        /* We can't rely on the saved clock value, just discard it */
> -        if (pvclock_via_mem) {
> -            s->clock = pvclock_via_mem;
> -        }
> -
>          s->clock_valid = false;
>  
>          data.clock = s->clock;
> @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
>  {
>      KVMClockState *s = KVM_CLOCK(dev);
>  
> -    /*
> -     * On machine types that support reliable KVM_GET_CLOCK,
> -     * if host kernel does provide reliable KVM_GET_CLOCK,
> -     * set src_use_reliable_get_clock=true so that destination
> -     * avoids reading kvmclock from memory.
> -     */
> -    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> -        s->src_use_reliable_get_clock = true;
> -    }
> -
>      qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
>  }
>
Paolo Bonzini Nov. 28, 2016, 3:12 p.m. UTC | #2
On 28/11/2016 15:28, Eduardo Habkost wrote:
> > +    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;
> 
> I still don't understand the reasoning behind
> kvm_has_adjust_clock_stable() vs (flags & KVM_CLOCK_TSC_STABLE),
> but on either case, updating src_use_reliable_get_clock inside
> kvm_get_clock() looks like the right thing to do.

There are three possibility: the kernel tells you the clock is stable,
the kernel tells you the clock is unstable, the kernel is too old and
doesn't tell you anything.  Then:

    kvm_has_adjust_clock_stable() == true:
	if the clock is stable, KVM_CLOCK_TSC_STABLE will be set in "flags"
	if the clock is unstable, KVM_CLOCK_TSC_STABLE will be unset

    kvm_has_adjust_clock_stable() == false:
	you cannot know if the clock is stable

Paolo
Marcelo Tosatti Nov. 28, 2016, 4:36 p.m. UTC | #3
On Mon, Nov 28, 2016 at 02:47:18PM +0100, Paolo Bonzini wrote:
> 
> 
> On 17/11/2016 13:16, Marcelo Tosatti wrote:
> > What QEMU wants is to use KVM_GET_CLOCK at pre_save independently
> > of whether masterclock is enabled or not... it just depends
> > on KVM_GET_CLOCK being correct for the masterclock case
> > (108b249c453dd7132599ab6dc7e435a7036c193f).
> > 
> > So a "reliable KVM_GET_CLOCK" (that does not timebackward
> > when masterclock is enabled) is much simpler to userspace
> > than "whether masterclock is enabled or not".
> > 
> > If you have a reason why that should not be the case,
> > let me know.
> 
> I still cannot understand this case.
> 
> If the source has masterclock _disabled_, shouldn't it read kvmclock 
> from memory?  

If the source masterclock is disabled, then the guest does
not enable the optimization to not use a global variable 
to guarantee monotonicity. Therefore there will be no 
time backwards events (the timer backwards events crashed 
guests, and are the reason for reading from guest memory).

So if there are no flaws in the reasoning above, 
no, there is no need to read from memory if 
masterclock is disabled.

Can you state the reasons why you think it should be enabled?

> But that's not what your patch does.  

Its on purpose with the data available.

> To be clear, what
> I mean is:
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 653b0b4..6f6e2dc 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -97,6 +97,7 @@ static uint64_t kvm_get_clock(void)
>          fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
>                  abort();
>      }
> +    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;
>      return data.clock;
>  }
>  
> @@ -110,34 +111,19 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>  
>      if (running) {
>          struct kvm_clock_data data = {};
> -        uint64_t pvclock_via_mem = 0;
>  
> -        /* local (running VM) restore */
> -        if (s->clock_valid) {
> -            /*
> -             * if host does not support reliable KVM_GET_CLOCK,
> -             * read kvmclock value from memory
> -             */
> -            if (!kvm_has_adjust_clock_stable()) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> -            }
> -        /* migration/savevm/init restore */
> -        } else {
> -            /*
> -             * use s->clock in case machine uses reliable
> -             * get clock and source host supported
> -             * reliable get clock
> -             */
> -            if (!s->src_use_reliable_get_clock) {
> -                pvclock_via_mem = kvmclock_current_nsec(s);
> +        /*
> +         * if last KVM_GET_CLOCK did not retrieve a reliable value,
> +         * we can't rely on the saved clock value.  Just discard it and
> +         * read kvmclock value from memory
> +         */
> +        if (!s->src_use_reliable_get_clock) {
> +            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
> +            if (pvclock_via_mem) {
> +                s->clock = pvclock_via_mem;
>              }
>          }
>  
> -        /* We can't rely on the saved clock value, just discard it */
> -        if (pvclock_via_mem) {
> -            s->clock = pvclock_via_mem;
> -        }
> -
>          s->clock_valid = false;
>  
>          data.clock = s->clock;
> @@ -181,16 +168,6 @@ static void kvmclock_realize(DeviceState *dev, Error **errp)
>  {
>      KVMClockState *s = KVM_CLOCK(dev);
>  
> -    /*
> -     * On machine types that support reliable KVM_GET_CLOCK,
> -     * if host kernel does provide reliable KVM_GET_CLOCK,
> -     * set src_use_reliable_get_clock=true so that destination
> -     * avoids reading kvmclock from memory.
> -     */
> -    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
> -        s->src_use_reliable_get_clock = true;
> -    }
> -
>      qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
>  }
>
Paolo Bonzini Nov. 28, 2016, 5:30 p.m. UTC | #4
On 28/11/2016 17:36, Marcelo Tosatti wrote:
> If the source masterclock is disabled, then the guest does
> not enable the optimization to not use a global variable 
> to guarantee monotonicity. Therefore there will be no 
> time backwards events (the timer backwards events crashed 
> guests, and are the reason for reading from guest memory).
> 
> So if there are no flaws in the reasoning above, 
> no, there is no need to read from memory if 
> masterclock is disabled.

Yeah, the reasoning is sound.  So you go from what Eduardo and I were
thinking:

    if last KVM_GET_CLOCK was not reliable then
        read from memory

to this:

    if last KVM_GET_CLOCK was not reliable && masterclock is enabled
        read from memory

but:

- on an old kernel, the left side is always true and the right side is
unknown (so we must assume it's true and read from memory)

- on a new kernel, the two sides of the "&&" are exactly the opposite,
so the result is always false

and then it becomes

    if old kernel then
        read from memory

Got it finally. :)

Paolo

> Can you state the reasons why you think it should be enabled?
>
diff mbox

Patch

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 653b0b4..6f6e2dc 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -97,6 +97,7 @@  static uint64_t kvm_get_clock(void)
         fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
                 abort();
     }
+    s->src_use_reliable_get_clock = data.flags & KVM_CLOCK_TSC_STABLE;
     return data.clock;
 }
 
@@ -110,34 +111,19 @@  static void kvmclock_vm_state_change(void *opaque, int running,
 
     if (running) {
         struct kvm_clock_data data = {};
-        uint64_t pvclock_via_mem = 0;
 
-        /* local (running VM) restore */
-        if (s->clock_valid) {
-            /*
-             * if host does not support reliable KVM_GET_CLOCK,
-             * read kvmclock value from memory
-             */
-            if (!kvm_has_adjust_clock_stable()) {
-                pvclock_via_mem = kvmclock_current_nsec(s);
-            }
-        /* migration/savevm/init restore */
-        } else {
-            /*
-             * use s->clock in case machine uses reliable
-             * get clock and source host supported
-             * reliable get clock
-             */
-            if (!s->src_use_reliable_get_clock) {
-                pvclock_via_mem = kvmclock_current_nsec(s);
+        /*
+         * if last KVM_GET_CLOCK did not retrieve a reliable value,
+         * we can't rely on the saved clock value.  Just discard it and
+         * read kvmclock value from memory
+         */
+        if (!s->src_use_reliable_get_clock) {
+            uint64_t pvclock_via_mem = kvmclock_current_nsec(s);
+            if (pvclock_via_mem) {
+                s->clock = pvclock_via_mem;
             }
         }
 
-        /* We can't rely on the saved clock value, just discard it */
-        if (pvclock_via_mem) {
-            s->clock = pvclock_via_mem;
-        }
-
         s->clock_valid = false;
 
         data.clock = s->clock;
@@ -181,16 +168,6 @@  static void kvmclock_realize(DeviceState *dev, Error **errp)
 {
     KVMClockState *s = KVM_CLOCK(dev);
 
-    /*
-     * On machine types that support reliable KVM_GET_CLOCK,
-     * if host kernel does provide reliable KVM_GET_CLOCK,
-     * set src_use_reliable_get_clock=true so that destination
-     * avoids reading kvmclock from memory.
-     */
-    if (s->mach_use_reliable_get_clock && kvm_has_adjust_clock_stable()) {
-        s->src_use_reliable_get_clock = true;
-    }
-
     qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
 }