Patchwork kvmclock: clock should count only if vm is running

login
register
mail settings
Submitter Marcelo Tosatti
Date June 8, 2013, 2 a.m.
Message ID <20130608020048.GA1412@amt.cnet>
Download mbox | patch
Permalink /patch/249913/
State New
Headers show

Comments

Marcelo Tosatti - June 8, 2013, 2 a.m.
kvmclock should not count while vm is paused, because:

1) if the vm is paused for long periods, timekeeping 
math can overflow while converting the (large) clocksource 
delta to nanoseconds.

2) Users rely on CLOCK_MONOTONIC to count run time, that is,
the time during which the OS has been in a runnable state (see CLOCK_BOOTTIME).

Change the kvmclock driver so that it saves the clock value when the vm
transitions from the runnable to the stopped state, and restores the clock
value on the stopped to runnable transition.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Paolo Bonzini - June 18, 2013, 9:02 a.m.
Hi Marcelo, sorry for the late review.

Il 08/06/2013 04:00, Marcelo Tosatti ha scritto:
> kvmclock should not count while vm is paused, because:
> 
> 1) if the vm is paused for long periods, timekeeping 
> math can overflow while converting the (large) clocksource 
> delta to nanoseconds.
> 
> 2) Users rely on CLOCK_MONOTONIC to count run time, that is, 
> time which OS has been in a runnable state (see CLOCK_BOOTTIME).

Do you have any ideas on how to implement CLOCK_BOOTTIME for kvmclock?
I think we need to add more fields for the delta between CLOCK_MONOTONIC
and CLOCK_BOOTTIME.

> Change kvmclock driver so as to save clock value when vm transitions
> from runnable to stopped state, and to restore clock value from stopped
> to runnable transition.
> 
> Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> 
> diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> index 87d4d0f..7d2d005 100644
> --- a/hw/i386/kvm/clock.c
> +++ b/hw/i386/kvm/clock.c
> @@ -28,38 +28,6 @@ typedef struct KVMClockState {
>      bool clock_valid;
>  } KVMClockState;
>  
> -static void kvmclock_pre_save(void *opaque)
> -{
> -    KVMClockState *s = opaque;
> -    struct kvm_clock_data data;
> -    int ret;
> -
> -    if (s->clock_valid) {
> -        return;
> -    }
> -    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
> -    if (ret < 0) {
> -        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
> -        data.clock = 0;
> -    }
> -    s->clock = data.clock;
> -    /*
> -     * If the VM is stopped, declare the clock state valid to avoid re-reading
> -     * it on next vmsave (which would return a different value). Will be reset
> -     * when the VM is continued.
> -     */
> -    s->clock_valid = !runstate_is_running();
> -}
> -
> -static int kvmclock_post_load(void *opaque, int version_id)
> -{
> -    KVMClockState *s = opaque;
> -    struct kvm_clock_data data;
> -
> -    data.clock = s->clock;
> -    data.flags = 0;
> -    return kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
> -}
>  
>  static void kvmclock_vm_state_change(void *opaque, int running,
>                                       RunState state)
> @@ -70,8 +38,18 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>      int ret;
>  
>      if (running) {
> +        struct kvm_clock_data data;
> +

Do we need an "if (!s->clock_valid) return;" here, or an assertion?  (Or
alternatively, what happens if s->clock_valid == false?)

>          s->clock_valid = false;
>  
> +        data.clock = s->clock;
> +        data.flags = 0;
> +        ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
> +        if (ret < 0) {
> +            fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret));
> +            abort();
> +        }
> +
>          if (!cap_clock_ctrl) {
>              return;
>          }
> @@ -84,6 +62,26 @@ static void kvmclock_vm_state_change(void *opaque, int running,
>                  return;
>              }
>          }
> +    } else {
> +        struct kvm_clock_data data;
> +        int ret;
> +
> +        if (s->clock_valid) {
> +            return;
> +        }
> +        ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
> +        if (ret < 0) {
> +            fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
> +            abort();
> +        }
> +        s->clock = data.clock;
> +
> +        /*
> +         * If the VM is stopped, declare the clock state valid to
> +         * avoid re-reading it on next vmsave (which would return
> +         * a different value). Will be reset when the VM is continued.
> +         */
> +        s->clock_valid = !runstate_is_running();

Here we know that !runstate_is_running() is true (we're in the else
branch of "if (running)") so it can just be "s->clock_valid = true".
This matches the false assignment in the other branch.  This is just a
nit, but it makes the code clearer.

Paolo

>      }
>  }
>  
> @@ -100,8 +98,6 @@ static const VMStateDescription kvmclock_vmsd = {
>      .version_id = 1,
>      .minimum_version_id = 1,
>      .minimum_version_id_old = 1,
> -    .pre_save = kvmclock_pre_save,
> -    .post_load = kvmclock_post_load,
>      .fields = (VMStateField[]) {
>          VMSTATE_UINT64(clock, KVMClockState),
>          VMSTATE_END_OF_LIST()
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
Marcelo Tosatti - June 18, 2013, 11:20 p.m.
On Tue, Jun 18, 2013 at 11:02:27AM +0200, Paolo Bonzini wrote:
> Hi Marcelo, sorry for the late review.
> 
> Il 08/06/2013 04:00, Marcelo Tosatti ha scritto:
> > kvmclock should not count while vm is paused, because:
> > 
> > 1) if the vm is paused for long periods, timekeeping 
> > math can overflow while converting the (large) clocksource 
> > delta to nanoseconds.
> > 
> > 2) Users rely on CLOCK_MONOTONIC to count run time, that is, 
> > time which OS has been in a runnable state (see CLOCK_BOOTTIME).
> 
> Do you have any ideas on how to implement CLOCK_BOOTTIME for kvmclock?
> I think we need to add more fields for the delta between CLOCK_MONOTONIC
> and CLOCK_BOOTTIME.

Unsure. An alternative to new fields is to use the MSR_KVM_WALL_CLOCK
interface. I am looking at two possibilities for catching up with real time
on unpause or vmload:

1) Catch up realtime via guest agent.
2) Catch up realtime via kvmclock interface.

But it would be good to have proper CLOCK_BOOTTIME behaviour at the same
time.

> > Change kvmclock driver so as to save clock value when vm transitions
> > from runnable to stopped state, and to restore clock value from stopped
> > to runnable transition.
> > 
> > Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
> > 
> > diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
> > index 87d4d0f..7d2d005 100644
> > --- a/hw/i386/kvm/clock.c
> > +++ b/hw/i386/kvm/clock.c
> > @@ -28,38 +28,6 @@ typedef struct KVMClockState {
> >      bool clock_valid;
> >  } KVMClockState;
> >  
> > -static void kvmclock_pre_save(void *opaque)
> > -{
> > -    KVMClockState *s = opaque;
> > -    struct kvm_clock_data data;
> > -    int ret;
> > -
> > -    if (s->clock_valid) {
> > -        return;
> > -    }
> > -    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
> > -    if (ret < 0) {
> > -        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
> > -        data.clock = 0;
> > -    }
> > -    s->clock = data.clock;
> > -    /*
> > -     * If the VM is stopped, declare the clock state valid to avoid re-reading
> > -     * it on next vmsave (which would return a different value). Will be reset
> > -     * when the VM is continued.
> > -     */
> > -    s->clock_valid = !runstate_is_running();
> > -}
> > -
> > -static int kvmclock_post_load(void *opaque, int version_id)
> > -{
> > -    KVMClockState *s = opaque;
> > -    struct kvm_clock_data data;
> > -
> > -    data.clock = s->clock;
> > -    data.flags = 0;
> > -    return kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
> > -}
> >  
> >  static void kvmclock_vm_state_change(void *opaque, int running,
> >                                       RunState state)
> > @@ -70,8 +38,18 @@ static void kvmclock_vm_state_change(void *opaque, int running,
> >      int ret;
> >  
> >      if (running) {
> > +        struct kvm_clock_data data;
> > +
> 
> Do we need an "if (!s->clock_valid) return;" here, or an assertion?  (Or
> alternatively, what happens if s->clock_valid == false?)


s->clock_valid = true means the clock value at s->clock (saved in
QEMU) is valid. 

At the moment the VM begins execution, it becomes invalid (which means
synchronizing that value is necessary).

For the stopped->running transition, it has no meaning (just as the load
functions assume the device state is valid when they are called).

That said, I see no reason for additional checks.

> > +         */
> > +        s->clock_valid = !runstate_is_running();
> 
> Here we know that !runstate_is_running() is true (we're in the else
> branch of "if (running)") so it can just be "s->clock_valid = true".
> This matches the false assignment in the other branch.  This is just a
> nit, but it makes the code clearer.
> 
> Paolo

Sure, resending.

Patch

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 87d4d0f..7d2d005 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -28,38 +28,6 @@  typedef struct KVMClockState {
     bool clock_valid;
 } KVMClockState;
 
-static void kvmclock_pre_save(void *opaque)
-{
-    KVMClockState *s = opaque;
-    struct kvm_clock_data data;
-    int ret;
-
-    if (s->clock_valid) {
-        return;
-    }
-    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
-    if (ret < 0) {
-        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
-        data.clock = 0;
-    }
-    s->clock = data.clock;
-    /*
-     * If the VM is stopped, declare the clock state valid to avoid re-reading
-     * it on next vmsave (which would return a different value). Will be reset
-     * when the VM is continued.
-     */
-    s->clock_valid = !runstate_is_running();
-}
-
-static int kvmclock_post_load(void *opaque, int version_id)
-{
-    KVMClockState *s = opaque;
-    struct kvm_clock_data data;
-
-    data.clock = s->clock;
-    data.flags = 0;
-    return kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
-}
 
 static void kvmclock_vm_state_change(void *opaque, int running,
                                      RunState state)
@@ -70,8 +38,18 @@  static void kvmclock_vm_state_change(void *opaque, int running,
     int ret;
 
     if (running) {
+        struct kvm_clock_data data;
+
         s->clock_valid = false;
 
+        data.clock = s->clock;
+        data.flags = 0;
+        ret = kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
+        if (ret < 0) {
+            fprintf(stderr, "KVM_SET_CLOCK failed: %s\n", strerror(ret));
+            abort();
+        }
+
         if (!cap_clock_ctrl) {
             return;
         }
@@ -84,6 +62,26 @@  static void kvmclock_vm_state_change(void *opaque, int running,
                 return;
             }
         }
+    } else {
+        struct kvm_clock_data data;
+        int ret;
+
+        if (s->clock_valid) {
+            return;
+        }
+        ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
+        if (ret < 0) {
+            fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+            abort();
+        }
+        s->clock = data.clock;
+
+        /*
+         * If the VM is stopped, declare the clock state valid to
+         * avoid re-reading it on next vmsave (which would return
+         * a different value). Will be reset when the VM is continued.
+         */
+        s->clock_valid = !runstate_is_running();
     }
 }
 
@@ -100,8 +98,6 @@  static const VMStateDescription kvmclock_vmsd = {
     .version_id = 1,
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
-    .pre_save = kvmclock_pre_save,
-    .post_load = kvmclock_post_load,
     .fields = (VMStateField[]) {
         VMSTATE_UINT64(clock, KVMClockState),
         VMSTATE_END_OF_LIST()