diff mbox

[v3,07/10] qemu-kvm: Cleanup/fix TSC and PV clock writeback

Message ID ac600f145451efb7233e5ab293be2e0f49c904f6.1267021065.git.jan.kiszka@siemens.com
State New
Headers show

Commit Message

Jan Kiszka Feb. 24, 2010, 2:17 p.m. UTC
Drop kvm_load_tsc in favor of level-dependent writeback in
kvm_arch_load_regs. KVM's PV clock MSRs fall in the same category and
should therefore only be written back on full sync.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 qemu-kvm-x86.c        |   19 +++++--------------
 qemu-kvm.h            |    4 ----
 target-i386/machine.c |    5 -----
 3 files changed, 5 insertions(+), 23 deletions(-)

Comments

Marcelo Tosatti Feb. 24, 2010, 11:17 p.m. UTC | #1
On Wed, Feb 24, 2010 at 03:17:55PM +0100, Jan Kiszka wrote:
> Drop kvm_load_tsc in favor of level-dependent writeback in
> kvm_arch_load_regs. KVM's PV clock MSRs fall in the same category and
> should therefore only be written back on full sync.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  qemu-kvm-x86.c        |   19 +++++--------------
>  qemu-kvm.h            |    4 ----
>  target-i386/machine.c |    5 -----
>  3 files changed, 5 insertions(+), 23 deletions(-)
> 
> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
> index 840c1c9..84fd7fa 100644
> --- a/qemu-kvm-x86.c
> +++ b/qemu-kvm-x86.c
> @@ -965,8 +965,11 @@ void kvm_arch_load_regs(CPUState *env, int level)
>          set_msr_entry(&msrs[n++], MSR_LSTAR  ,           env->lstar);
>      }
>  #endif
> -    set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME,  env->system_time_msr);
> -    set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK,  env->wall_clock_msr);
> +    if (level == KVM_PUT_FULL_STATE) {
> +        set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
> +        set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr);
> +        set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
> +    }

As things stand today, the TSC should only be written on migration. See
53f658b3c33616a4997ee254311b335e59063289 in the kernel.
Jan Kiszka Feb. 24, 2010, 11:45 p.m. UTC | #2
Marcelo Tosatti wrote:
> On Wed, Feb 24, 2010 at 03:17:55PM +0100, Jan Kiszka wrote:
>> Drop kvm_load_tsc in favor of level-dependent writeback in
>> kvm_arch_load_regs. KVM's PV clock MSRs fall in the same category and
>> should therefore only be written back on full sync.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>> ---
>>  qemu-kvm-x86.c        |   19 +++++--------------
>>  qemu-kvm.h            |    4 ----
>>  target-i386/machine.c |    5 -----
>>  3 files changed, 5 insertions(+), 23 deletions(-)
>>
>> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
>> index 840c1c9..84fd7fa 100644
>> --- a/qemu-kvm-x86.c
>> +++ b/qemu-kvm-x86.c
>> @@ -965,8 +965,11 @@ void kvm_arch_load_regs(CPUState *env, int level)
>>          set_msr_entry(&msrs[n++], MSR_LSTAR  ,           env->lstar);
>>      }
>>  #endif
>> -    set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME,  env->system_time_msr);
>> -    set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK,  env->wall_clock_msr);
>> +    if (level == KVM_PUT_FULL_STATE) {
>> +        set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
>> +        set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr);
>> +        set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
>> +    }
> 
> As things stand today, the TSC should only be written on migration. See
> 53f658b3c33616a4997ee254311b335e59063289 in the kernel.

Migration and power-up - that's what this patch ensures (=>
KVM_PUT_FULL_STATE). Or where do you see any problem?

Jan
Marcelo Tosatti Feb. 24, 2010, 11:49 p.m. UTC | #3
On Thu, Feb 25, 2010 at 12:45:55AM +0100, Jan Kiszka wrote:
> Marcelo Tosatti wrote:
> > On Wed, Feb 24, 2010 at 03:17:55PM +0100, Jan Kiszka wrote:
> >> Drop kvm_load_tsc in favor of level-dependent writeback in
> >> kvm_arch_load_regs. KVM's PV clock MSRs fall in the same category and
> >> should therefore only be written back on full sync.
> >>
> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> >> ---
> >>  qemu-kvm-x86.c        |   19 +++++--------------
> >>  qemu-kvm.h            |    4 ----
> >>  target-i386/machine.c |    5 -----
> >>  3 files changed, 5 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
> >> index 840c1c9..84fd7fa 100644
> >> --- a/qemu-kvm-x86.c
> >> +++ b/qemu-kvm-x86.c
> >> @@ -965,8 +965,11 @@ void kvm_arch_load_regs(CPUState *env, int level)
> >>          set_msr_entry(&msrs[n++], MSR_LSTAR  ,           env->lstar);
> >>      }
> >>  #endif
> >> -    set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME,  env->system_time_msr);
> >> -    set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK,  env->wall_clock_msr);
> >> +    if (level == KVM_PUT_FULL_STATE) {
> >> +        set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
> >> +        set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr);
> >> +        set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
> >> +    }
> > 
> > As things stand today, the TSC should only be written on migration. See
> > 53f658b3c33616a4997ee254311b335e59063289 in the kernel.
> 
> Migration and power-up - that's what this patch ensures (=>
> KVM_PUT_FULL_STATE). Or where do you see any problem?
> 
> Jan
> 

The problem is it should not write on power up (the kernel attempts
to synchronize the TSCs in that case, see the commit).
Jan Kiszka Feb. 24, 2010, 11:58 p.m. UTC | #4
Marcelo Tosatti wrote:
> On Thu, Feb 25, 2010 at 12:45:55AM +0100, Jan Kiszka wrote:
>> Marcelo Tosatti wrote:
>>> On Wed, Feb 24, 2010 at 03:17:55PM +0100, Jan Kiszka wrote:
>>>> Drop kvm_load_tsc in favor of level-dependent writeback in
>>>> kvm_arch_load_regs. KVM's PV clock MSRs fall in the same category and
>>>> should therefore only be written back on full sync.
>>>>
>>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
>>>> ---
>>>>  qemu-kvm-x86.c        |   19 +++++--------------
>>>>  qemu-kvm.h            |    4 ----
>>>>  target-i386/machine.c |    5 -----
>>>>  3 files changed, 5 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
>>>> index 840c1c9..84fd7fa 100644
>>>> --- a/qemu-kvm-x86.c
>>>> +++ b/qemu-kvm-x86.c
>>>> @@ -965,8 +965,11 @@ void kvm_arch_load_regs(CPUState *env, int level)
>>>>          set_msr_entry(&msrs[n++], MSR_LSTAR  ,           env->lstar);
>>>>      }
>>>>  #endif
>>>> -    set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME,  env->system_time_msr);
>>>> -    set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK,  env->wall_clock_msr);
>>>> +    if (level == KVM_PUT_FULL_STATE) {
>>>> +        set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
>>>> +        set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr);
>>>> +        set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
>>>> +    }
>>> As things stand today, the TSC should only be written on migration. See
>>> 53f658b3c33616a4997ee254311b335e59063289 in the kernel.
>> Migration and power-up - that's what this patch ensures (=>
>> KVM_PUT_FULL_STATE). Or where do you see any problem?
>>
>> Jan
>>
> 
> The problem is it should not write on power up (the kernel attempts
> to synchronize the TSCs in that case, see the commit).
> 

OK, need to read this more carefully.

I do not yet understand the difference from user space POV: it tries to
transfer the identical TSC values to all currently stopped VCPU threads.
That should not be different if we are booting a fresh VM or loading a
complete state of a migrated image. If it does, it looks like a KVM
kernel deficit on first glance.

Jan
Marcelo Tosatti Feb. 25, 2010, 3:58 a.m. UTC | #5
On Thu, Feb 25, 2010 at 12:58:26AM +0100, Jan Kiszka wrote:
> Marcelo Tosatti wrote:
> > On Thu, Feb 25, 2010 at 12:45:55AM +0100, Jan Kiszka wrote:
> >> Marcelo Tosatti wrote:
> >>> On Wed, Feb 24, 2010 at 03:17:55PM +0100, Jan Kiszka wrote:
> >>>> Drop kvm_load_tsc in favor of level-dependent writeback in
> >>>> kvm_arch_load_regs. KVM's PV clock MSRs fall in the same category and
> >>>> should therefore only be written back on full sync.
> >>>>
> >>>> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> >>>> ---
> >>>>  qemu-kvm-x86.c        |   19 +++++--------------
> >>>>  qemu-kvm.h            |    4 ----
> >>>>  target-i386/machine.c |    5 -----
> >>>>  3 files changed, 5 insertions(+), 23 deletions(-)
> >>>>
> >>>> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
> >>>> index 840c1c9..84fd7fa 100644
> >>>> --- a/qemu-kvm-x86.c
> >>>> +++ b/qemu-kvm-x86.c
> >>>> @@ -965,8 +965,11 @@ void kvm_arch_load_regs(CPUState *env, int level)
> >>>>          set_msr_entry(&msrs[n++], MSR_LSTAR  ,           env->lstar);
> >>>>      }
> >>>>  #endif
> >>>> -    set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME,  env->system_time_msr);
> >>>> -    set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK,  env->wall_clock_msr);
> >>>> +    if (level == KVM_PUT_FULL_STATE) {
> >>>> +        set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
> >>>> +        set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr);
> >>>> +        set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
> >>>> +    }
> >>> As things stand today, the TSC should only be written on migration. See
> >>> 53f658b3c33616a4997ee254311b335e59063289 in the kernel.
> >> Migration and power-up - that's what this patch ensures (=>
> >> KVM_PUT_FULL_STATE). Or where do you see any problem?
> >>
> >> Jan
> >>
> > 
> > The problem is it should not write on power up (the kernel attempts
> > to synchronize the TSCs in that case, see the commit).
> > 
> 
> OK, need to read this more carefully.
> 
> I do not yet understand the difference from user space POV: it tries to
> transfer the identical TSC values to all currently stopped VCPU threads.

guest tsc = host tsc + offset

So at the time you set_msr(TSC), the guest visible TSC starts ticking. 
For SMP guests, this does not happen exactly at the same time for all
vcpus.

> That should not be different if we are booting a fresh VM or loading a
> complete state of a migrated image. If it does, it looks like a KVM
> kernel deficit on first glance.

Yes it is a deficit. After migration TSCs of SMP guests go out of sync.
Zachary is working on that.

> 
> Jan
>
diff mbox

Patch

diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c
index 840c1c9..84fd7fa 100644
--- a/qemu-kvm-x86.c
+++ b/qemu-kvm-x86.c
@@ -965,8 +965,11 @@  void kvm_arch_load_regs(CPUState *env, int level)
         set_msr_entry(&msrs[n++], MSR_LSTAR  ,           env->lstar);
     }
 #endif
-    set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME,  env->system_time_msr);
-    set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK,  env->wall_clock_msr);
+    if (level == KVM_PUT_FULL_STATE) {
+        set_msr_entry(&msrs[n++], MSR_IA32_TSC, env->tsc);
+        set_msr_entry(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr);
+        set_msr_entry(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
+    }
 
     rc = kvm_set_msrs(env, msrs, n);
     if (rc == -1)
@@ -986,18 +989,6 @@  void kvm_arch_load_regs(CPUState *env, int level)
     kvm_guest_debug_workarounds(env);
 }
 
-void kvm_load_tsc(CPUState *env)
-{
-    int rc;
-    struct kvm_msr_entry msr;
-
-    set_msr_entry(&msr, MSR_IA32_TSC, env->tsc);
-
-    rc = kvm_set_msrs(env, &msr, 1);
-    if (rc == -1)
-        perror("kvm_set_tsc FAILED.\n");
-}
-
 void kvm_arch_save_regs(CPUState *env)
 {
     struct kvm_regs regs;
diff --git a/qemu-kvm.h b/qemu-kvm.h
index c6ff652..827cac5 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -958,7 +958,6 @@  int handle_tpr_access(void *opaque, CPUState *env, uint64_t rip,
 #ifdef TARGET_I386
 #define qemu_kvm_has_pit_state2() kvm_has_pit_state2(kvm_context)
 #endif
-void kvm_load_tsc(CPUState *env);
 #else
 #define kvm_nested 0
 #define qemu_kvm_has_gsi_routing() (0)
@@ -966,9 +965,6 @@  void kvm_load_tsc(CPUState *env);
 #define qemu_kvm_has_pit_state2() (0)
 #endif
 #define qemu_kvm_cpu_stop(env) do {} while(0)
-static inline void kvm_load_tsc(CPUState *env)
-{
-}
 #endif
 
 void kvm_mutex_unlock(void);
diff --git a/target-i386/machine.c b/target-i386/machine.c
index bcc315b..b547e2a 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -354,11 +354,6 @@  static int cpu_post_load(void *opaque, int version_id)
         hw_breakpoint_insert(env, i);
 
     tlb_flush(env, 1);
-
-    if (kvm_enabled()) {
-        kvm_load_tsc(env);
-    }
-
     return 0;
 }