Message ID | 09d3dc5c4cc2a36e61d2584b1b706143b9ce7765.1267021065.git.jan.kiszka@siemens.com |
---|---|
State | New |
Headers | show |
On Wed, Feb 24, 2010 at 03:17:53PM +0100, Jan Kiszka wrote: > Do not write nmi_pending, sipi_vector, and mpstate unless we at least go > through a reset. And TSC as well as KVM wallclocks should only be > written on full sync, otherwise we risk to drop some time on during > state read-modify-write. > > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > --- > kvm.h | 2 +- > qemu-kvm-x86.c | 2 +- > target-i386/kvm.c | 32 ++++++++++++++++++++------------ > target-i386/machine.c | 2 +- > 4 files changed, 23 insertions(+), 15 deletions(-) > > diff --git a/kvm.h b/kvm.h > index 3ec5b59..3ee307d 100644 > --- a/kvm.h > +++ b/kvm.h > @@ -44,7 +44,7 @@ int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size); > int kvm_has_sync_mmu(void); > int kvm_has_vcpu_events(void); > int kvm_has_robust_singlestep(void); > -int kvm_put_vcpu_events(CPUState *env); > +int kvm_put_vcpu_events(CPUState *env, int level); > int kvm_get_vcpu_events(CPUState *env); > > void kvm_cpu_register_phys_memory_client(void); > diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c > index 4e6ae70..b0f9670 100644 > --- a/qemu-kvm-x86.c > +++ b/qemu-kvm-x86.c > @@ -1391,7 +1391,7 @@ void kvm_arch_push_nmi(void *opaque) > void kvm_arch_cpu_reset(CPUState *env) > { > kvm_arch_reset_vcpu(env); > - kvm_put_vcpu_events(env); > + kvm_put_vcpu_events(env, KVM_PUT_RESET_STATE); > kvm_reset_mpstate(env); > if (!cpu_is_bsp(env) && !kvm_irqchip_in_kernel()) { > env->interrupt_request &= ~CPU_INTERRUPT_HARD; > diff --git a/target-i386/kvm.c b/target-i386/kvm.c > index 5f0829b..f1f44d3 100644 > --- a/target-i386/kvm.c > +++ b/target-i386/kvm.c > @@ -541,7 +541,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry, > entry->data = value; > } > > -static int kvm_put_msrs(CPUState *env) > +static int kvm_put_msrs(CPUState *env, int level) > { > struct { > struct kvm_msrs info; > @@ -555,7 +555,6 @@ static int kvm_put_msrs(CPUState *env) > kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); > 
if (kvm_has_msr_star(env)) > kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); > - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); > kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); > #ifdef TARGET_X86_64 > /* FIXME if lm capable */ > @@ -564,8 +563,12 @@ static int kvm_put_msrs(CPUState *env) > kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); > kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); > #endif > - kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); > - kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); > + if (level == KVM_PUT_FULL_STATE) { > + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); > + kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, > + env->system_time_msr); > + kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); > + } > > msr_data.info.nmsrs = n; > > @@ -783,7 +786,7 @@ static int kvm_get_mp_state(CPUState *env) > } > #endif > > -int kvm_put_vcpu_events(CPUState *env) > +int kvm_put_vcpu_events(CPUState *env, int level) > { > #ifdef KVM_CAP_VCPU_EVENTS > struct kvm_vcpu_events events; > @@ -807,8 +810,11 @@ int kvm_put_vcpu_events(CPUState *env) > > events.sipi_vector = env->sipi_vector; > > - events.flags = > - KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR; > + events.flags = 0; > + if (level >= KVM_PUT_RESET_STATE) { > + events.flags |= > + KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR; > + } > > return kvm_vcpu_ioctl(env, KVM_SET_VCPU_EVENTS, &events); What is the reason for write-back of any vcpu-event state for RUNTIME case again? The debug workaround?
Marcelo Tosatti wrote: > On Wed, Feb 24, 2010 at 03:17:53PM +0100, Jan Kiszka wrote: >> Do not write nmi_pending, sipi_vector, and mpstate unless we at least go >> through a reset. And TSC as well as KVM wallclocks should only be >> written on full sync, otherwise we risk to drop some time on during >> state read-modify-write. >> >> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> >> --- >> kvm.h | 2 +- >> qemu-kvm-x86.c | 2 +- >> target-i386/kvm.c | 32 ++++++++++++++++++++------------ >> target-i386/machine.c | 2 +- >> 4 files changed, 23 insertions(+), 15 deletions(-) >> >> diff --git a/kvm.h b/kvm.h >> index 3ec5b59..3ee307d 100644 >> --- a/kvm.h >> +++ b/kvm.h >> @@ -44,7 +44,7 @@ int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size); >> int kvm_has_sync_mmu(void); >> int kvm_has_vcpu_events(void); >> int kvm_has_robust_singlestep(void); >> -int kvm_put_vcpu_events(CPUState *env); >> +int kvm_put_vcpu_events(CPUState *env, int level); >> int kvm_get_vcpu_events(CPUState *env); >> >> void kvm_cpu_register_phys_memory_client(void); >> diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c >> index 4e6ae70..b0f9670 100644 >> --- a/qemu-kvm-x86.c >> +++ b/qemu-kvm-x86.c >> @@ -1391,7 +1391,7 @@ void kvm_arch_push_nmi(void *opaque) >> void kvm_arch_cpu_reset(CPUState *env) >> { >> kvm_arch_reset_vcpu(env); >> - kvm_put_vcpu_events(env); >> + kvm_put_vcpu_events(env, KVM_PUT_RESET_STATE); >> kvm_reset_mpstate(env); >> if (!cpu_is_bsp(env) && !kvm_irqchip_in_kernel()) { >> env->interrupt_request &= ~CPU_INTERRUPT_HARD; >> diff --git a/target-i386/kvm.c b/target-i386/kvm.c >> index 5f0829b..f1f44d3 100644 >> --- a/target-i386/kvm.c >> +++ b/target-i386/kvm.c >> @@ -541,7 +541,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry, >> entry->data = value; >> } >> >> -static int kvm_put_msrs(CPUState *env) >> +static int kvm_put_msrs(CPUState *env, int level) >> { >> struct { >> struct kvm_msrs info; >> @@ -555,7 +555,6 @@ static int kvm_put_msrs(CPUState *env) 
>> kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); >> if (kvm_has_msr_star(env)) >> kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); >> - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); >> kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); >> #ifdef TARGET_X86_64 >> /* FIXME if lm capable */ >> @@ -564,8 +563,12 @@ static int kvm_put_msrs(CPUState *env) >> kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); >> kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); >> #endif >> - kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); >> - kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); >> + if (level == KVM_PUT_FULL_STATE) { >> + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); >> + kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, >> + env->system_time_msr); >> + kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); >> + } >> >> msr_data.info.nmsrs = n; >> >> @@ -783,7 +786,7 @@ static int kvm_get_mp_state(CPUState *env) >> } >> #endif >> >> -int kvm_put_vcpu_events(CPUState *env) >> +int kvm_put_vcpu_events(CPUState *env, int level) >> { >> #ifdef KVM_CAP_VCPU_EVENTS >> struct kvm_vcpu_events events; >> @@ -807,8 +810,11 @@ int kvm_put_vcpu_events(CPUState *env) >> >> events.sipi_vector = env->sipi_vector; >> >> - events.flags = >> - KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR; >> + events.flags = 0; >> + if (level >= KVM_PUT_RESET_STATE) { >> + events.flags |= >> + KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR; >> + } >> >> return kvm_vcpu_ioctl(env, KVM_SET_VCPU_EVENTS, &events); > > What is the reason for write-back of any vcpu-event state for RUNTIME > case again? > > The debug workaround? Consistency and maximum flexibility. 
I don't want to start fiddling with this again when we start to manipulate some VCPU runtime state that may not require writeback yet (workarounds like the guest debugging stuff can be a reason for that). Instead, we should now establish a clean concept that only knows those three types of writeback (runtime, reset, and full state) and their well-defined writeback points. Jan
diff --git a/kvm.h b/kvm.h index 3ec5b59..3ee307d 100644 --- a/kvm.h +++ b/kvm.h @@ -44,7 +44,7 @@ int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size); int kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); int kvm_has_robust_singlestep(void); -int kvm_put_vcpu_events(CPUState *env); +int kvm_put_vcpu_events(CPUState *env, int level); int kvm_get_vcpu_events(CPUState *env); void kvm_cpu_register_phys_memory_client(void); diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 4e6ae70..b0f9670 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -1391,7 +1391,7 @@ void kvm_arch_push_nmi(void *opaque) void kvm_arch_cpu_reset(CPUState *env) { kvm_arch_reset_vcpu(env); - kvm_put_vcpu_events(env); + kvm_put_vcpu_events(env, KVM_PUT_RESET_STATE); kvm_reset_mpstate(env); if (!cpu_is_bsp(env) && !kvm_irqchip_in_kernel()) { env->interrupt_request &= ~CPU_INTERRUPT_HARD; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 5f0829b..f1f44d3 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -541,7 +541,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry, entry->data = value; } -static int kvm_put_msrs(CPUState *env) +static int kvm_put_msrs(CPUState *env, int level) { struct { struct kvm_msrs info; @@ -555,7 +555,6 @@ static int kvm_put_msrs(CPUState *env) kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); if (kvm_has_msr_star(env)) kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); #ifdef TARGET_X86_64 /* FIXME if lm capable */ @@ -564,8 +563,12 @@ static int kvm_put_msrs(CPUState *env) kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); #endif - kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); - kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); + if (level == KVM_PUT_FULL_STATE) { + 
kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); + kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, + env->system_time_msr); + kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); + } msr_data.info.nmsrs = n; @@ -783,7 +786,7 @@ static int kvm_get_mp_state(CPUState *env) } #endif -int kvm_put_vcpu_events(CPUState *env) +int kvm_put_vcpu_events(CPUState *env, int level) { #ifdef KVM_CAP_VCPU_EVENTS struct kvm_vcpu_events events; @@ -807,8 +810,11 @@ int kvm_put_vcpu_events(CPUState *env) events.sipi_vector = env->sipi_vector; - events.flags = - KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR; + events.flags = 0; + if (level >= KVM_PUT_RESET_STATE) { + events.flags |= + KVM_VCPUEVENT_VALID_NMI_PENDING | KVM_VCPUEVENT_VALID_SIPI_VECTOR; + } return kvm_vcpu_ioctl(env, KVM_SET_VCPU_EVENTS, &events); #else @@ -901,15 +907,17 @@ int kvm_arch_put_registers(CPUState *env, int level) if (ret < 0) return ret; - ret = kvm_put_msrs(env); + ret = kvm_put_msrs(env, level); if (ret < 0) return ret; - ret = kvm_put_mp_state(env); - if (ret < 0) - return ret; + if (level >= KVM_PUT_RESET_STATE) { + ret = kvm_put_mp_state(env); + if (ret < 0) + return ret; + } - ret = kvm_put_vcpu_events(env); + ret = kvm_put_vcpu_events(env, level); if (ret < 0) return ret; diff --git a/target-i386/machine.c b/target-i386/machine.c index 61e6a87..6fca559 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -362,7 +362,7 @@ static int cpu_post_load(void *opaque, int version_id) if (kvm_enabled()) { kvm_load_tsc(env); - kvm_put_vcpu_events(env); + kvm_put_vcpu_events(env, KVM_PUT_FULL_STATE); } return 0;
Do not write nmi_pending, sipi_vector, and mpstate unless we at least go through a reset. And TSC as well as KVM wallclocks should only be written on full sync, otherwise we risk dropping some time during state read-modify-write. Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> --- kvm.h | 2 +- qemu-kvm-x86.c | 2 +- target-i386/kvm.c | 32 ++++++++++++++++++++------------ target-i386/machine.c | 2 +- 4 files changed, 23 insertions(+), 15 deletions(-)