
[uq/master] kvm: x86: Separately write feature control MSR on reset

Message ID: 52B0A069.8060807@siemens.com
State: New

Commit Message

Jan Kiszka Dec. 17, 2013, 7:05 p.m. UTC
If the guest is running in nested mode on system reset, clearing the
feature control MSR signals the kernel to leave this mode. Recent
kernels process this properly but leave the VCPU state undefined
afterwards; it is the job of userspace to bring it back into proper
shape. Therefore, write this specific MSR first so that none of the
subsequently transferred state is lost.

This allows a guest with VMX in use to be reset cleanly.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 target-i386/kvm.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)
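
For readers unfamiliar with the KVM interface used here: the patch builds a
one-entry KVM_SET_MSRS buffer on the stack, a struct kvm_msrs header followed
immediately by a single struct kvm_msr_entry. The following standalone sketch
shows the same idea against a raw KVM VCPU file descriptor; the vcpu_fd
parameter and the helper name are illustrative and not part of the patch.

    /* Illustrative sketch, not part of the patch: mirrors the one-entry
     * KVM_SET_MSRS layout used by kvm_put_msr_feature_control(). */
    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <stdint.h>
    #include <errno.h>

    #define MSR_IA32_FEATURE_CONTROL 0x0000003a   /* IA32_FEATURE_CONTROL */

    static int write_feature_control_first(int vcpu_fd, uint64_t value)
    {
        struct {
            struct kvm_msrs info;       /* header: nmsrs (+ padding) */
            struct kvm_msr_entry entry; /* single entry placed right behind it */
        } msr_data = { 0 };
        int ret;

        msr_data.info.nmsrs = 1;
        msr_data.entry.index = MSR_IA32_FEATURE_CONTROL;
        msr_data.entry.data  = value;   /* cleared value on system reset */

        /* Must precede every other KVM_SET_* call of the reset sequence:
         * forcing the VCPU out of nested mode invalidates its state. */
        ret = ioctl(vcpu_fd, KVM_SET_MSRS, &msr_data);
        return ret == 1 ? 0 : (ret < 0 ? -errno : -EIO);
    }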

Comments

Paolo Bonzini Dec. 18, 2013, 9:23 a.m. UTC | #1
On 17/12/2013 20:05, Jan Kiszka wrote:
> If the guest is running in nested mode on system reset, clearing the
> feature control MSR signals the kernel to leave this mode. Recent
> kernels process this properly but leave the VCPU state undefined
> afterwards; it is the job of userspace to bring it back into proper
> shape. Therefore, write this specific MSR first so that none of the
> subsequently transferred state is lost.
> 
> This allows a guest with VMX in use to be reset cleanly.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>

Applied, thanks!

Patch

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 1188482..ec51447 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1104,6 +1104,25 @@ static int kvm_put_tscdeadline_msr(X86CPU *cpu)
     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
 }
 
+/*
+ * Provide a separate write service for the feature control MSR in order to
+ * kick the VCPU out of VMXON or even guest mode on reset. This has to be done
+ * before writing any other state because forcibly leaving nested mode
+ * invalidates the VCPU state.
+ */
+static int kvm_put_msr_feature_control(X86CPU *cpu)
+{
+    struct {
+        struct kvm_msrs info;
+        struct kvm_msr_entry entry;
+    } msr_data;
+
+    kvm_msr_entry_set(&msr_data.entry, MSR_IA32_FEATURE_CONTROL,
+                      cpu->env.msr_ia32_feature_control);
+    msr_data.info.nmsrs = 1;
+    return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, &msr_data);
+}
+
 static int kvm_put_msrs(X86CPU *cpu, int level)
 {
     CPUX86State *env = &cpu->env;
@@ -1204,10 +1223,8 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
         if (cpu->hyperv_vapic) {
             kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_APIC_ASSIST_PAGE, 0);
         }
-        if (has_msr_feature_control) {
-            kvm_msr_entry_set(&msrs[n++], MSR_IA32_FEATURE_CONTROL,
-                              env->msr_ia32_feature_control);
-        }
+        /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
+         *       kvm_put_msr_feature_control. */
     }
     if (env->mcg_cap) {
         int i;
@@ -1801,6 +1818,13 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
 
     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
 
+    if (level >= KVM_PUT_RESET_STATE && has_msr_feature_control) {
+        ret = kvm_put_msr_feature_control(x86_cpu);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
     ret = kvm_getput_regs(x86_cpu, 1);
     if (ret < 0) {
         return ret;
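
As background on why a cleared value forces the VCPU out of VMX operation on
reset: architecturally, IA32_FEATURE_CONTROL (MSR 0x3a) gates VMXON through a
lock bit and two enable bits. The sketch below uses descriptive macro names
for illustration; it is not code taken from the patch or from QEMU's headers.

    /* Architectural bit layout of IA32_FEATURE_CONTROL (illustrative). */
    #define FEATURE_CONTROL_LOCKED                    (1ULL << 0)
    #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX  (1ULL << 1)
    #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1ULL << 2)

A guest that uses VMX will have the lock bit and at least one VMXON enable
bit set; the cleared reset value written by kvm_put_msr_feature_control()
removes them, so the kernel has to leave VMXON or nested guest mode before
any further state can be loaded, which is why kvm_arch_put_registers()
issues this write before everything else.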