diff mbox

[v2,2/3] x86: kvm: Add MTRR support for kvm_get|put_msrs()

Message ID 20140814192409.13303.58779.stgit@gimli.home
State New
Headers show

Commit Message

Alex Williamson Aug. 14, 2014, 7:24 p.m. UTC
The MTRR state in KVM currently runs completely independent of the
QEMU state in CPUX86State.mtrr_*.  This means that on migration, the
target loses MTRR state from the source.  Generally that's ok though
because KVM ignores it and maps everything as write-back anyway.  The
exception to this rule is when we have an assigned device and an IOMMU
that doesn't promote NoSnoop transactions from that device to be cache
coherent.  In that case KVM trusts the guest mapping of memory as
configured in the MTRR.

This patch updates kvm_get|put_msrs() so that we retrieve the actual
vCPU MTRR settings and therefore keep CPUX86State synchronized for
migration.  kvm_put_msrs() is also used on vCPU reset and therefore
allows future modifications of MTRR state at reset to be realized.

Note that the entries array used by both functions was already
slightly undersized for holding every possible MSR, so this patch
increases it by more than the 28 new entries necessary for MTRR state.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: qemu-stable@nongnu.org
---

 target-i386/cpu.h |    2 +
 target-i386/kvm.c |  101 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 101 insertions(+), 2 deletions(-)

Comments

Laszlo Ersek Aug. 14, 2014, 9:20 p.m. UTC | #1
You're going to use my name in contexts that I won't wish to be privy
to. :) I like everything about this patch except:

On 08/14/14 21:24, Alex Williamson wrote:
> The MTRR state in KVM currently runs completely independent of the
> QEMU state in CPUX86State.mtrr_*.  This means that on migration, the
> target loses MTRR state from the source.  Generally that's ok though
> because KVM ignores it and maps everything as write-back anyway.  The
> exception to this rule is when we have an assigned device and an IOMMU
> that doesn't promote NoSnoop transactions from that device to be cache
> coherent.  In that case KVM trusts the guest mapping of memory as
> configured in the MTRR.
> 
> This patch updates kvm_get|put_msrs() so that we retrieve the actual
> vCPU MTRR settings and therefore keep CPUX86State synchronized for
> migration.  kvm_put_msrs() is also used on vCPU reset and therefore
> allows future modifications of MTRR state at reset to be realized.
> 
> Note that the entries array used by both functions was already
> slightly undersized for holding every possible MSR, so this patch
> increases it beyond the 28 new entries necessary for MTRR state.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> Cc: qemu-stable@nongnu.org
> ---
> 
>  target-i386/cpu.h |    2 +
>  target-i386/kvm.c |  101 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 101 insertions(+), 2 deletions(-)
> 
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index d37d857..3460b12 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -337,6 +337,8 @@
>  #define MSR_MTRRphysBase(reg)           (0x200 + 2 * (reg))
>  #define MSR_MTRRphysMask(reg)           (0x200 + 2 * (reg) + 1)
>  
> +#define MSR_MTRRphysIndex(addr)         ((((addr) & ~1u) - 0x200) / 2)
> +
>  #define MSR_MTRRfix64K_00000            0x250
>  #define MSR_MTRRfix16K_80000            0x258
>  #define MSR_MTRRfix16K_A0000            0x259
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index 097fe11..3c46d4a 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -79,6 +79,7 @@ static int lm_capable_kernel;
>  static bool has_msr_hv_hypercall;
>  static bool has_msr_hv_vapic;
>  static bool has_msr_hv_tsc;
> +static bool has_msr_mtrr;
>  
>  static bool has_msr_architectural_pmu;
>  static uint32_t num_architectural_pmu_counters;
> @@ -739,6 +740,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
>          env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
>      }
>  
> +    if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
> +        has_msr_mtrr = true;
> +    }
> +
>      return 0;
>  }
>  
> @@ -1183,7 +1188,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
>      CPUX86State *env = &cpu->env;
>      struct {
>          struct kvm_msrs info;
> -        struct kvm_msr_entry entries[100];
> +        struct kvm_msr_entry entries[150];
>      } msr_data;
>      struct kvm_msr_entry *msrs = msr_data.entries;
>      int n = 0, i;
> @@ -1278,6 +1283,37 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
>              kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_REFERENCE_TSC,
>                                env->msr_hv_tsc);
>          }
> +        if (has_msr_mtrr) {
> +            kvm_msr_entry_set(&msrs[n++], MSR_MTRRdefType, env->mtrr_deftype);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix64K_00000, env->mtrr_fixed[0]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix16K_80000, env->mtrr_fixed[1]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]);
> +            kvm_msr_entry_set(&msrs[n++],
> +                              MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]);
> +            for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
> +                kvm_msr_entry_set(&msrs[n++],
> +                                  MSR_MTRRphysBase(i), env->mtrr_var[i].base);
> +                kvm_msr_entry_set(&msrs[n++],
> +                                  MSR_MTRRphysMask(i), env->mtrr_var[i].mask);
> +            }
> +        }
>  
>          /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
>           *       kvm_put_msr_feature_control. */
> @@ -1484,7 +1520,7 @@ static int kvm_get_msrs(X86CPU *cpu)
>      CPUX86State *env = &cpu->env;
>      struct {
>          struct kvm_msrs info;
> -        struct kvm_msr_entry entries[100];
> +        struct kvm_msr_entry entries[150];
>      } msr_data;
>      struct kvm_msr_entry *msrs = msr_data.entries;
>      int ret, i, n;
> @@ -1572,6 +1608,24 @@ static int kvm_get_msrs(X86CPU *cpu)
>      if (has_msr_hv_tsc) {
>          msrs[n++].index = HV_X64_MSR_REFERENCE_TSC;
>      }
> +    if (has_msr_mtrr) {
> +        msrs[n++].index = MSR_MTRRdefType;
> +        msrs[n++].index = MSR_MTRRfix64K_00000;
> +        msrs[n++].index = MSR_MTRRfix16K_80000;
> +        msrs[n++].index = MSR_MTRRfix16K_A0000;
> +        msrs[n++].index = MSR_MTRRfix4K_C0000;
> +        msrs[n++].index = MSR_MTRRfix4K_C8000;
> +        msrs[n++].index = MSR_MTRRfix4K_D0000;
> +        msrs[n++].index = MSR_MTRRfix4K_D8000;
> +        msrs[n++].index = MSR_MTRRfix4K_E0000;
> +        msrs[n++].index = MSR_MTRRfix4K_E8000;
> +        msrs[n++].index = MSR_MTRRfix4K_F0000;
> +        msrs[n++].index = MSR_MTRRfix4K_F8000;
> +        for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
> +            msrs[n++].index = MSR_MTRRphysBase(i);
> +            msrs[n++].index = MSR_MTRRphysMask(i);
> +        }
> +    }
>  
>      msr_data.info.nmsrs = n;
>      ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
> @@ -1692,6 +1746,49 @@ static int kvm_get_msrs(X86CPU *cpu)
>          case HV_X64_MSR_REFERENCE_TSC:
>              env->msr_hv_tsc = msrs[i].data;
>              break;
> +        case MSR_MTRRdefType:
> +            env->mtrr_deftype = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix64K_00000:
> +            env->mtrr_fixed[0] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix16K_80000:
> +            env->mtrr_fixed[1] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix16K_A0000:
> +            env->mtrr_fixed[2] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix4K_C0000:
> +            env->mtrr_fixed[3] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix4K_C8000:
> +            env->mtrr_fixed[4] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix4K_D0000:
> +            env->mtrr_fixed[5] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix4K_D8000:
> +            env->mtrr_fixed[6] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix4K_E0000:
> +            env->mtrr_fixed[7] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix4K_E8000:
> +            env->mtrr_fixed[8] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix4K_F0000:
> +            env->mtrr_fixed[9] = msrs[i].data;
> +            break;
> +        case MSR_MTRRfix4K_F8000:
> +            env->mtrr_fixed[10] = msrs[i].data;
> +            break;
> +        case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT):

... the off-by-one in this case range. Everything is cool and the range
conforms to
<https://gcc.gnu.org/onlinedocs/gcc-4.9.1/gcc/Case-Ranges.html> (ie. the
range is inclusive), but the *argument* of the MSR_MTRRphysMask() macro
is off-by-one. You should say

    case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):

Peek up to the for loops: the greatest argument you ever pass to
MSR_MTRRphysMask() is (MSR_MTRRcap_VCNT - 1).

Of course this causes no visible bug, because we don't use those
register indices at all (and if we *did* use them, then we'd add new
case labels for them, and then gcc would be required by the standard to
complain about duplicated case labels [*]).

Still, we should be precise.

(
[*]

  6.8.4.2 The switch statement
  Constraints
  [...]
  3 The expression of each case label shall be an integer constant
    expression and no two of the case constant expressions in the same
    switch statement shall have the same value after conversion. [...]

  5.1.1.3 Diagnostics
  1 A conforming implementation shall produce at least one diagnostic
    message (identified in an implementation-defined manner) if a
    preprocessing translation unit or translation unit contains a
    violation of any syntax rule or constraint, [...]
)

> +            if (index & 1) {
> +                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data;
> +            } else {
> +                env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
> +            }
> +            break;
>          }
>      }
>  
> 

Thanks,
Laszlo
Laszlo Ersek Aug. 14, 2014, 9:27 p.m. UTC | #2
On 08/14/14 21:24, Alex Williamson wrote:
> The MTRR state in KVM currently runs completely independent of the
> QEMU state in CPUX86State.mtrr_*.  This means that on migration, the
> target loses MTRR state from the source.  Generally that's ok though
> because KVM ignores it and maps everything as write-back anyway.  The
> exception to this rule is when we have an assigned device and an IOMMU
> that doesn't promote NoSnoop transactions from that device to be cache
> coherent.  In that case KVM trusts the guest mapping of memory as
> configured in the MTRR.
> 
> This patch updates kvm_get|put_msrs() so that we retrieve the actual
> vCPU MTRR settings and therefore keep CPUX86State synchronized for
> migration.  kvm_put_msrs() is also used on vCPU reset and therefore
> allows future modifications of MTRR state at reset to be realized.
> 
> Note that the entries array used by both functions was already
> slightly undersized for holding every possible MSR, so this patch
> increases it beyond the 28 new entries necessary for MTRR state.
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> Cc: Laszlo Ersek <lersek@redhat.com>
> Cc: qemu-stable@nongnu.org
> ---
> 
>  target-i386/cpu.h |    2 +
>  target-i386/kvm.c |  101 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 101 insertions(+), 2 deletions(-)

Another (positive) remark I wanted to add: if we migrate from an
MTRR-capable KVM host that lacks these patches, to an MTRR-capable KVM
host that has these patches, then the migration stream will simply
contain zeros (because the patch-less source never fetched those from
the source-side KVM), so when we send those zeros to the target KVM, we
won't regress (because those zeros should match the "initial KVM MTRR
state" that the target comes up in anyway).

If we migrate from patchful to patchless (ie. reverse direction), then
we lose MTRR state, which is the current status quo; not bad.

Thanks
Laszlo
Alex Williamson Aug. 14, 2014, 9:32 p.m. UTC | #3
On Thu, 2014-08-14 at 23:20 +0200, Laszlo Ersek wrote:
> You're going to use my name in contexts that I won't wish to be privy
> to. :) I like everything about this patch except:
> 
> > +        case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT):
> 
> ... the off-by-one in this case range. Everything is cool and the range
> conforms to
> <https://gcc.gnu.org/onlinedocs/gcc-4.9.1/gcc/Case-Ranges.html> (ie. the
> range is inclusive), but the *argument* of the MSR_MTRRphysMask() macro
> is off-by-one. You should say
> 
>     case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1):
> 
> Peek up to the for loops: the greatest argument you ever pass to
> MSR_MTRRphysMask() is (MSR_MTRRcap_VCNT - 1).
> 
> Of course this causes no visible bug, because we don't use those
> register indices at all (and if we *did* use them, then we'd add new
> case labels for them, and then gcc would be required by the standard to
> complain about duplicated case labels [*]).

Nope, legitimate bug.  v3 on the way...
diff mbox

Patch

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index d37d857..3460b12 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -337,6 +337,8 @@ 
 #define MSR_MTRRphysBase(reg)           (0x200 + 2 * (reg))
 #define MSR_MTRRphysMask(reg)           (0x200 + 2 * (reg) + 1)
 
+#define MSR_MTRRphysIndex(addr)         ((((addr) & ~1u) - 0x200) / 2)
+
 #define MSR_MTRRfix64K_00000            0x250
 #define MSR_MTRRfix16K_80000            0x258
 #define MSR_MTRRfix16K_A0000            0x259
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 097fe11..3c46d4a 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -79,6 +79,7 @@  static int lm_capable_kernel;
 static bool has_msr_hv_hypercall;
 static bool has_msr_hv_vapic;
 static bool has_msr_hv_tsc;
+static bool has_msr_mtrr;
 
 static bool has_msr_architectural_pmu;
 static uint32_t num_architectural_pmu_counters;
@@ -739,6 +740,10 @@  int kvm_arch_init_vcpu(CPUState *cs)
         env->kvm_xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave));
     }
 
+    if (env->features[FEAT_1_EDX] & CPUID_MTRR) {
+        has_msr_mtrr = true;
+    }
+
     return 0;
 }
 
@@ -1183,7 +1188,7 @@  static int kvm_put_msrs(X86CPU *cpu, int level)
     CPUX86State *env = &cpu->env;
     struct {
         struct kvm_msrs info;
-        struct kvm_msr_entry entries[100];
+        struct kvm_msr_entry entries[150];
     } msr_data;
     struct kvm_msr_entry *msrs = msr_data.entries;
     int n = 0, i;
@@ -1278,6 +1283,37 @@  static int kvm_put_msrs(X86CPU *cpu, int level)
             kvm_msr_entry_set(&msrs[n++], HV_X64_MSR_REFERENCE_TSC,
                               env->msr_hv_tsc);
         }
+        if (has_msr_mtrr) {
+            kvm_msr_entry_set(&msrs[n++], MSR_MTRRdefType, env->mtrr_deftype);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix64K_00000, env->mtrr_fixed[0]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix16K_80000, env->mtrr_fixed[1]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]);
+            kvm_msr_entry_set(&msrs[n++],
+                              MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]);
+            for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
+                kvm_msr_entry_set(&msrs[n++],
+                                  MSR_MTRRphysBase(i), env->mtrr_var[i].base);
+                kvm_msr_entry_set(&msrs[n++],
+                                  MSR_MTRRphysMask(i), env->mtrr_var[i].mask);
+            }
+        }
 
         /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
          *       kvm_put_msr_feature_control. */
@@ -1484,7 +1520,7 @@  static int kvm_get_msrs(X86CPU *cpu)
     CPUX86State *env = &cpu->env;
     struct {
         struct kvm_msrs info;
-        struct kvm_msr_entry entries[100];
+        struct kvm_msr_entry entries[150];
     } msr_data;
     struct kvm_msr_entry *msrs = msr_data.entries;
     int ret, i, n;
@@ -1572,6 +1608,24 @@  static int kvm_get_msrs(X86CPU *cpu)
     if (has_msr_hv_tsc) {
         msrs[n++].index = HV_X64_MSR_REFERENCE_TSC;
     }
+    if (has_msr_mtrr) {
+        msrs[n++].index = MSR_MTRRdefType;
+        msrs[n++].index = MSR_MTRRfix64K_00000;
+        msrs[n++].index = MSR_MTRRfix16K_80000;
+        msrs[n++].index = MSR_MTRRfix16K_A0000;
+        msrs[n++].index = MSR_MTRRfix4K_C0000;
+        msrs[n++].index = MSR_MTRRfix4K_C8000;
+        msrs[n++].index = MSR_MTRRfix4K_D0000;
+        msrs[n++].index = MSR_MTRRfix4K_D8000;
+        msrs[n++].index = MSR_MTRRfix4K_E0000;
+        msrs[n++].index = MSR_MTRRfix4K_E8000;
+        msrs[n++].index = MSR_MTRRfix4K_F0000;
+        msrs[n++].index = MSR_MTRRfix4K_F8000;
+        for (i = 0; i < MSR_MTRRcap_VCNT; i++) {
+            msrs[n++].index = MSR_MTRRphysBase(i);
+            msrs[n++].index = MSR_MTRRphysMask(i);
+        }
+    }
 
     msr_data.info.nmsrs = n;
     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data);
@@ -1692,6 +1746,49 @@  static int kvm_get_msrs(X86CPU *cpu)
         case HV_X64_MSR_REFERENCE_TSC:
             env->msr_hv_tsc = msrs[i].data;
             break;
+        case MSR_MTRRdefType:
+            env->mtrr_deftype = msrs[i].data;
+            break;
+        case MSR_MTRRfix64K_00000:
+            env->mtrr_fixed[0] = msrs[i].data;
+            break;
+        case MSR_MTRRfix16K_80000:
+            env->mtrr_fixed[1] = msrs[i].data;
+            break;
+        case MSR_MTRRfix16K_A0000:
+            env->mtrr_fixed[2] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_C0000:
+            env->mtrr_fixed[3] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_C8000:
+            env->mtrr_fixed[4] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_D0000:
+            env->mtrr_fixed[5] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_D8000:
+            env->mtrr_fixed[6] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_E0000:
+            env->mtrr_fixed[7] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_E8000:
+            env->mtrr_fixed[8] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_F0000:
+            env->mtrr_fixed[9] = msrs[i].data;
+            break;
+        case MSR_MTRRfix4K_F8000:
+            env->mtrr_fixed[10] = msrs[i].data;
+            break;
+        case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT):
+            if (index & 1) {
+                env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data;
+            } else {
+                env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data;
+            }
+            break;
         }
     }