Patchwork resent: x86/cpuid: propagate further CPUID leafs when -cpu host

login
register
mail settings
Submitter Andre Przywara
Date May 21, 2010, 7:50 a.m.
Message ID <1274428240-10801-1-git-send-email-andre.przywara@amd.com>
Download mbox | patch
Permalink /patch/53159/
State New
Headers show

Comments

Andre Przywara - May 21, 2010, 7:50 a.m.
-cpu host currently only propagates the CPU's family/model/stepping,
the brand name and the feature bits.
Add a whitelist of safe CPUID leafs to let the guest see the actual
CPU's cache details and other things.

Signed-off-by: Andre Przywara <andre.przywara@amd.com>
---
 target-i386/cpu.h   |    6 +++++-
 target-i386/cpuid.c |   45 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 42 insertions(+), 9 deletions(-)

Again this patch was not yet applied, without any discussion I can remember.
Please apply.

Thanks,
Andre.
Anthony Liguori - May 24, 2010, 10:10 p.m.
On 05/21/2010 02:50 AM, Andre Przywara wrote:
> -cpu host currently only propagates the CPU's family/model/stepping,
> the brand name and the feature bits.
> Add a whitelist of safe CPUID leafs to let the guest see the actual
> CPU's cache details and other things.
>
> Signed-off-by: Andre Przywara<andre.przywara@amd.com>
>    

The problem I can see is that this greatly increases the chances of 
problems with live migration since we don't migrate the cpuid state.

What's the benefit of exposing this information to the guest?

Regards,

Anthony Liguori

> ---
>   target-i386/cpu.h   |    6 +++++-
>   target-i386/cpuid.c |   45 +++++++++++++++++++++++++++++++++++++--------
>   2 files changed, 42 insertions(+), 9 deletions(-)
>
> Again this patch was not yet applied, without any discussion I can remember.
> Please apply.
>
> Thanks,
> Andre.
>
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 548ab80..77cbbdb 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -581,6 +581,10 @@ typedef struct {
>
>   #define NB_MMU_MODES 2
>
> +#define CPUID_FLAGS_BUILTIN          (1<<  0)
> +#define CPUID_FLAGS_VENDOR_OVERRIDE  (1<<  1)
> +#define CPUID_FLAGS_HOST             (1<<  2)
> +
>   typedef struct CPUX86State {
>       /* standard registers */
>       target_ulong regs[CPU_NB_REGS];
> @@ -685,7 +689,7 @@ typedef struct CPUX86State {
>       uint32_t cpuid_ext2_features;
>       uint32_t cpuid_ext3_features;
>       uint32_t cpuid_apic_id;
> -    int cpuid_vendor_override;
> +    uint32_t cpuid_flags;
>
>       /* MTRRs */
>       uint64_t mtrr_fixed[11];
> diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
> index 99d1f44..a80baa4 100644
> --- a/target-i386/cpuid.c
> +++ b/target-i386/cpuid.c
> @@ -213,7 +213,6 @@ typedef struct x86_def_t {
>       uint32_t features, ext_features, ext2_features, ext3_features, kvm_features;
>       uint32_t xlevel;
>       char model_id[48];
> -    int vendor_override;
>       uint32_t flags;
>   } x86_def_t;
>
> @@ -489,7 +488,7 @@ static int cpu_x86_fill_host(x86_def_t *x86_cpu_def)
>       x86_cpu_def->ext2_features = edx;
>       x86_cpu_def->ext3_features = ecx;
>       cpu_x86_fill_model_id(x86_cpu_def->model_id);
> -    x86_cpu_def->vendor_override = 0;
> +    x86_cpu_def->flags = CPUID_FLAGS_HOST;
>
>       return 0;
>   }
> @@ -633,7 +632,7 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
>                       x86_cpu_def->vendor2 |= ((uint8_t)val[i + 4])<<  (8 * i);
>                       x86_cpu_def->vendor3 |= ((uint8_t)val[i + 8])<<  (8 * i);
>                   }
> -                x86_cpu_def->vendor_override = 1;
> +                x86_cpu_def->flags |= CPUID_FLAGS_VENDOR_OVERRIDE;
>               } else if (!strcmp(featurestr, "model_id")) {
>                   pstrcpy(x86_cpu_def->model_id, sizeof(x86_cpu_def->model_id),
>                           val);
> @@ -731,7 +730,8 @@ void x86_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
>           return;
>       }
>       for (def = x86_defs; def; def = def->next) {
> -        snprintf(buf, sizeof (buf), def->flags ? "[%s]": "%s", def->name);
> +        snprintf(buf, sizeof (buf),
> +                 def->flags&  CPUID_FLAGS_BUILTIN ? "[%s]": "%s", def->name);
>           if (model || dump) {
>               (*cpu_fprintf)(f, "x86 %16s  %-48s\n", buf, def->model_id);
>           } else {
> @@ -785,7 +785,7 @@ int cpu_x86_register (CPUX86State *env, const char *cpu_model)
>           env->cpuid_vendor2 = CPUID_VENDOR_INTEL_2;
>           env->cpuid_vendor3 = CPUID_VENDOR_INTEL_3;
>       }
> -    env->cpuid_vendor_override = def->vendor_override;
> +    env->cpuid_flags = def->flags;
>       env->cpuid_level = def->level;
>       if (def->family>  0x0f)
>           env->cpuid_version = 0xf00 | ((def->family - 0x0f)<<  20);
> @@ -941,7 +941,7 @@ void x86_cpudef_setup(void)
>
>       for (i = 0; i<  ARRAY_SIZE(builtin_x86_defs); ++i) {
>           builtin_x86_defs[i].next = x86_defs;
> -        builtin_x86_defs[i].flags = 1;
> +        builtin_x86_defs[i].flags |= CPUID_FLAGS_BUILTIN;
>           x86_defs =&builtin_x86_defs[i];
>       }
>   #if !defined(CONFIG_USER_ONLY)
> @@ -962,22 +962,51 @@ static void get_cpuid_vendor(CPUX86State *env, uint32_t *ebx,
>        * this if you want to use KVM's sysenter/syscall emulation
>        * in compatibility mode and when doing cross vendor migration
>        */
> -    if (kvm_enabled()&&  ! env->cpuid_vendor_override) {
> +    if (kvm_enabled()&&
> +        (env->cpuid_flags&  CPUID_FLAGS_VENDOR_OVERRIDE) == 0) {
>           host_cpuid(0, 0, NULL, ebx, ecx, edx);
>       }
>   }
>
> +/* safe CPUID leafs to propagate to guest if -cpu host is specified
> + * Intel defined leafs:
> + * Cache descriptors (0x02)
> + * Deterministic cache parameters (0x04)
> + * Monitor/MWAIT parameters (0x05)
> + */
> +#define CPUID_LEAF_PROPAGATE ((1<<  0x02) | (1<<  0x04) | (1<<  0x05))
> +
> +/* AMD defined leafs:
> + * L1 Cache and TLB (0x05)
> + * L2+L3 TLB (0x06)
> + * LongMode address size (0x08)
> + * 1GB page TLB (0x19)
> + * Performance optimization (0x1A)
> + */
> +#define CPUID_LEAF_PROPAGATE_EXTENDED ((1<<  0x05) | (1<<  0x06) |\
> +                                       (1<<  0x08) | (1<<  0x19) | (1<<  0x1A))
> +
>   void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
>                      uint32_t *eax, uint32_t *ebx,
>                      uint32_t *ecx, uint32_t *edx)
>   {
> -    /* test if maximum index reached */
>       if (index&  0x80000000) {
> +    /* test if maximum index reached */
>           if (index>  env->cpuid_xlevel)
>               index = env->cpuid_level;
> +        if ((env->cpuid_flags&  CPUID_FLAGS_HOST)&&
> +            ((1<<  (index - 0x80000000))&  CPUID_LEAF_PROPAGATE_EXTENDED)) {
> +            host_cpuid(index, count, eax, ebx, ecx, edx);
> +            return;
> +        }
>       } else {
>           if (index>  env->cpuid_level)
>               index = env->cpuid_level;
> +        if ((env->cpuid_flags&  CPUID_FLAGS_HOST)&&
> +            ((1<<  index)&  CPUID_LEAF_PROPAGATE)) {
> +            host_cpuid(index, count, eax, ebx, ecx, edx);
> +            return;
> +        }
>       }
>
>       switch(index) {
>
Avi Kivity - May 25, 2010, 7:27 a.m.
On 05/25/2010 01:10 AM, Anthony Liguori wrote:
> On 05/21/2010 02:50 AM, Andre Przywara wrote:
>> -cpu host currently only propagates the CPU's family/model/stepping,
>> the brand name and the feature bits.
>> Add a whitelist of safe CPUID leafs to let the guest see the actual
>> CPU's cache details and other things.
>>
>> Signed-off-by: Andre Przywara<andre.przywara@amd.com>
>
> The problem I can see is that this greatly increases the chances of 
> problems with live migration since we don't migrate the cpuid state.

-cpu host is already problematic for live migration.

Are you talking about the state maintained by the cpuid instruction?  
Yes, we need to migrate those bits.

>
> What's the benefit of exposing this information to the guest?
>

Some algorithms adjust themselves based on the cache size.  If you have 
several passes over a large data set, it's often better to run each set 
of passes on a subset of the dataset that fits in cache, then stitch the 
subsets together.
Andre Przywara - May 25, 2010, 1:21 p.m.
Anthony Liguori wrote:
> On 05/21/2010 02:50 AM, Andre Przywara wrote:
>> -cpu host currently only propagates the CPU's family/model/stepping,
>> the brand name and the feature bits.
>> Add a whitelist of safe CPUID leafs to let the guest see the actual
>> CPU's cache details and other things.
>>
>> Signed-off-by: Andre Przywara<andre.przywara@amd.com>
>>    
> 
> The problem I can see is that this greatly increases the chances of 
> problems with live migration since we don't migrate the cpuid state.
I think that should be fixed. Although -cpu host is not a wise choice 
for migration, even without these additional leaves the feature bits 
probably don't match between source and target.

> What's the benefit of exposing this information to the guest?

That is mostly to propagate the cache size and organization parameters 
to the guest:
 >> +/* safe CPUID leafs to propagate to guest if -cpu host is specified
 >> + * Intel defined leafs:
 >> + * Cache descriptors (0x02)
 >> + * Deterministic cache parameters (0x04)
 >> + * Monitor/MWAIT parameters (0x05)
 >> + *
 >> + * AMD defined leafs:
 >> + * L1 Cache and TLB (0x05)
 >> + * L2+L3 TLB (0x06)
 >> + * LongMode address size (0x08)
 >> + * 1GB page TLB (0x19)
 >> + * Performance optimization (0x1A)
 >> + */
Since at least L1 and L2 caches are mostly private to vCPUs, I see no 
reason to disguise them.

Regards,
Andre.
Anthony Liguori - May 25, 2010, 1:26 p.m.
On 05/25/2010 08:21 AM, Andre Przywara wrote:
>> What's the benefit of exposing this information to the guest?
>
> That is mostly to propagate the cache size and organization parameters 
> to the guest:
> >> +/* safe CPUID leafs to propagate to guest if -cpu host is specified
> >> + * Intel defined leafs:
> >> + * Cache descriptors (0x02)
> >> + * Deterministic cache parameters (0x04)
> >> + * Monitor/MWAIT parameters (0x05)
> >> + *
> >> + * AMD defined leafs:
> >> + * L1 Cache and TLB (0x05)
> >> + * L2+L3 TLB (0x06)
> >> + * LongMode address size (0x08)
> >> + * 1GB page TLB (0x19)
> >> + * Performance optimization (0x1A)
> >> + */
> Since at least L1 and L2 caches are mostly private to vCPUs, I see no 
> reason to disguise them.

But in practice, what is it useful for?  Just because we can expose it 
doesn't mean we should.

Regards,

Anthony Liguori

> Regards,
> Andre.
>
Avi Kivity - May 25, 2010, 1:47 p.m.
On 05/25/2010 04:26 PM, Anthony Liguori wrote:
> On 05/25/2010 08:21 AM, Andre Przywara wrote:
>>> What's the benefit of exposing this information to the guest?
>>
>> That is mostly to propagate the cache size and organization 
>> parameters to the guest:
>> >> +/* safe CPUID leafs to propagate to guest if -cpu host is specified
>> >> + * Intel defined leafs:
>> >> + * Cache descriptors (0x02)
>> >> + * Deterministic cache parameters (0x04)
>> >> + * Monitor/MWAIT parameters (0x05)
>> >> + *
>> >> + * AMD defined leafs:
>> >> + * L1 Cache and TLB (0x05)
>> >> + * L2+L3 TLB (0x06)
>> >> + * LongMode address size (0x08)
>> >> + * 1GB page TLB (0x19)
>> >> + * Performance optimization (0x1A)
>> >> + */
>> Since at least L1 and L2 caches are mostly private to vCPUs, I see no 
>> reason to disguise them.
>
> But in practice, what is it useful for? 

See my other mail.

> Just because we can expose it doesn't mean we should.

What's the point of -cpu host then?
Andre Przywara - May 26, 2010, 10:52 a.m.
Anthony Liguori wrote:
> On 05/25/2010 08:21 AM, Andre Przywara wrote:
>>> What's the benefit of exposing this information to the guest?
>> That is mostly to propagate the cache size and organization parameters 
>> to the guest:
>>>> +/* safe CPUID leafs to propagate to guest if -cpu host is specified
>>>> + * Intel defined leafs:
>>>> + * Cache descriptors (0x02)
>>>> + * Deterministic cache parameters (0x04)
>>>> + * Monitor/MWAIT parameters (0x05)
>>>> + *
>>>> + * AMD defined leafs:
>>>> + * L1 Cache and TLB (0x05)
>>>> + * L2+L3 TLB (0x06)
>>>> + * LongMode address size (0x08)
>>>> + * 1GB page TLB (0x19)
>>>> + * Performance optimization (0x1A)
>>>> + */
>> Since at least L1 and L2 caches are mostly private to vCPUs, I see no 
>> reason to disguise them.
> 
> But in practice, what is it useful for?  Just because we can expose it 
> doesn't mean we should.
Beside the obvious high performance libraries (like the AMD ACML) also 
JVMs (and probably other managed code runtimes) use the cache 
information for optimization. Given the fact that we have a fairly large 
range of actual L2 cache sizes (from 256KB to 6MB on Intel) the fixed 
cache size that QEMU reports (1MB) is quite a bit off most of the times.
-cpu host is targeted to give the best performance to the user, with the 
drawback of missing or very limited migration experience. So we should 
expose as many features as possible.

Regards,
Andre.

Patch

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 548ab80..77cbbdb 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -581,6 +581,10 @@  typedef struct {
 
 #define NB_MMU_MODES 2
 
+#define CPUID_FLAGS_BUILTIN          (1 << 0)
+#define CPUID_FLAGS_VENDOR_OVERRIDE  (1 << 1)
+#define CPUID_FLAGS_HOST             (1 << 2) 
+
 typedef struct CPUX86State {
     /* standard registers */
     target_ulong regs[CPU_NB_REGS];
@@ -685,7 +689,7 @@  typedef struct CPUX86State {
     uint32_t cpuid_ext2_features;
     uint32_t cpuid_ext3_features;
     uint32_t cpuid_apic_id;
-    int cpuid_vendor_override;
+    uint32_t cpuid_flags;
 
     /* MTRRs */
     uint64_t mtrr_fixed[11];
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 99d1f44..a80baa4 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -213,7 +213,6 @@  typedef struct x86_def_t {
     uint32_t features, ext_features, ext2_features, ext3_features, kvm_features;
     uint32_t xlevel;
     char model_id[48];
-    int vendor_override;
     uint32_t flags;
 } x86_def_t;
 
@@ -489,7 +488,7 @@  static int cpu_x86_fill_host(x86_def_t *x86_cpu_def)
     x86_cpu_def->ext2_features = edx;
     x86_cpu_def->ext3_features = ecx;
     cpu_x86_fill_model_id(x86_cpu_def->model_id);
-    x86_cpu_def->vendor_override = 0;
+    x86_cpu_def->flags = CPUID_FLAGS_HOST;
 
     return 0;
 }
@@ -633,7 +632,7 @@  static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
                     x86_cpu_def->vendor2 |= ((uint8_t)val[i + 4]) << (8 * i);
                     x86_cpu_def->vendor3 |= ((uint8_t)val[i + 8]) << (8 * i);
                 }
-                x86_cpu_def->vendor_override = 1;
+                x86_cpu_def->flags |= CPUID_FLAGS_VENDOR_OVERRIDE;
             } else if (!strcmp(featurestr, "model_id")) {
                 pstrcpy(x86_cpu_def->model_id, sizeof(x86_cpu_def->model_id),
                         val);
@@ -731,7 +730,8 @@  void x86_cpu_list (FILE *f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
         return;
     }
     for (def = x86_defs; def; def = def->next) {
-        snprintf(buf, sizeof (buf), def->flags ? "[%s]": "%s", def->name);
+        snprintf(buf, sizeof (buf),
+                 def->flags & CPUID_FLAGS_BUILTIN ? "[%s]": "%s", def->name);
         if (model || dump) {
             (*cpu_fprintf)(f, "x86 %16s  %-48s\n", buf, def->model_id);
         } else {
@@ -785,7 +785,7 @@  int cpu_x86_register (CPUX86State *env, const char *cpu_model)
         env->cpuid_vendor2 = CPUID_VENDOR_INTEL_2;
         env->cpuid_vendor3 = CPUID_VENDOR_INTEL_3;
     }
-    env->cpuid_vendor_override = def->vendor_override;
+    env->cpuid_flags = def->flags;
     env->cpuid_level = def->level;
     if (def->family > 0x0f)
         env->cpuid_version = 0xf00 | ((def->family - 0x0f) << 20);
@@ -941,7 +941,7 @@  void x86_cpudef_setup(void)
 
     for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); ++i) {
         builtin_x86_defs[i].next = x86_defs;
-        builtin_x86_defs[i].flags = 1;
+        builtin_x86_defs[i].flags |= CPUID_FLAGS_BUILTIN;
         x86_defs = &builtin_x86_defs[i];
     }
 #if !defined(CONFIG_USER_ONLY)
@@ -962,22 +962,51 @@  static void get_cpuid_vendor(CPUX86State *env, uint32_t *ebx,
      * this if you want to use KVM's sysenter/syscall emulation
      * in compatibility mode and when doing cross vendor migration
      */
-    if (kvm_enabled() && ! env->cpuid_vendor_override) {
+    if (kvm_enabled() &&
+        (env->cpuid_flags & CPUID_FLAGS_VENDOR_OVERRIDE) == 0) {
         host_cpuid(0, 0, NULL, ebx, ecx, edx);
     }
 }
 
+/* safe CPUID leafs to propagate to guest if -cpu host is specified
+ * Intel defined leafs:
+ * Cache descriptors (0x02)
+ * Deterministic cache parameters (0x04)
+ * Monitor/MWAIT parameters (0x05)
+ */
+#define CPUID_LEAF_PROPAGATE ((1 << 0x02) | (1 << 0x04) | (1 << 0x05))
+
+/* AMD defined leafs:
+ * L1 Cache and TLB (0x05)
+ * L2+L3 TLB (0x06)
+ * LongMode address size (0x08)
+ * 1GB page TLB (0x19)
+ * Performance optimization (0x1A)
+ */
+#define CPUID_LEAF_PROPAGATE_EXTENDED ((1 << 0x05) | (1 << 0x06) |\
+                                       (1 << 0x08) | (1 << 0x19) | (1 << 0x1A))
+
 void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
                    uint32_t *eax, uint32_t *ebx,
                    uint32_t *ecx, uint32_t *edx)
 {
-    /* test if maximum index reached */
     if (index & 0x80000000) {
+    /* test if maximum index reached */
         if (index > env->cpuid_xlevel)
             index = env->cpuid_level;
+        if ((env->cpuid_flags & CPUID_FLAGS_HOST) &&
+            ((1 << (index - 0x80000000)) & CPUID_LEAF_PROPAGATE_EXTENDED)) {
+            host_cpuid(index, count, eax, ebx, ecx, edx);
+            return;
+        }
     } else {
         if (index > env->cpuid_level)
             index = env->cpuid_level;
+        if ((env->cpuid_flags & CPUID_FLAGS_HOST) &&
+            ((1 << index) & CPUID_LEAF_PROPAGATE)) {
+            host_cpuid(index, count, eax, ebx, ecx, edx);
+            return;
+        }
     }
 
     switch(index) {