Patchwork [2/2] Expose tsc deadline timer cpuid to guest

login
register
mail settings
Submitter Liu, Jinsong
Date Dec. 28, 2011, 9:55 p.m.
Message ID <DE8DF0795D48FD4CA783C40EC8292335F1B6@SHSMSX101.ccr.corp.intel.com>
Download mbox | patch
Permalink /patch/133463/
State New
Headers show

Comments

Liu, Jinsong - Dec. 28, 2011, 9:55 p.m.
From 3a78adf8006ec6189bfe2f55f7ae213e75bf3815 Mon Sep 17 00:00:00 2001
From: Liu Jinsong <jinsong.liu@intel.com>
Date: Thu, 29 Dec 2011 05:28:12 +0800
Subject: [PATCH 2/2] Expose tsc deadline timer cpuid to guest

Depend on several factors:
1. Considering live migration, user enable/disable tsc deadline timer;
2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it;
3. If in the future qemu support tsc deadline timer emulation,
   and guest use qemu apic, add cpuid exposing case then.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
---
 target-i386/cpu.h   |    2 ++
 target-i386/cpuid.c |    7 ++++++-
 target-i386/kvm.c   |   13 +++++++++++++
 3 files changed, 21 insertions(+), 1 deletions(-)
Jan Kiszka - Jan. 4, 2012, 4:48 p.m.
On 2011-12-28 19:55, Liu, Jinsong wrote:
>>From 3a78adf8006ec6189bfe2f55f7ae213e75bf3815 Mon Sep 17 00:00:00 2001
> From: Liu Jinsong <jinsong.liu@intel.com>
> Date: Thu, 29 Dec 2011 05:28:12 +0800
> Subject: [PATCH 2/2] Expose tsc deadline timer cpuid to guest
> 
> Depend on several factors:
> 1. Considering live migration, user enable/disable tsc deadline timer;
> 2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it;
> 3. If in the future qemu support tsc deadline timer emulation,
>    and guest use qemu apic, add cpuid exposing case then.

This requires some logic change and then rewording:

- enable TSC deadline timer support by default if in-kernel irqchip is
  used
- disable it on user request via a cpu feature flag
- disable it for older machine types (see below) by default

TSC deadline timer emulation in user space is a different story to be
told once we have a patch for it.

> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> ---
>  target-i386/cpu.h   |    2 ++
>  target-i386/cpuid.c |    7 ++++++-
>  target-i386/kvm.c   |   13 +++++++++++++
>  3 files changed, 21 insertions(+), 1 deletions(-)
> 
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 177d8aa..f2d0ad5 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -399,6 +399,7 @@
>  #define CPUID_EXT_X2APIC   (1 << 21)
>  #define CPUID_EXT_MOVBE    (1 << 22)
>  #define CPUID_EXT_POPCNT   (1 << 23)
> +#define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24)
>  #define CPUID_EXT_XSAVE    (1 << 26)
>  #define CPUID_EXT_OSXSAVE  (1 << 27)
>  #define CPUID_EXT_HYPERVISOR  (1 << 31)
> @@ -693,6 +694,7 @@ typedef struct CPUX86State {
>  
>      uint64_t tsc;
>      uint64_t tsc_deadline;
> +    bool tsc_deadline_timer_enabled;
>  
>      uint64_t mcg_status;
>      uint64_t msr_ia32_misc_enable;
> diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
> index 0b3af90..fe749e0 100644
> --- a/target-i386/cpuid.c
> +++ b/target-i386/cpuid.c
> @@ -48,7 +48,7 @@ static const char *ext_feature_name[] = {
>      "fma", "cx16", "xtpr", "pdcm",
>      NULL, NULL, "dca", "sse4.1|sse4_1",
>      "sse4.2|sse4_2", "x2apic", "movbe", "popcnt",
> -    NULL, "aes", "xsave", "osxsave",
> +    "tsc_deadline", "aes", "xsave", "osxsave",
>      "avx", NULL, NULL, "hypervisor",
>  };
>  static const char *ext2_feature_name[] = {
> @@ -225,6 +225,7 @@ typedef struct x86_def_t {
>      int model;
>      int stepping;
>      int tsc_khz;
> +    bool tsc_deadline_timer_enabled;
>      uint32_t features, ext_features, ext2_features, ext3_features;
>      uint32_t kvm_features, svm_features;
>      uint32_t xlevel;
> @@ -742,6 +743,9 @@ static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
>      x86_cpu_def->ext3_features &= ~minus_ext3_features;
>      x86_cpu_def->kvm_features &= ~minus_kvm_features;
>      x86_cpu_def->svm_features &= ~minus_svm_features;
> +    /* Defaultly user don't against tsc_deadline_timer */
> +    x86_cpu_def->tsc_deadline_timer_enabled =
> +        !(minus_ext_features & CPUID_EXT_TSC_DEADLINE_TIMER);
>      if (check_cpuid) {
>          if (check_features_against_host(x86_cpu_def) && enforce_cpuid)
>              goto error;
> @@ -885,6 +889,7 @@ int cpu_x86_register (CPUX86State *env, const char *cpu_model)
>      env->cpuid_ext4_features = def->ext4_features;
>      env->cpuid_xlevel2 = def->xlevel2;
>      env->tsc_khz = def->tsc_khz;
> +    env->tsc_deadline_timer_enabled = def->tsc_deadline_timer_enabled;
>      if (!kvm_enabled()) {
>          env->cpuid_features &= TCG_FEATURES;
>          env->cpuid_ext_features &= TCG_EXT_FEATURES;
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index d50de90..79baf0b 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -370,6 +370,19 @@ int kvm_arch_init_vcpu(CPUState *env)
>      i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
>      env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX);
>      env->cpuid_ext_features |= i;
> +    /*
> +     * 1. Considering live migration, user enable/disable tsc deadline timer;
> +     * 2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it;
> +     * 3. If in the future qemu support tsc deadline timer emulation,
> +     *    and guest use qemu apic, add cpuid exposing case then.
> +     */

See above. Also, I don't think this comment applies very well to this
function.

> +    env->cpuid_ext_features &= ~CPUID_EXT_TSC_DEADLINE_TIMER;

Can that feature possibly be set in cpuid_ext_features? I thought the
kernel now refrains from this.

> +    if (env->tsc_deadline_timer_enabled) {
> +        if (kvm_irqchip_in_kernel() &&
> +            kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) {
> +            env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER;
> +        }
> +    }
>  
>      env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(s, 0x80000001,
>                                                               0, R_EDX);

Sorry, it remains bogus to expose the tsc deadline timer feature on
machines < pc-1.1. That's just like we introduced kvmclock only to
pc-0.14 onward. The reason is that guest OSes so far running on qemu-1.0
or older without deadline timer support must not find that feature when
being migrated to a host with qemu-1.1 in pc-1.0 compat mode. Yes, the
user can explicitly disable it, but that is not the idea of legacy
machine models. They should provide the very same environment that older
qemu versions offered.

Jan
Liu, Jinsong - Jan. 5, 2012, 8:07 p.m.
> This requires some logic change and then rewording:
> 
> - enable TSC deadline timer support by default if in-kernel irqchip is
>   used
> - disable it on user request via a cpu feature flag

Yes, the logic has been implemented by the former patch as:
+    if (env->tsc_deadline_timer_enabled) {	// user control, default is to authorize tsc deadline timer feature
+        if (kvm_irqchip_in_kernel() &&		// in-kerenl irqchip is used
+            kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) {
+            env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER;
+        }
+    }

> - disable it for older machine types (see below) by default
> 
> TSC deadline timer emulation in user space is a different story to be
> told once we have a patch for it.
> 
>> 
>> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> ---
>>  target-i386/cpu.h   |    2 ++
>>  target-i386/cpuid.c |    7 ++++++-
>>  target-i386/kvm.c   |   13 +++++++++++++
>>  3 files changed, 21 insertions(+), 1 deletions(-)
>> 
>> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
>> index 177d8aa..f2d0ad5 100644
>> --- a/target-i386/cpu.h
>> +++ b/target-i386/cpu.h
>> @@ -399,6 +399,7 @@
>>  #define CPUID_EXT_X2APIC   (1 << 21)
>>  #define CPUID_EXT_MOVBE    (1 << 22)
>>  #define CPUID_EXT_POPCNT   (1 << 23)
>> +#define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24)
>>  #define CPUID_EXT_XSAVE    (1 << 26)
>>  #define CPUID_EXT_OSXSAVE  (1 << 27)
>>  #define CPUID_EXT_HYPERVISOR  (1 << 31)
>> @@ -693,6 +694,7 @@ typedef struct CPUX86State {
>> 
>>      uint64_t tsc;
>>      uint64_t tsc_deadline;
>> +    bool tsc_deadline_timer_enabled;
>> 
>>      uint64_t mcg_status;
>>      uint64_t msr_ia32_misc_enable;
>> diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
>> index 0b3af90..fe749e0 100644
>> --- a/target-i386/cpuid.c
>> +++ b/target-i386/cpuid.c
>> @@ -48,7 +48,7 @@ static const char *ext_feature_name[] = {
>>      "fma", "cx16", "xtpr", "pdcm",
>>      NULL, NULL, "dca", "sse4.1|sse4_1",
>>      "sse4.2|sse4_2", "x2apic", "movbe", "popcnt",
>> -    NULL, "aes", "xsave", "osxsave",
>> +    "tsc_deadline", "aes", "xsave", "osxsave",
>>      "avx", NULL, NULL, "hypervisor",
>>  };
>>  static const char *ext2_feature_name[] = {
>> @@ -225,6 +225,7 @@ typedef struct x86_def_t {
>>      int model;
>>      int stepping;
>>      int tsc_khz;
>> +    bool tsc_deadline_timer_enabled;
>>      uint32_t features, ext_features, ext2_features, ext3_features;
>>      uint32_t kvm_features, svm_features;
>>      uint32_t xlevel;
>> @@ -742,6 +743,9 @@ static int cpu_x86_find_by_name(x86_def_t
>>      *x86_cpu_def, const char *cpu_model) x86_cpu_def->ext3_features
>>      &= ~minus_ext3_features; x86_cpu_def->kvm_features &=
>>      ~minus_kvm_features; x86_cpu_def->svm_features &=
>> ~minus_svm_features; +    /* Defaultly user don't against
>> tsc_deadline_timer */ +    x86_cpu_def->tsc_deadline_timer_enabled =
>> +        !(minus_ext_features & CPUID_EXT_TSC_DEADLINE_TIMER);     
>>          if (check_cpuid) { if
>> (check_features_against_host(x86_cpu_def) && enforce_cpuid)         
>>      goto error; @@ -885,6 +889,7 @@ int cpu_x86_register
>>      (CPUX86State *env, const char *cpu_model)
>>      env->cpuid_ext4_features = def->ext4_features;
>> env->cpuid_xlevel2 = def->xlevel2; env->tsc_khz = def->tsc_khz; +   
>>          env->tsc_deadline_timer_enabled =
>>          def->tsc_deadline_timer_enabled;      if (!kvm_enabled()) {
>> env->cpuid_features &= TCG_FEATURES; env->cpuid_ext_features &=
>> TCG_EXT_FEATURES;  
>> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
>> index d50de90..79baf0b 100644
>> --- a/target-i386/kvm.c
>> +++ b/target-i386/kvm.c
>> @@ -370,6 +370,19 @@ int kvm_arch_init_vcpu(CPUState *env)
>>      i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
>>      env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1,
>>      0, R_ECX); env->cpuid_ext_features |= i;
>> +    /*
>> +     * 1. Considering live migration, user enable/disable tsc
>> deadline timer; +     * 2. If guest use kvm apic and kvm emulate tsc
>> deadline timer, expose it; +     * 3. If in the future qemu support
>> tsc deadline timer emulation, +     *    and guest use qemu apic,
>> add cpuid exposing case then. +     */
> 
> See above. Also, I don't think this comment applies very well to this
> function.

Yes, the comment is indeed ambiguous. Would elaborate more clear.

> 
>> +    env->cpuid_ext_features &= ~CPUID_EXT_TSC_DEADLINE_TIMER;
> 
> Can that feature possibly be set in cpuid_ext_features? I thought the
> kernel now refrains from this.

Yes, it's possible. Kernel didn't refrain it, just let qemu to make decision.

> 
>> +    if (env->tsc_deadline_timer_enabled) {
>> +        if (kvm_irqchip_in_kernel() &&
>> +            kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) {
>> +            env->cpuid_ext_features |=
>> CPUID_EXT_TSC_DEADLINE_TIMER; +        } +    }
>> 
>>      env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(s,
>>                                                              
>> 0x80000001, 0, R_EDX); 
> 
> Sorry, it remains bogus to expose the tsc deadline timer feature on
> machines < pc-1.1. That's just like we introduced kvmclock only to
> pc-0.14 onward. The reason is that guest OSes so far running on
> qemu-1.0 or older without deadline timer support must not find that
> feature when being migrated to a host with qemu-1.1 in pc-1.0 compat
> mode. Yes, the user can explicitly disable it, but that is not the
> idea of legacy machine models. They should provide the very same
> environment that older qemu versions offered.
> 

Not quite clear about this point.
Per my understanding, if a kvm guest running on an older qemu without tsc deadline timer support, 
then after migrate, the guest would still cannot find tsc deadline feature, no matter older or newer host/qemu/pc-xx it migrate to.

Thanks,
Jinsong
Jan Kiszka - Jan. 5, 2012, 8:34 p.m.
On 2012-01-05 18:07, Liu, Jinsong wrote:
>> Sorry, it remains bogus to expose the tsc deadline timer feature on
>> machines < pc-1.1. That's just like we introduced kvmclock only to
>> pc-0.14 onward. The reason is that guest OSes so far running on
>> qemu-1.0 or older without deadline timer support must not find that
>> feature when being migrated to a host with qemu-1.1 in pc-1.0 compat
>> mode. Yes, the user can explicitly disable it, but that is not the
>> idea of legacy machine models. They should provide the very same
>> environment that older qemu versions offered.
>>
> 
> Not quite clear about this point.
> Per my understanding, if a kvm guest running on an older qemu without tsc deadline timer support, 
> then after migrate, the guest would still cannot find tsc deadline feature, no matter older or newer host/qemu/pc-xx it migrate to.

What should prevent this? The feature flags are not part of the vmstate.
They are part of the vm configuration which is not migrated but defined
by starting qemu on the target host.

Jan
Liu, Jinsong - Jan. 7, 2012, 6:23 p.m.
Jan Kiszka wrote:
> On 2012-01-05 18:07, Liu, Jinsong wrote:
>>> Sorry, it remains bogus to expose the tsc deadline timer feature on
>>> machines < pc-1.1. That's just like we introduced kvmclock only to
>>> pc-0.14 onward. The reason is that guest OSes so far running on
>>> qemu-1.0 or older without deadline timer support must not find that
>>> feature when being migrated to a host with qemu-1.1 in pc-1.0 compat
>>> mode. Yes, the user can explicitly disable it, but that is not the
>>> idea of legacy machine models. They should provide the very same
>>> environment that older qemu versions offered.
>>> 
>> 
>> Not quite clear about this point.
>> Per my understanding, if a kvm guest running on an older qemu
>> without tsc deadline timer support, 
>> then after migrate, the guest would still cannot find tsc deadline
>> feature, no matter older or newer host/qemu/pc-xx it migrate to. 
> 
> What should prevent this? The feature flags are not part of the
> vmstate. They are part of the vm configuration which is not migrated
> but defined by starting qemu on the target host.
> 

Thanks! understand this point ("They are part of the vm configuration which is not migrated but defined by starting qemu on the target host").

But kvmclock example still cannot satisfy the purpose "guest running on old qemu/pc-0.13 without kvmclock support must not find kvmclock feature when being migrated to a host with new qemu/pc-0.13 compat mode". After migration, guest can possibily find kvmclock feature CPUID.0x40000001.KVM_FEATURE_CLOCKSOURCE:
pc_init1(..., kvmclock_enabled) 
{
    pc_cpus_init(cpu_model);    // the point to decide and expose cpuid features to guest

    if (kvmclock_enabled) {        // the difference point between pc-0.13 vs. pc-0.14, related nothing to cpuid features.
        kvmclock_create();
    }
}

Seems currently there is no good way to satisfy "guest running on old qemu/pc-xx without feature A support must not find feature A when being migrated to a host with new qemu/pc-xx compat mode", i.e. considering
* if running with '-cpu host' then migrate;
* each time we add a new cpuid feature it need add one or more new machine model? is it necessary to bind pc-xx with cpuid feature?
* logically cpuid features should better be controlled by cpu model, not by machine model.

Regards,
Jinsong
Jan Kiszka - Jan. 8, 2012, 9:24 p.m.
On 2012-01-07 19:23, Liu, Jinsong wrote:
> Jan Kiszka wrote:
>> On 2012-01-05 18:07, Liu, Jinsong wrote:
>>>> Sorry, it remains bogus to expose the tsc deadline timer feature on
>>>> machines < pc-1.1. That's just like we introduced kvmclock only to
>>>> pc-0.14 onward. The reason is that guest OSes so far running on
>>>> qemu-1.0 or older without deadline timer support must not find that
>>>> feature when being migrated to a host with qemu-1.1 in pc-1.0 compat
>>>> mode. Yes, the user can explicitly disable it, but that is not the
>>>> idea of legacy machine models. They should provide the very same
>>>> environment that older qemu versions offered.
>>>>
>>>
>>> Not quite clear about this point.
>>> Per my understanding, if a kvm guest running on an older qemu
>>> without tsc deadline timer support, 
>>> then after migrate, the guest would still cannot find tsc deadline
>>> feature, no matter older or newer host/qemu/pc-xx it migrate to. 
>>
>> What should prevent this? The feature flags are not part of the
>> vmstate. They are part of the vm configuration which is not migrated
>> but defined by starting qemu on the target host.
>>
> 
> Thanks! understand this point ("They are part of the vm configuration which is not migrated but defined by starting qemu on the target host").
> 
> But kvmclock example still cannot satisfy the purpose "guest running on old qemu/pc-0.13 without kvmclock support must not find kvmclock feature when being migrated to a host with new qemu/pc-0.13 compat mode". After migration, guest can possibily find kvmclock feature CPUID.0x40000001.KVM_FEATURE_CLOCKSOURCE:
> pc_init1(..., kvmclock_enabled) 
> {
>     pc_cpus_init(cpu_model);    // the point to decide and expose cpuid features to guest
> 
>     if (kvmclock_enabled) {        // the difference point between pc-0.13 vs. pc-0.14, related nothing to cpuid features.
>         kvmclock_create();
>     }
> }

Right, not a perfect example: the cpuid feature is not influenced by
this mechanism, only the fact if a kvmclock device (for save/restore)
should be created. I guess we ignored this back then, only focusing on
the more obvious issue of the addition device.

> 
> Seems currently there is no good way to satisfy "guest running on old qemu/pc-xx without feature A support must not find feature A when being migrated to a host with new qemu/pc-xx compat mode", i.e. considering
> * if running with '-cpu host' then migrate;
> * each time we add a new cpuid feature it need add one or more new machine model? is it necessary to bind pc-xx with cpuid feature?
> * logically cpuid features should better be controlled by cpu model, not by machine model.

The compatibility machines define the possible cpu models. If I select
pc-0.14, e.g. -cpu kvm64 should not give me features that 0.14 was not
exposing.

Jan
Liu, Jinsong - Feb. 27, 2012, 4:05 p.m.
Jan Kiszka wrote:
> On 2012-01-07 19:23, Liu, Jinsong wrote:
>> Jan Kiszka wrote:
>>> On 2012-01-05 18:07, Liu, Jinsong wrote:
>>>>> Sorry, it remains bogus to expose the tsc deadline timer feature
>>>>> on machines < pc-1.1. That's just like we introduced kvmclock
>>>>> only to pc-0.14 onward. The reason is that guest OSes so far
>>>>> running on qemu-1.0 or older without deadline timer support must
>>>>> not find that feature when being migrated to a host with qemu-1.1
>>>>> in pc-1.0 compat mode. Yes, the user can explicitly disable it,
>>>>> but that is not the idea of legacy machine models. They should
>>>>> provide the very same environment that older qemu versions
>>>>> offered. 
>>>>> 
>>>> 
>>>> Not quite clear about this point.
>>>> Per my understanding, if a kvm guest running on an older qemu
>>>> without tsc deadline timer support,
>>>> then after migrate, the guest would still cannot find tsc deadline
>>>> feature, no matter older or newer host/qemu/pc-xx it migrate to.
>>> 
>>> What should prevent this? The feature flags are not part of the
>>> vmstate. They are part of the vm configuration which is not migrated
>>> but defined by starting qemu on the target host.
>>> 
>> 
>> Thanks! understand this point ("They are part of the vm
>> configuration which is not migrated but defined by starting qemu on
>> the target host").  
>> 
>> But kvmclock example still cannot satisfy the purpose "guest running
>> on old qemu/pc-0.13 without kvmclock support must not find kvmclock
>> feature when being migrated to a host with new qemu/pc-0.13 compat
>>     mode". After migration, guest can possibily find kvmclock
>> feature CPUID.0x40000001.KVM_FEATURE_CLOCKSOURCE: pc_init1(...,
>> kvmclock_enabled) { pc_cpus_init(cpu_model);    // the point to
>> decide and expose cpuid features to guest   
>> 
>>     if (kvmclock_enabled) {        // the difference point between
>>     pc-0.13 vs. pc-0.14, related nothing to cpuid features.        
>> kvmclock_create(); } }
> 
> Right, not a perfect example: the cpuid feature is not influenced by
> this mechanism, only the fact if a kvmclock device (for save/restore)
> should be created. I guess we ignored this back then, only focusing on
> the more obvious issue of the addition device.
> 
>> 
>> Seems currently there is no good way to satisfy "guest running on
>> old qemu/pc-xx without feature A support must not find feature A
>> when being migrated to a host with new qemu/pc-xx compat mode", i.e.
>> considering   
>> * if running with '-cpu host' then migrate;
>> * each time we add a new cpuid feature it need add one or more new
>> machine model? is it necessary to bind pc-xx with cpuid feature? 
>> * logically cpuid features should better be controlled by cpu model,
>> not by machine model. 
> 
> The compatibility machines define the possible cpu models. If I select

How does machine define possible cpu models?
cpu model defined by qemu option '-cpu ...', while machine model defined by '-machine ...'

> pc-0.14, e.g. -cpu kvm64 should not give me features that 0.14 was not
> exposing.
> 

in such case, it's '-cpu kvm64' who take effect to decide what cpuid features would exposed to guest, not '-machine pc-0.14'.

IMO, what our patch need to do is to expose a cpuid feature to guest (CPUID.01H:ECX.TSC_Deadline[bit 24]), it decided by cpu model, not machine model:
pc_init1(..., cpu_model, ...)
{
    pc_cpus_init(cpu_model);   // this is the whole logic exposing cpuid features to guest
    ...
}

Do I misunderstanding something?

Thanks,
Jinsong
Jan Kiszka - Feb. 27, 2012, 5:18 p.m.
On 2012-02-27 17:05, Liu, Jinsong wrote:
> Jan Kiszka wrote:
>> On 2012-01-07 19:23, Liu, Jinsong wrote:
>>> Jan Kiszka wrote:
>>>> On 2012-01-05 18:07, Liu, Jinsong wrote:
>>>>>> Sorry, it remains bogus to expose the tsc deadline timer feature
>>>>>> on machines < pc-1.1. That's just like we introduced kvmclock
>>>>>> only to pc-0.14 onward. The reason is that guest OSes so far
>>>>>> running on qemu-1.0 or older without deadline timer support must
>>>>>> not find that feature when being migrated to a host with qemu-1.1
>>>>>> in pc-1.0 compat mode. Yes, the user can explicitly disable it,
>>>>>> but that is not the idea of legacy machine models. They should
>>>>>> provide the very same environment that older qemu versions
>>>>>> offered. 
>>>>>>
>>>>>
>>>>> Not quite clear about this point.
>>>>> Per my understanding, if a kvm guest running on an older qemu
>>>>> without tsc deadline timer support,
>>>>> then after migrate, the guest would still cannot find tsc deadline
>>>>> feature, no matter older or newer host/qemu/pc-xx it migrate to.
>>>>
>>>> What should prevent this? The feature flags are not part of the
>>>> vmstate. They are part of the vm configuration which is not migrated
>>>> but defined by starting qemu on the target host.
>>>>
>>>
>>> Thanks! understand this point ("They are part of the vm
>>> configuration which is not migrated but defined by starting qemu on
>>> the target host").  
>>>
>>> But kvmclock example still cannot satisfy the purpose "guest running
>>> on old qemu/pc-0.13 without kvmclock support must not find kvmclock
>>> feature when being migrated to a host with new qemu/pc-0.13 compat
>>>     mode". After migration, guest can possibily find kvmclock
>>> feature CPUID.0x40000001.KVM_FEATURE_CLOCKSOURCE: pc_init1(...,
>>> kvmclock_enabled) { pc_cpus_init(cpu_model);    // the point to
>>> decide and expose cpuid features to guest   
>>>
>>>     if (kvmclock_enabled) {        // the difference point between
>>>     pc-0.13 vs. pc-0.14, related nothing to cpuid features.        
>>> kvmclock_create(); } }
>>
>> Right, not a perfect example: the cpuid feature is not influenced by
>> this mechanism, only the fact if a kvmclock device (for save/restore)
>> should be created. I guess we ignored this back then, only focusing on
>> the more obvious issue of the addition device.
>>
>>>
>>> Seems currently there is no good way to satisfy "guest running on
>>> old qemu/pc-xx without feature A support must not find feature A
>>> when being migrated to a host with new qemu/pc-xx compat mode", i.e.
>>> considering   
>>> * if running with '-cpu host' then migrate;
>>> * each time we add a new cpuid feature it need add one or more new
>>> machine model? is it necessary to bind pc-xx with cpuid feature? 
>>> * logically cpuid features should better be controlled by cpu model,
>>> not by machine model. 
>>
>> The compatibility machines define the possible cpu models. If I select
> 
> How does machine define possible cpu models?
> cpu model defined by qemu option '-cpu ...', while machine model defined by '-machine ...'
> 
>> pc-0.14, e.g. -cpu kvm64 should not give me features that 0.14 was not
>> exposing.
>>
> 
> in such case, it's '-cpu kvm64' who take effect to decide what cpuid features would exposed to guest, not '-machine pc-0.14'.
> 
> IMO, what our patch need to do is to expose a cpuid feature to guest (CPUID.01H:ECX.TSC_Deadline[bit 24]), it decided by cpu model, not machine model:
> pc_init1(..., cpu_model, ...)
> {
>     pc_cpus_init(cpu_model);   // this is the whole logic exposing cpuid features to guest
>     ...
> }
> 
> Do I misunderstanding something?

My point is that

  qemu-version-A [-cpu whatever]

should provide the same VM as

  qemu-version-B -machine pc-A [-cpu whatever]

specifically if you leave out the cpu specification.

So the compat machine could establish a feature mask (e.g. append some
"-tsc_deadline" in this case). But, indeed, we need a new channel for this.

Jan

Patch

diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 177d8aa..f2d0ad5 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -399,6 +399,7 @@ 
 #define CPUID_EXT_X2APIC   (1 << 21)
 #define CPUID_EXT_MOVBE    (1 << 22)
 #define CPUID_EXT_POPCNT   (1 << 23)
+#define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24)
 #define CPUID_EXT_XSAVE    (1 << 26)
 #define CPUID_EXT_OSXSAVE  (1 << 27)
 #define CPUID_EXT_HYPERVISOR  (1 << 31)
@@ -693,6 +694,7 @@  typedef struct CPUX86State {
 
     uint64_t tsc;
     uint64_t tsc_deadline;
+    bool tsc_deadline_timer_enabled;
 
     uint64_t mcg_status;
     uint64_t msr_ia32_misc_enable;
diff --git a/target-i386/cpuid.c b/target-i386/cpuid.c
index 0b3af90..fe749e0 100644
--- a/target-i386/cpuid.c
+++ b/target-i386/cpuid.c
@@ -48,7 +48,7 @@  static const char *ext_feature_name[] = {
     "fma", "cx16", "xtpr", "pdcm",
     NULL, NULL, "dca", "sse4.1|sse4_1",
     "sse4.2|sse4_2", "x2apic", "movbe", "popcnt",
-    NULL, "aes", "xsave", "osxsave",
+    "tsc_deadline", "aes", "xsave", "osxsave",
     "avx", NULL, NULL, "hypervisor",
 };
 static const char *ext2_feature_name[] = {
@@ -225,6 +225,7 @@  typedef struct x86_def_t {
     int model;
     int stepping;
     int tsc_khz;
+    bool tsc_deadline_timer_enabled;
     uint32_t features, ext_features, ext2_features, ext3_features;
     uint32_t kvm_features, svm_features;
     uint32_t xlevel;
@@ -742,6 +743,9 @@  static int cpu_x86_find_by_name(x86_def_t *x86_cpu_def, const char *cpu_model)
     x86_cpu_def->ext3_features &= ~minus_ext3_features;
     x86_cpu_def->kvm_features &= ~minus_kvm_features;
     x86_cpu_def->svm_features &= ~minus_svm_features;
+    /* Defaultly user don't against tsc_deadline_timer */
+    x86_cpu_def->tsc_deadline_timer_enabled =
+        !(minus_ext_features & CPUID_EXT_TSC_DEADLINE_TIMER);
     if (check_cpuid) {
         if (check_features_against_host(x86_cpu_def) && enforce_cpuid)
             goto error;
@@ -885,6 +889,7 @@  int cpu_x86_register (CPUX86State *env, const char *cpu_model)
     env->cpuid_ext4_features = def->ext4_features;
     env->cpuid_xlevel2 = def->xlevel2;
     env->tsc_khz = def->tsc_khz;
+    env->tsc_deadline_timer_enabled = def->tsc_deadline_timer_enabled;
     if (!kvm_enabled()) {
         env->cpuid_features &= TCG_FEATURES;
         env->cpuid_ext_features &= TCG_EXT_FEATURES;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index d50de90..79baf0b 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -370,6 +370,19 @@  int kvm_arch_init_vcpu(CPUState *env)
     i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
     env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX);
     env->cpuid_ext_features |= i;
+    /*
+     * 1. Considering live migration, user enable/disable tsc deadline timer;
+     * 2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it;
+     * 3. If in the future qemu support tsc deadline timer emulation,
+     *    and guest use qemu apic, add cpuid exposing case then.
+     */
+    env->cpuid_ext_features &= ~CPUID_EXT_TSC_DEADLINE_TIMER;
+    if (env->tsc_deadline_timer_enabled) {
+        if (kvm_irqchip_in_kernel() &&
+            kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) {
+            env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER;
+        }
+    }
 
     env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(s, 0x80000001,
                                                              0, R_EDX);