Patchwork Expose tsc deadline timer cpuid to guest

login
register
mail settings
Submitter Liu, Jinsong
Date Dec. 26, 2011, 8:49 p.m.
Message ID <DE8DF0795D48FD4CA783C40EC8292335C663@SHSMSX101.ccr.corp.intel.com>
Download mbox | patch
Permalink /patch/133284/
State New
Headers show

Comments

Liu, Jinsong - Dec. 26, 2011, 8:49 p.m.
From 19caf1db1f93e6f6b736e1dfd5e91a0c7669adec Mon Sep 17 00:00:00 2001
From: Liu Jinsong <jinsong.liu@intel.com>
Date: Tue, 27 Dec 2011 04:08:27 +0800
Subject: [PATCH] Expose tsc deadline timer cpuid to guest

Depend on several factors:
1. Considering live migration, user enable/disable tsc deadline timer;
2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it;
3. If in the future qemu support tsc deadline timer emulation,
   and guest use qemu apic, add cpuid exposing case then.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
---
 linux-headers/linux/kvm.h |    1 +
 qemu-kvm.h                |    1 +
 qemu-options.hx           |    3 +++
 target-i386/cpu.h         |    1 +
 target-i386/kvm.c         |   12 ++++++++++++
 vl.c                      |    4 ++++
 6 files changed, 22 insertions(+), 0 deletions(-)
Jan Kiszka - Dec. 27, 2011, 3:09 p.m.
On 2011-12-26 21:49, Liu, Jinsong wrote:
> From 19caf1db1f93e6f6b736e1dfd5e91a0c7669adec Mon Sep 17 00:00:00 2001
> From: Liu Jinsong <jinsong.liu@intel.com>
> Date: Tue, 27 Dec 2011 04:08:27 +0800
> Subject: [PATCH] Expose tsc deadline timer cpuid to guest
> 
> Depend on several factors:
> 1. Considering live migration, user enable/disable tsc deadline timer;
> 2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it;
> 3. If in the future qemu support tsc deadline timer emulation,
>    and guest use qemu apic, add cpuid exposing case then.
> 
> Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
> ---
>  linux-headers/linux/kvm.h |    1 +
>  qemu-kvm.h                |    1 +
>  qemu-options.hx           |    3 +++
>  target-i386/cpu.h         |    1 +
>  target-i386/kvm.c         |   12 ++++++++++++
>  vl.c                      |    4 ++++
>  6 files changed, 22 insertions(+), 0 deletions(-)
> 
> diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
> index a8761d3..1d3a4f4 100644
> --- a/linux-headers/linux/kvm.h
> +++ b/linux-headers/linux/kvm.h
> @@ -558,6 +558,7 @@ struct kvm_ppc_pvinfo {
>  #define KVM_CAP_PPC_PAPR 68
>  #define KVM_CAP_SW_TLB 69
>  #define KVM_CAP_ONE_REG 70
> +#define KVM_CAP_TSC_DEADLINE_TIMER 72
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  

Please split linux-header update from functional changes (then the
former should just be the outcome of a update-linux-headers.sh run).

> diff --git a/qemu-kvm.h b/qemu-kvm.h
> index 2bd5602..8c6c2ea 100644
> --- a/qemu-kvm.h
> +++ b/qemu-kvm.h
> @@ -260,6 +260,7 @@ extern int kvm_irqchip;
>  extern int kvm_pit;
>  extern int kvm_pit_reinject;
>  extern unsigned int kvm_shadow_memory;
> +extern int tsc_deadline_timer;
>  
>  int kvm_handle_tpr_access(CPUState *env);
>  void kvm_tpr_enable_vapic(CPUState *env);
> diff --git a/qemu-options.hx b/qemu-options.hx
> index f6df6b9..eff6644 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -2619,6 +2619,9 @@ DEF("no-kvm-pit-reinjection", 0, QEMU_OPTION_no_kvm_pit_reinjection,
>      "-no-kvm-pit-reinjection\n"
>      "                disable KVM kernel mode PIT interrupt reinjection\n",
>      QEMU_ARCH_I386)
> +DEF("no-tsc-deadline-timer", 0, QEMU_OPTION_no_tsc_deadline_timer,
> +    "-no-tsc-deadline-timer     disable tsc deadline timer\n",
> +    QEMU_ARCH_I386)

Hmm, I would really prefer to stop adding switches like this. They won't
make it upstream anyway.

Can't this control be attached to legacy qemu machine models, ie. here
anything <= pc-1.0? See how we handle kvmclock.

>  DEF("tdf", 0, QEMU_OPTION_tdf,
>      "-tdf            enable guest time drift compensation\n", QEMU_ARCH_ALL)
>  DEF("kvm-shadow-memory", HAS_ARG, QEMU_OPTION_kvm_shadow_memory,
> diff --git a/target-i386/cpu.h b/target-i386/cpu.h
> index 177d8aa..767d2eb 100644
> --- a/target-i386/cpu.h
> +++ b/target-i386/cpu.h
> @@ -399,6 +399,7 @@
>  #define CPUID_EXT_X2APIC   (1 << 21)
>  #define CPUID_EXT_MOVBE    (1 << 22)
>  #define CPUID_EXT_POPCNT   (1 << 23)
> +#define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24)
>  #define CPUID_EXT_XSAVE    (1 << 26)
>  #define CPUID_EXT_OSXSAVE  (1 << 27)
>  #define CPUID_EXT_HYPERVISOR  (1 << 31)
> diff --git a/target-i386/kvm.c b/target-i386/kvm.c
> index d50de90..740184d 100644
> --- a/target-i386/kvm.c
> +++ b/target-i386/kvm.c
> @@ -370,6 +370,18 @@ int kvm_arch_init_vcpu(CPUState *env)
>      i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
>      env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX);
>      env->cpuid_ext_features |= i;
> +    /*
> +     * 1. Considering live migration, user enable/disable tsc deadline timer;
> +     * 2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it;
> +     * 3. If in the future qemu support tsc deadline timer emulation,
> +     *    and guest use qemu apic, add cpuid exposing case then.
> +     */
> +    env->cpuid_ext_features &= ~CPUID_EXT_TSC_DEADLINE_TIMER;
> +    if (tsc_deadline_timer) {
> +        if (kvm_irqchip_in_kernel() &&
> +            kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER))
> +            env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER;
> +    }

Please mind the coding style.

Jan
Liu, Jinsong - Dec. 28, 2011, 5:35 p.m.
>> diff --git a/qemu-kvm.h b/qemu-kvm.h
>> index 2bd5602..8c6c2ea 100644
>> --- a/qemu-kvm.h
>> +++ b/qemu-kvm.h
>> @@ -260,6 +260,7 @@ extern int kvm_irqchip;
>>  extern int kvm_pit;
>>  extern int kvm_pit_reinject;
>>  extern unsigned int kvm_shadow_memory;
>> +extern int tsc_deadline_timer;
>> 
>>  int kvm_handle_tpr_access(CPUState *env);
>>  void kvm_tpr_enable_vapic(CPUState *env);
>> diff --git a/qemu-options.hx b/qemu-options.hx
>> index f6df6b9..eff6644 100644
>> --- a/qemu-options.hx
>> +++ b/qemu-options.hx
>> @@ -2619,6 +2619,9 @@ DEF("no-kvm-pit-reinjection", 0,
>>      QEMU_OPTION_no_kvm_pit_reinjection, "-no-kvm-pit-reinjection\n"
>>      "                disable KVM kernel mode PIT interrupt
>> reinjection\n",      QEMU_ARCH_I386) +DEF("no-tsc-deadline-timer",
>> 0, QEMU_OPTION_no_tsc_deadline_timer, +    "-no-tsc-deadline-timer  
>> disable tsc deadline timer\n", +    QEMU_ARCH_I386)
> 
> Hmm, I would really prefer to stop adding switches like this. They
> won't make it upstream anyway.

OK, I will try to write a patch w/ better user control cpuid method, i.e. by plus_features and minus_features.

> 
> Can't this control be attached to legacy qemu machine models, ie. here
> anything <= pc-1.0? See how we handle kvmclock.
> 

You mean, by adding input para like pc_init1(..., kvmclock_enabled, tscdeadline_enabled)?
I think that's not a good way. With more and more cpuid features (N) controlled in this way, machine models would be 2^N.

Thanks,
Jinsong
Jan Kiszka - Dec. 28, 2011, 7:18 p.m.
On 2011-12-28 18:35, Liu, Jinsong wrote:
>>> diff --git a/qemu-kvm.h b/qemu-kvm.h
>>> index 2bd5602..8c6c2ea 100644
>>> --- a/qemu-kvm.h
>>> +++ b/qemu-kvm.h
>>> @@ -260,6 +260,7 @@ extern int kvm_irqchip;
>>>  extern int kvm_pit;
>>>  extern int kvm_pit_reinject;
>>>  extern unsigned int kvm_shadow_memory;
>>> +extern int tsc_deadline_timer;
>>>
>>>  int kvm_handle_tpr_access(CPUState *env);
>>>  void kvm_tpr_enable_vapic(CPUState *env);
>>> diff --git a/qemu-options.hx b/qemu-options.hx
>>> index f6df6b9..eff6644 100644
>>> --- a/qemu-options.hx
>>> +++ b/qemu-options.hx
>>> @@ -2619,6 +2619,9 @@ DEF("no-kvm-pit-reinjection", 0,
>>>      QEMU_OPTION_no_kvm_pit_reinjection, "-no-kvm-pit-reinjection\n"
>>>      "                disable KVM kernel mode PIT interrupt
>>> reinjection\n",      QEMU_ARCH_I386) +DEF("no-tsc-deadline-timer",
>>> 0, QEMU_OPTION_no_tsc_deadline_timer, +    "-no-tsc-deadline-timer  
>>> disable tsc deadline timer\n", +    QEMU_ARCH_I386)
>>
>> Hmm, I would really prefer to stop adding switches like this. They
>> won't make it upstream anyway.
> 
> OK, I will try to write a patch w/ better user control cpuid method, i.e. by plus_features and minus_features.

Yep, that would be better.

> 
>>
>> Can't this control be attached to legacy qemu machine models, ie. here
>> anything <= pc-1.0? See how we handle kvmclock.
>>
> 
> You mean, by adding input para like pc_init1(..., kvmclock_enabled, tscdeadline_enabled)?
> I think that's not a good way.

I think it is mandatory as older qemu versions won't expose tscdeadline
to the guest, thus newer versions must not do this when emulating older
machines.

> With more and more cpuid features (N) controlled in this way, machine models would be 2^N.

We likely need a better way to express this via code, I agree. Likely
something declarative as for compat_props.

Jan
Liu, Jinsong - Dec. 29, 2011, 6:37 p.m.
Jan Kiszka wrote:
> On 2011-12-28 18:35, Liu, Jinsong wrote:
>>>> diff --git a/qemu-kvm.h b/qemu-kvm.h
>>>> index 2bd5602..8c6c2ea 100644
>>>> --- a/qemu-kvm.h
>>>> +++ b/qemu-kvm.h
>>>> @@ -260,6 +260,7 @@ extern int kvm_irqchip;
>>>>  extern int kvm_pit;
>>>>  extern int kvm_pit_reinject;
>>>>  extern unsigned int kvm_shadow_memory;
>>>> +extern int tsc_deadline_timer;
>>>> 
>>>>  int kvm_handle_tpr_access(CPUState *env);
>>>>  void kvm_tpr_enable_vapic(CPUState *env);
>>>> diff --git a/qemu-options.hx b/qemu-options.hx
>>>> index f6df6b9..eff6644 100644
>>>> --- a/qemu-options.hx
>>>> +++ b/qemu-options.hx
>>>> @@ -2619,6 +2619,9 @@ DEF("no-kvm-pit-reinjection", 0,
>>>>      QEMU_OPTION_no_kvm_pit_reinjection,
>>>>      "-no-kvm-pit-reinjection\n" "                disable KVM
>>>> kernel mode PIT interrupt reinjection\n",      QEMU_ARCH_I386)
>>>> +DEF("no-tsc-deadline-timer", 0,
>>>> QEMU_OPTION_no_tsc_deadline_timer, +    "-no-tsc-deadline-timer
>>>> disable tsc deadline timer\n", +    QEMU_ARCH_I386) 
>>> 
>>> Hmm, I would really prefer to stop adding switches like this. They
>>> won't make it upstream anyway.
>> 
>> OK, I will try to write a patch w/ better user control cpuid method,
>> i.e. by plus_features and minus_features. 
> 
> Yep, that would be better.

Thanks, patch updated according to above idea, and sent out.

> 
>> 
>>> 
>>> Can't this control be attached to legacy qemu machine models, ie.
>>> here anything <= pc-1.0? See how we handle kvmclock.
>>> 
>> 
>> You mean, by adding input para like pc_init1(..., kvmclock_enabled,
>> tscdeadline_enabled)? 
>> I think that's not a good way.
> 
> I think it is mandatory as older qemu versions won't expose
> tscdeadline to the guest, thus newer versions must not do this when
> emulating older machines.

Hmm, if an old qemu machine runs at a new host platform (say, -cpu host), it would expose many *new* cpuid features to guest.

IMO, qemu machine is a *virt* platform for guest,
tsc deadline timer is a cpuid features, not much necessary to be bound to some qemu machine version.
whether tsc deadline timer cpuid expose to guest can only decided by:
1. user authorize enable (default yes)
2. kvm_irqchip_in_kernel
3. KVM_CAP_TSC_DEADLINE_TIMER
If yes, it can be exposed to guest, and would not break anything no matter what qemu machine version is.


Thanks,
Jinsong

> 
>> With more and more cpuid features (N) controlled in this way,
>> machine models would be 2^N. 
> 
> We likely need a better way to express this via code, I agree. Likely
> something declarative as for compat_props.
>

Patch

diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index a8761d3..1d3a4f4 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -558,6 +558,7 @@  struct kvm_ppc_pvinfo {
 #define KVM_CAP_PPC_PAPR 68
 #define KVM_CAP_SW_TLB 69
 #define KVM_CAP_ONE_REG 70
+#define KVM_CAP_TSC_DEADLINE_TIMER 72
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/qemu-kvm.h b/qemu-kvm.h
index 2bd5602..8c6c2ea 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -260,6 +260,7 @@  extern int kvm_irqchip;
 extern int kvm_pit;
 extern int kvm_pit_reinject;
 extern unsigned int kvm_shadow_memory;
+extern int tsc_deadline_timer;
 
 int kvm_handle_tpr_access(CPUState *env);
 void kvm_tpr_enable_vapic(CPUState *env);
diff --git a/qemu-options.hx b/qemu-options.hx
index f6df6b9..eff6644 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2619,6 +2619,9 @@  DEF("no-kvm-pit-reinjection", 0, QEMU_OPTION_no_kvm_pit_reinjection,
     "-no-kvm-pit-reinjection\n"
     "                disable KVM kernel mode PIT interrupt reinjection\n",
     QEMU_ARCH_I386)
+DEF("no-tsc-deadline-timer", 0, QEMU_OPTION_no_tsc_deadline_timer,
+    "-no-tsc-deadline-timer     disable tsc deadline timer\n",
+    QEMU_ARCH_I386)
 DEF("tdf", 0, QEMU_OPTION_tdf,
     "-tdf            enable guest time drift compensation\n", QEMU_ARCH_ALL)
 DEF("kvm-shadow-memory", HAS_ARG, QEMU_OPTION_kvm_shadow_memory,
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 177d8aa..767d2eb 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -399,6 +399,7 @@ 
 #define CPUID_EXT_X2APIC   (1 << 21)
 #define CPUID_EXT_MOVBE    (1 << 22)
 #define CPUID_EXT_POPCNT   (1 << 23)
+#define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24)
 #define CPUID_EXT_XSAVE    (1 << 26)
 #define CPUID_EXT_OSXSAVE  (1 << 27)
 #define CPUID_EXT_HYPERVISOR  (1 << 31)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index d50de90..740184d 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -370,6 +370,18 @@  int kvm_arch_init_vcpu(CPUState *env)
     i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR;
     env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX);
     env->cpuid_ext_features |= i;
+    /*
+     * 1. Considering live migration, user enable/disable tsc deadline timer;
+     * 2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it;
+     * 3. If in the future qemu support tsc deadline timer emulation,
+     *    and guest use qemu apic, add cpuid exposing case then.
+     */
+    env->cpuid_ext_features &= ~CPUID_EXT_TSC_DEADLINE_TIMER;
+    if (tsc_deadline_timer) {
+        if (kvm_irqchip_in_kernel() &&
+            kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER))
+            env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER;
+    }
 
     env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(s, 0x80000001,
                                                              0, R_EDX);
diff --git a/vl.c b/vl.c
index 89a618b..583de58 100644
--- a/vl.c
+++ b/vl.c
@@ -2160,6 +2160,7 @@  int kvm_irqchip = 1;
 int kvm_pit = 1;
 int kvm_pit_reinject = 1;
 #endif
+int tsc_deadline_timer = 1;
 
 int main(int argc, char **argv, char **envp)
 {
@@ -2875,6 +2876,9 @@  int main(int argc, char **argv, char **envp)
                 break;
             }
 #endif
+            case QEMU_OPTION_no_tsc_deadline_timer:
+                tsc_deadline_timer = 0;
+                break;
             case QEMU_OPTION_usb:
                 usb_enabled = 1;
                 break;