Message ID | DE8DF0795D48FD4CA783C40EC8292335C663@SHSMSX101.ccr.corp.intel.com |
---|---|
State | New |
Headers | show |
On 2011-12-26 21:49, Liu, Jinsong wrote: > From 19caf1db1f93e6f6b736e1dfd5e91a0c7669adec Mon Sep 17 00:00:00 2001 > From: Liu Jinsong <jinsong.liu@intel.com> > Date: Tue, 27 Dec 2011 04:08:27 +0800 > Subject: [PATCH] Expose tsc deadline timer cpuid to guest > > Depend on several factors: > 1. Considering live migration, user enable/disable tsc deadline timer; > 2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it; > 3. If in the future qemu support tsc deadline timer emulation, > and guest use qemu apic, add cpuid exposing case then. > > Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> > --- > linux-headers/linux/kvm.h | 1 + > qemu-kvm.h | 1 + > qemu-options.hx | 3 +++ > target-i386/cpu.h | 1 + > target-i386/kvm.c | 12 ++++++++++++ > vl.c | 4 ++++ > 6 files changed, 22 insertions(+), 0 deletions(-) > > diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h > index a8761d3..1d3a4f4 100644 > --- a/linux-headers/linux/kvm.h > +++ b/linux-headers/linux/kvm.h > @@ -558,6 +558,7 @@ struct kvm_ppc_pvinfo { > #define KVM_CAP_PPC_PAPR 68 > #define KVM_CAP_SW_TLB 69 > #define KVM_CAP_ONE_REG 70 > +#define KVM_CAP_TSC_DEADLINE_TIMER 72 > > #ifdef KVM_CAP_IRQ_ROUTING > Please split linux-header update from functional changes (then the former should just be the outcome of a update-linux-headers.sh run). > diff --git a/qemu-kvm.h b/qemu-kvm.h > index 2bd5602..8c6c2ea 100644 > --- a/qemu-kvm.h > +++ b/qemu-kvm.h > @@ -260,6 +260,7 @@ extern int kvm_irqchip; > extern int kvm_pit; > extern int kvm_pit_reinject; > extern unsigned int kvm_shadow_memory; > +extern int tsc_deadline_timer; > > int kvm_handle_tpr_access(CPUState *env); > void kvm_tpr_enable_vapic(CPUState *env); > diff --git a/qemu-options.hx b/qemu-options.hx > index f6df6b9..eff6644 100644 > --- a/qemu-options.hx > +++ b/qemu-options.hx > @@ -2619,6 +2619,9 @@ DEF("no-kvm-pit-reinjection", 0, QEMU_OPTION_no_kvm_pit_reinjection, > "-no-kvm-pit-reinjection\n" > " disable KVM kernel mode PIT interrupt reinjection\n", > QEMU_ARCH_I386) > +DEF("no-tsc-deadline-timer", 0, QEMU_OPTION_no_tsc_deadline_timer, > + "-no-tsc-deadline-timer disable tsc deadline timer\n", > + QEMU_ARCH_I386) Hmm, I would really prefer to stop adding switches like this. They won't make it upstream anyway. Can't this control be attached to legacy qemu machine models, ie. here anything <= pc-1.0? See how we handle kvmclock. > DEF("tdf", 0, QEMU_OPTION_tdf, > "-tdf enable guest time drift compensation\n", QEMU_ARCH_ALL) > DEF("kvm-shadow-memory", HAS_ARG, QEMU_OPTION_kvm_shadow_memory, > diff --git a/target-i386/cpu.h b/target-i386/cpu.h > index 177d8aa..767d2eb 100644 > --- a/target-i386/cpu.h > +++ b/target-i386/cpu.h > @@ -399,6 +399,7 @@ > #define CPUID_EXT_X2APIC (1 << 21) > #define CPUID_EXT_MOVBE (1 << 22) > #define CPUID_EXT_POPCNT (1 << 23) > +#define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24) > #define CPUID_EXT_XSAVE (1 << 26) > #define CPUID_EXT_OSXSAVE (1 << 27) > #define CPUID_EXT_HYPERVISOR (1 << 31) > diff --git a/target-i386/kvm.c b/target-i386/kvm.c > index d50de90..740184d 100644 > --- a/target-i386/kvm.c > +++ b/target-i386/kvm.c > @@ -370,6 +370,18 @@ int kvm_arch_init_vcpu(CPUState *env) > i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR; > env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX); > env->cpuid_ext_features |= i; > + /* > + * 1. Considering live migration, user enable/disable tsc deadline timer; > + * 2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it; > + * 3. If in the future qemu support tsc deadline timer emulation, > + * and guest use qemu apic, add cpuid exposing case then. > + */ > + env->cpuid_ext_features &= ~CPUID_EXT_TSC_DEADLINE_TIMER; > + if (tsc_deadline_timer) { > + if (kvm_irqchip_in_kernel() && > + kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) > + env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER; > + } Please mind the coding style. Jan
>> diff --git a/qemu-kvm.h b/qemu-kvm.h >> index 2bd5602..8c6c2ea 100644 >> --- a/qemu-kvm.h >> +++ b/qemu-kvm.h >> @@ -260,6 +260,7 @@ extern int kvm_irqchip; >> extern int kvm_pit; >> extern int kvm_pit_reinject; >> extern unsigned int kvm_shadow_memory; >> +extern int tsc_deadline_timer; >> >> int kvm_handle_tpr_access(CPUState *env); >> void kvm_tpr_enable_vapic(CPUState *env); >> diff --git a/qemu-options.hx b/qemu-options.hx >> index f6df6b9..eff6644 100644 >> --- a/qemu-options.hx >> +++ b/qemu-options.hx >> @@ -2619,6 +2619,9 @@ DEF("no-kvm-pit-reinjection", 0, >> QEMU_OPTION_no_kvm_pit_reinjection, "-no-kvm-pit-reinjection\n" >> " disable KVM kernel mode PIT interrupt >> reinjection\n", QEMU_ARCH_I386) +DEF("no-tsc-deadline-timer", >> 0, QEMU_OPTION_no_tsc_deadline_timer, + "-no-tsc-deadline-timer >> disable tsc deadline timer\n", + QEMU_ARCH_I386) > > Hmm, I would really prefer to stop adding switches like this. They > won't make it upstream anyway. OK, I will try to write a patch w/ better user control cpuid method, i.e. by plus_features and minus_features. > > Can't this control be attached to legacy qemu machine models, ie. here > anything <= pc-1.0? See how we handle kvmclock. > You mean, by adding input para like pc_init1(..., kvmclock_enabled, tscdeadline_enabled)? I think that's not a good way. With more and more cpuid features (N) controlled in this way, machine models would be 2^N. Thanks, Jinsong
On 2011-12-28 18:35, Liu, Jinsong wrote: >>> diff --git a/qemu-kvm.h b/qemu-kvm.h >>> index 2bd5602..8c6c2ea 100644 >>> --- a/qemu-kvm.h >>> +++ b/qemu-kvm.h >>> @@ -260,6 +260,7 @@ extern int kvm_irqchip; >>> extern int kvm_pit; >>> extern int kvm_pit_reinject; >>> extern unsigned int kvm_shadow_memory; >>> +extern int tsc_deadline_timer; >>> >>> int kvm_handle_tpr_access(CPUState *env); >>> void kvm_tpr_enable_vapic(CPUState *env); >>> diff --git a/qemu-options.hx b/qemu-options.hx >>> index f6df6b9..eff6644 100644 >>> --- a/qemu-options.hx >>> +++ b/qemu-options.hx >>> @@ -2619,6 +2619,9 @@ DEF("no-kvm-pit-reinjection", 0, >>> QEMU_OPTION_no_kvm_pit_reinjection, "-no-kvm-pit-reinjection\n" >>> " disable KVM kernel mode PIT interrupt >>> reinjection\n", QEMU_ARCH_I386) +DEF("no-tsc-deadline-timer", >>> 0, QEMU_OPTION_no_tsc_deadline_timer, + "-no-tsc-deadline-timer >>> disable tsc deadline timer\n", + QEMU_ARCH_I386) >> >> Hmm, I would really prefer to stop adding switches like this. They >> won't make it upstream anyway. > > OK, I will try to write a patch w/ better user control cpuid method, i.e. by plus_features and minus_features. Yep, that would be better. > >> >> Can't this control be attached to legacy qemu machine models, ie. here >> anything <= pc-1.0? See how we handle kvmclock. >> > > You mean, by adding input para like pc_init1(..., kvmclock_enabled, tscdeadline_enabled)? > I think that's not a good way. I think it is mandatory as older qemu versions won't expose tscdeadline to the guest, thus newer versions must not do this when emulating older machines. > With more and more cpuid features (N) controlled in this way, machine models would be 2^N. We likely need a better way to express this via code, I agree. Likely something declarative as for compat_props. Jan
Jan Kiszka wrote: > On 2011-12-28 18:35, Liu, Jinsong wrote: >>>> diff --git a/qemu-kvm.h b/qemu-kvm.h >>>> index 2bd5602..8c6c2ea 100644 >>>> --- a/qemu-kvm.h >>>> +++ b/qemu-kvm.h >>>> @@ -260,6 +260,7 @@ extern int kvm_irqchip; >>>> extern int kvm_pit; >>>> extern int kvm_pit_reinject; >>>> extern unsigned int kvm_shadow_memory; >>>> +extern int tsc_deadline_timer; >>>> >>>> int kvm_handle_tpr_access(CPUState *env); >>>> void kvm_tpr_enable_vapic(CPUState *env); >>>> diff --git a/qemu-options.hx b/qemu-options.hx >>>> index f6df6b9..eff6644 100644 >>>> --- a/qemu-options.hx >>>> +++ b/qemu-options.hx >>>> @@ -2619,6 +2619,9 @@ DEF("no-kvm-pit-reinjection", 0, >>>> QEMU_OPTION_no_kvm_pit_reinjection, >>>> "-no-kvm-pit-reinjection\n" " disable KVM >>>> kernel mode PIT interrupt reinjection\n", QEMU_ARCH_I386) >>>> +DEF("no-tsc-deadline-timer", 0, >>>> QEMU_OPTION_no_tsc_deadline_timer, + "-no-tsc-deadline-timer >>>> disable tsc deadline timer\n", + QEMU_ARCH_I386) >>> >>> Hmm, I would really prefer to stop adding switches like this. They >>> won't make it upstream anyway. >> >> OK, I will try to write a patch w/ better user control cpuid method, >> i.e. by plus_features and minus_features. > > Yep, that would be better. Thanks, patch updated according to above idea, and sent out. > >> >>> >>> Can't this control be attached to legacy qemu machine models, ie. >>> here anything <= pc-1.0? See how we handle kvmclock. >>> >> >> You mean, by adding input para like pc_init1(..., kvmclock_enabled, >> tscdeadline_enabled)? >> I think that's not a good way. > > I think it is mandatory as older qemu versions won't expose > tscdeadline to the guest, thus newer versions must not do this when > emulating older machines. Hmm, if an old qemu machine runs at a new host platform (say, -cpu host), it would expose many *new* cpuid features to guest. IMO, qemu machine is a *virt* platform for guest, tsc deadline timer is a cpuid features, not much necessary to be bound to some qemu machine version. whether tsc deadline timer cpuid expose to guest can only decided by: 1. user authorize enable (default yes) 2. kvm_irqchip_in_kernel 3. KVM_CAP_TSC_DEADLINE_TIMER If yes, it can be exposed to guest, and would not break anything no matter what qemu machine version is. Thanks, Jinsong > >> With more and more cpuid features (N) controlled in this way, >> machine models would be 2^N. > > We likely need a better way to express this via code, I agree. Likely > something declarative as for compat_props. >
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index a8761d3..1d3a4f4 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -558,6 +558,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_PPC_PAPR 68 #define KVM_CAP_SW_TLB 69 #define KVM_CAP_ONE_REG 70 +#define KVM_CAP_TSC_DEADLINE_TIMER 72 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/qemu-kvm.h b/qemu-kvm.h index 2bd5602..8c6c2ea 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -260,6 +260,7 @@ extern int kvm_irqchip; extern int kvm_pit; extern int kvm_pit_reinject; extern unsigned int kvm_shadow_memory; +extern int tsc_deadline_timer; int kvm_handle_tpr_access(CPUState *env); void kvm_tpr_enable_vapic(CPUState *env); diff --git a/qemu-options.hx b/qemu-options.hx index f6df6b9..eff6644 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -2619,6 +2619,9 @@ DEF("no-kvm-pit-reinjection", 0, QEMU_OPTION_no_kvm_pit_reinjection, "-no-kvm-pit-reinjection\n" " disable KVM kernel mode PIT interrupt reinjection\n", QEMU_ARCH_I386) +DEF("no-tsc-deadline-timer", 0, QEMU_OPTION_no_tsc_deadline_timer, + "-no-tsc-deadline-timer disable tsc deadline timer\n", + QEMU_ARCH_I386) DEF("tdf", 0, QEMU_OPTION_tdf, "-tdf enable guest time drift compensation\n", QEMU_ARCH_ALL) DEF("kvm-shadow-memory", HAS_ARG, QEMU_OPTION_kvm_shadow_memory, diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 177d8aa..767d2eb 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -399,6 +399,7 @@ #define CPUID_EXT_X2APIC (1 << 21) #define CPUID_EXT_MOVBE (1 << 22) #define CPUID_EXT_POPCNT (1 << 23) +#define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24) #define CPUID_EXT_XSAVE (1 << 26) #define CPUID_EXT_OSXSAVE (1 << 27) #define CPUID_EXT_HYPERVISOR (1 << 31) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index d50de90..740184d 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -370,6 +370,18 @@ int kvm_arch_init_vcpu(CPUState *env) i = env->cpuid_ext_features & CPUID_EXT_HYPERVISOR; env->cpuid_ext_features &= kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX); env->cpuid_ext_features |= i; + /* + * 1. Considering live migration, user enable/disable tsc deadline timer; + * 2. If guest use kvm apic and kvm emulate tsc deadline timer, expose it; + * 3. If in the future qemu support tsc deadline timer emulation, + * and guest use qemu apic, add cpuid exposing case then. + */ + env->cpuid_ext_features &= ~CPUID_EXT_TSC_DEADLINE_TIMER; + if (tsc_deadline_timer) { + if (kvm_irqchip_in_kernel() && + kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) + env->cpuid_ext_features |= CPUID_EXT_TSC_DEADLINE_TIMER; + } env->cpuid_ext2_features &= kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX); diff --git a/vl.c b/vl.c index 89a618b..583de58 100644 --- a/vl.c +++ b/vl.c @@ -2160,6 +2160,7 @@ int kvm_irqchip = 1; int kvm_pit = 1; int kvm_pit_reinject = 1; #endif +int tsc_deadline_timer = 1; int main(int argc, char **argv, char **envp) { @@ -2875,6 +2876,9 @@ int main(int argc, char **argv, char **envp) break; } #endif + case QEMU_OPTION_no_tsc_deadline_timer: + tsc_deadline_timer = 0; + break; case QEMU_OPTION_usb: usb_enabled = 1; break;