Message ID | BC00F5384FCFC9499AF06F92E8B78A9E254455B6C3@shsmsx502.ccr.corp.intel.com |
---|---|
State | New |
Headers | show |
On Tue, Sep 13, 2011 at 10:36:51PM +0800, Liu, Jinsong wrote: > >From 7b12021e1d1b79797b49e41cc0a7be05a6180d9a Mon Sep 17 00:00:00 2001 > From: Liu, Jinsong <jinsong.liu@intel.com> > Date: Tue, 13 Sep 2011 21:52:54 +0800 > Subject: [PATCH] KVM: emulate lapic tsc deadline timer for guest > > This patch emulate lapic tsc deadline timer for guest: > Enumerate tsc deadline timer capability by CPUID; > Enable tsc deadline timer mode by lapic MMIO; > Start tsc deadline timer by WRMSR; > > Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com> > --- > arch/x86/include/asm/apicdef.h | 2 + > arch/x86/include/asm/cpufeature.h | 3 + > arch/x86/include/asm/kvm_host.h | 2 + > arch/x86/include/asm/msr-index.h | 2 + > arch/x86/kvm/kvm_timer.h | 2 + > arch/x86/kvm/lapic.c | 122 ++++++++++++++++++++++++++++++------- > arch/x86/kvm/lapic.h | 3 + > arch/x86/kvm/x86.c | 20 ++++++- > 8 files changed, 132 insertions(+), 24 deletions(-) > > diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h > index 34595d5..3925d80 100644 > --- a/arch/x86/include/asm/apicdef.h > +++ b/arch/x86/include/asm/apicdef.h > @@ -100,7 +100,9 @@ > #define APIC_TIMER_BASE_CLKIN 0x0 > #define APIC_TIMER_BASE_TMBASE 0x1 > #define APIC_TIMER_BASE_DIV 0x2 > +#define APIC_LVT_TIMER_ONESHOT (0 << 17) > #define APIC_LVT_TIMER_PERIODIC (1 << 17) > +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) > #define APIC_LVT_MASKED (1 << 16) > #define APIC_LVT_LEVEL_TRIGGER (1 << 15) > #define APIC_LVT_REMOTE_IRR (1 << 14) Please have a separate, introductory patch for definitions that are not KVM specific. > +++ b/arch/x86/include/asm/kvm_host.h > @@ -671,6 +671,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); > > extern bool tdp_enabled; > > +extern u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); > + No need for extern. 
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 2b2255b..925d4b9 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -135,9 +135,23 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) > return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; > } > > +static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) > +{ > + return ((apic_get_reg(apic, APIC_LVTT) & > + apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); > +} > + > static inline int apic_lvtt_period(struct kvm_lapic *apic) > { > - return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; > + return ((apic_get_reg(apic, APIC_LVTT) & > + apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); > +} > + > +static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) > +{ > + return ((apic_get_reg(apic, APIC_LVTT) & > + apic->lapic_timer.timer_mode_mask) == > + APIC_LVT_TIMER_TSCDEADLINE); > } > > static inline int apic_lvt_nmi_mode(u32 lvt_val) > @@ -166,7 +180,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) > } > > static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { > - LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ > + LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ > LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ > LVT_MASK | APIC_MODE_MASK, /* LVTPC */ > LINT_MASK, LINT_MASK, /* LVT0-1 */ > @@ -570,6 +584,9 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) > break; > > case APIC_TMCCT: /* Timer CCR */ > + if (apic_lvtt_tscdeadline(apic)) > + return 0; > + > val = apic_get_tmcct(apic); > break; > > @@ -664,29 +681,32 @@ static void update_divide_count(struct kvm_lapic *apic) > > static void start_apic_timer(struct kvm_lapic *apic) > { > - ktime_t now = apic->lapic_timer.timer.base->get_time(); > - > - apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * > - APIC_BUS_CYCLE_NS * apic->divide_count; > + ktime_t now; > atomic_set(&apic->lapic_timer.pending, 0); > > 
- if (!apic->lapic_timer.period) > - return; > - /* > - * Do not allow the guest to program periodic timers with small > - * interval, since the hrtimers are not throttled by the host > - * scheduler. > - */ > - if (apic_lvtt_period(apic)) { > - if (apic->lapic_timer.period < NSEC_PER_MSEC/2) > - apic->lapic_timer.period = NSEC_PER_MSEC/2; > - } > + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { > + /* lapic timer in oneshot or peroidic mode */ > + now = apic->lapic_timer.timer.base->get_time(); > + apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) > + * APIC_BUS_CYCLE_NS * apic->divide_count; > + > + if (!apic->lapic_timer.period) > + return; > + /* > + * Do not allow the guest to program periodic timers with small > + * interval, since the hrtimers are not throttled by the host > + * scheduler. > + */ > + if (apic_lvtt_period(apic)) { > + if (apic->lapic_timer.period < NSEC_PER_MSEC/2) > + apic->lapic_timer.period = NSEC_PER_MSEC/2; > + } > > - hrtimer_start(&apic->lapic_timer.timer, > - ktime_add_ns(now, apic->lapic_timer.period), > - HRTIMER_MODE_ABS); > + hrtimer_start(&apic->lapic_timer.timer, > + ktime_add_ns(now, apic->lapic_timer.period), > + HRTIMER_MODE_ABS); > > - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" > + apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" > PRIx64 ", " > "timer initial count 0x%x, period %lldns, " > "expire @ 0x%016" PRIx64 ".\n", __func__, > @@ -695,6 +715,28 @@ static void start_apic_timer(struct kvm_lapic *apic) > apic->lapic_timer.period, > ktime_to_ns(ktime_add_ns(now, > apic->lapic_timer.period))); > + } else if (apic_lvtt_tscdeadline(apic)) { > + /* lapic timer in tsc deadline mode */ > + u64 guest_tsc, guest_tsc_delta, ns = 0; > + struct kvm_vcpu *vcpu = apic->vcpu; > + unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); > + unsigned long flags; > + > + if (unlikely(!apic->lapic_timer.tscdeadline || !this_tsc_khz)) > + return; > + > + local_irq_save(flags); > + > + now = 
apic->lapic_timer.timer.base->get_time(); > + kvm_get_msr(vcpu, MSR_IA32_TSC, &guest_tsc); Use kvm_x86_ops->read_l1_tsc(vcpu) instead of direct MSR read (to avoid reading L2 guest TSC in case of nested virt). > + guest_tsc_delta = apic->lapic_timer.tscdeadline - guest_tsc; if (guest_tsc <= tscdeadline), the timer should start immediately. > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 6cb353c..a73c059 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -610,6 +610,16 @@ static void update_cpuid(struct kvm_vcpu *vcpu) > if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) > best->ecx |= bit(X86_FEATURE_OSXSAVE); > } > + > + /* > + * When cpu has tsc deadline timer capacibility, use bit 17/18 > + * as timer mode mask. Otherwise only use bit 17. > + */ > + if (cpu_has_tsc_deadline_timer && best->function == 0x1) { > + best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); > + vcpu->arch.apic->lapic_timer.timer_mode_mask = (3 << 17); > + } else > + vcpu->arch.apic->lapic_timer.timer_mode_mask = (1 << 17); > } The deadline timer is entirely emulated, whether the host CPU supports it or not is irrelevant. Why was this changed from previous submissions?
Marcelo Tosatti wrote: >> diff --git a/arch/x86/include/asm/apicdef.h >> b/arch/x86/include/asm/apicdef.h >> index 34595d5..3925d80 100644 >> --- a/arch/x86/include/asm/apicdef.h >> +++ b/arch/x86/include/asm/apicdef.h >> @@ -100,7 +100,9 @@ >> #define APIC_TIMER_BASE_CLKIN 0x0 >> #define APIC_TIMER_BASE_TMBASE 0x1 >> #define APIC_TIMER_BASE_DIV 0x2 >> +#define APIC_LVT_TIMER_ONESHOT (0 << 17) >> #define APIC_LVT_TIMER_PERIODIC (1 << 17) >> +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) >> #define APIC_LVT_MASKED (1 << 16) >> #define APIC_LVT_LEVEL_TRIGGER (1 << 15) >> #define APIC_LVT_REMOTE_IRR (1 << 14) > > Please have a separate, introductory patch for definitions that are > not KVM specific. > OK, will present a separate patch. BTW, will the separate patch still be send to kvm@vger.kernel.org? >> +++ b/arch/x86/include/asm/kvm_host.h >> @@ -671,6 +671,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu >> *vcpu, gfn_t gfn); >> >> extern bool tdp_enabled; >> >> +extern u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); >> + > > No need for extern. > Any special concern, or, for coding style? a little curious :) >> + } else if (apic_lvtt_tscdeadline(apic)) { >> + /* lapic timer in tsc deadline mode */ >> + u64 guest_tsc, guest_tsc_delta, ns = 0; >> + struct kvm_vcpu *vcpu = apic->vcpu; >> + unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); + unsigned long >> flags; + >> + if (unlikely(!apic->lapic_timer.tscdeadline || !this_tsc_khz)) >> + return; + >> + local_irq_save(flags); >> + >> + now = apic->lapic_timer.timer.base->get_time(); >> + kvm_get_msr(vcpu, MSR_IA32_TSC, &guest_tsc); > > Use kvm_x86_ops->read_l1_tsc(vcpu) instead of direct MSR read > (to avoid reading L2 guest TSC in case of nested virt). > Fine. I use some old version kvm (Jul 22), and didn't notice Nadav's patch checked in Aug 2 with read_l1_tsc hook. Thanks for tell me. >> + guest_tsc_delta = apic->lapic_timer.tscdeadline - guest_tsc; > > if (guest_tsc <= tscdeadline), the timer should start immediately. 
> >> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c >> index 6cb353c..a73c059 100644 >> --- a/arch/x86/kvm/x86.c >> +++ b/arch/x86/kvm/x86.c >> @@ -610,6 +610,16 @@ static void update_cpuid(struct kvm_vcpu *vcpu) >> if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE)) >> best->ecx |= bit(X86_FEATURE_OSXSAVE); >> } >> + >> + /* >> + * When cpu has tsc deadline timer capacibility, use bit 17/18 >> + * as timer mode mask. Otherwise only use bit 17. + */ >> + if (cpu_has_tsc_deadline_timer && best->function == 0x1) { >> + best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); >> + vcpu->arch.apic->lapic_timer.timer_mode_mask = (3 << 17); + } else >> + vcpu->arch.apic->lapic_timer.timer_mode_mask = (1 << 17); >> } > > The deadline timer is entirely emulated, whether the host CPU supports > it or not is irrelevant. > > Why was this changed from previous submissions? Hmm, will explain in next email. Thanks, Jinsong
Marcelo Tosatti wrote: >> + } else if (apic_lvtt_tscdeadline(apic)) { >> + /* lapic timer in tsc deadline mode */ >> + u64 guest_tsc, guest_tsc_delta, ns = 0; >> + struct kvm_vcpu *vcpu = apic->vcpu; >> + unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); + unsigned long >> flags; + >> + if (unlikely(!apic->lapic_timer.tscdeadline || !this_tsc_khz)) >> + return; + >> + local_irq_save(flags); >> + >> + now = apic->lapic_timer.timer.base->get_time(); >> + kvm_get_msr(vcpu, MSR_IA32_TSC, &guest_tsc); > > Use kvm_x86_ops->read_l1_tsc(vcpu) instead of direct MSR read > (to avoid reading L2 guest TSC in case of nested virt). > >> + guest_tsc_delta = apic->lapic_timer.tscdeadline - guest_tsc; > > if (guest_tsc <= tscdeadline), the timer should start immediately. > Yes, under such case the timer does start immediately, with ns = 0 Thanks, Jinsong
On Thu, Sep 15, 2011 at 04:17:20PM +0800, Liu, Jinsong wrote: > Marcelo Tosatti wrote: > >> + } else if (apic_lvtt_tscdeadline(apic)) { > >> + /* lapic timer in tsc deadline mode */ > >> + u64 guest_tsc, guest_tsc_delta, ns = 0; > >> + struct kvm_vcpu *vcpu = apic->vcpu; > >> + unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); + unsigned long > >> flags; + > >> + if (unlikely(!apic->lapic_timer.tscdeadline || !this_tsc_khz)) > >> + return; + > >> + local_irq_save(flags); > >> + > >> + now = apic->lapic_timer.timer.base->get_time(); > >> + kvm_get_msr(vcpu, MSR_IA32_TSC, &guest_tsc); > > > > Use kvm_x86_ops->read_l1_tsc(vcpu) instead of direct MSR read > > (to avoid reading L2 guest TSC in case of nested virt). > > > >> + guest_tsc_delta = apic->lapic_timer.tscdeadline - guest_tsc; > > > > if (guest_tsc <= tscdeadline), the timer should start immediately. > > > > Yes, under such case the timer does start immediately, with ns = 0 No, guest_tsc_delta is unsigned, so the "< 0" comparison fails.
On Thu, Sep 15, 2011 at 02:22:58PM +0800, Liu, Jinsong wrote: > Marcelo Tosatti wrote: > >> diff --git a/arch/x86/include/asm/apicdef.h > >> b/arch/x86/include/asm/apicdef.h > >> index 34595d5..3925d80 100644 > >> --- a/arch/x86/include/asm/apicdef.h > >> +++ b/arch/x86/include/asm/apicdef.h > >> @@ -100,7 +100,9 @@ > >> #define APIC_TIMER_BASE_CLKIN 0x0 > >> #define APIC_TIMER_BASE_TMBASE 0x1 > >> #define APIC_TIMER_BASE_DIV 0x2 > >> +#define APIC_LVT_TIMER_ONESHOT (0 << 17) > >> #define APIC_LVT_TIMER_PERIODIC (1 << 17) > >> +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) > >> #define APIC_LVT_MASKED (1 << 16) > >> #define APIC_LVT_LEVEL_TRIGGER (1 << 15) > >> #define APIC_LVT_REMOTE_IRR (1 << 14) > > > > Please have a separate, introductory patch for definitions that are > > not KVM specific. > > > > OK, will present a separate patch. BTW, will the separate patch still be send to kvm@vger.kernel.org? Yes. > > >> +++ b/arch/x86/include/asm/kvm_host.h > >> @@ -671,6 +671,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu > >> *vcpu, gfn_t gfn); > >> > >> extern bool tdp_enabled; > >> > >> +extern u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); > >> + > > > > No need for extern. > > > > Any special concern, or, for coding style? a little curious :) It is not necessary.
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 34595d5..3925d80 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -100,7 +100,9 @@ #define APIC_TIMER_BASE_CLKIN 0x0 #define APIC_TIMER_BASE_TMBASE 0x1 #define APIC_TIMER_BASE_DIV 0x2 +#define APIC_LVT_TIMER_ONESHOT (0 << 17) #define APIC_LVT_TIMER_PERIODIC (1 << 17) +#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17) #define APIC_LVT_MASKED (1 << 16) #define APIC_LVT_LEVEL_TRIGGER (1 << 15) #define APIC_LVT_REMOTE_IRR (1 << 14) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 4258aac..8a26e48 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -120,6 +120,7 @@ #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER (4*32+24) /* Tsc deadline timer */ #define X86_FEATURE_AES (4*32+25) /* AES instructions */ #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ @@ -284,6 +285,8 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1) #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) +#define cpu_has_tsc_deadline_timer \ + boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 307e3cf..2ce6529 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -671,6 +671,8 @@ u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); extern bool tdp_enabled; 
+extern u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); + /* control of guest tsc rate supported? */ extern bool kvm_has_tsc_control; /* minimum supported tsc_khz for guests */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index d52609a..a6962d9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -229,6 +229,8 @@ #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_BASE (0xfffff<<12) +#define MSR_IA32_TSCDEADLINE 0x000006e0 + #define MSR_IA32_UCODE_WRITE 0x00000079 #define MSR_IA32_UCODE_REV 0x0000008b diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h index 64bc6ea..497dbaa 100644 --- a/arch/x86/kvm/kvm_timer.h +++ b/arch/x86/kvm/kvm_timer.h @@ -2,6 +2,8 @@ struct kvm_timer { struct hrtimer timer; s64 period; /* unit: ns */ + u32 timer_mode_mask; + u64 tscdeadline; atomic_t pending; /* accumulated triggered timers */ bool reinject; struct kvm_timer_ops *t_ops; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 2b2255b..925d4b9 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -135,9 +135,23 @@ static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type) return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK; } +static inline int apic_lvtt_oneshot(struct kvm_lapic *apic) +{ + return ((apic_get_reg(apic, APIC_LVTT) & + apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT); +} + static inline int apic_lvtt_period(struct kvm_lapic *apic) { - return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; + return ((apic_get_reg(apic, APIC_LVTT) & + apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC); +} + +static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic) +{ + return ((apic_get_reg(apic, APIC_LVTT) & + apic->lapic_timer.timer_mode_mask) == + APIC_LVT_TIMER_TSCDEADLINE); } static inline int apic_lvt_nmi_mode(u32 lvt_val) @@ -166,7 +180,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) } static 
unsigned int apic_lvt_mask[APIC_LVT_NUM] = { - LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ + LVT_MASK , /* part LVTT mask, timer mode mask added at runtime */ LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ LVT_MASK | APIC_MODE_MASK, /* LVTPC */ LINT_MASK, LINT_MASK, /* LVT0-1 */ @@ -570,6 +584,9 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) break; case APIC_TMCCT: /* Timer CCR */ + if (apic_lvtt_tscdeadline(apic)) + return 0; + val = apic_get_tmcct(apic); break; @@ -664,29 +681,32 @@ static void update_divide_count(struct kvm_lapic *apic) static void start_apic_timer(struct kvm_lapic *apic) { - ktime_t now = apic->lapic_timer.timer.base->get_time(); - - apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) * - APIC_BUS_CYCLE_NS * apic->divide_count; + ktime_t now; atomic_set(&apic->lapic_timer.pending, 0); - if (!apic->lapic_timer.period) - return; - /* - * Do not allow the guest to program periodic timers with small - * interval, since the hrtimers are not throttled by the host - * scheduler. - */ - if (apic_lvtt_period(apic)) { - if (apic->lapic_timer.period < NSEC_PER_MSEC/2) - apic->lapic_timer.period = NSEC_PER_MSEC/2; - } + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { + /* lapic timer in oneshot or peroidic mode */ + now = apic->lapic_timer.timer.base->get_time(); + apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT) + * APIC_BUS_CYCLE_NS * apic->divide_count; + + if (!apic->lapic_timer.period) + return; + /* + * Do not allow the guest to program periodic timers with small + * interval, since the hrtimers are not throttled by the host + * scheduler. 
+ */ + if (apic_lvtt_period(apic)) { + if (apic->lapic_timer.period < NSEC_PER_MSEC/2) + apic->lapic_timer.period = NSEC_PER_MSEC/2; + } - hrtimer_start(&apic->lapic_timer.timer, - ktime_add_ns(now, apic->lapic_timer.period), - HRTIMER_MODE_ABS); + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, apic->lapic_timer.period), + HRTIMER_MODE_ABS); - apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" + apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" PRIx64 ", " "timer initial count 0x%x, period %lldns, " "expire @ 0x%016" PRIx64 ".\n", __func__, @@ -695,6 +715,28 @@ static void start_apic_timer(struct kvm_lapic *apic) apic->lapic_timer.period, ktime_to_ns(ktime_add_ns(now, apic->lapic_timer.period))); + } else if (apic_lvtt_tscdeadline(apic)) { + /* lapic timer in tsc deadline mode */ + u64 guest_tsc, guest_tsc_delta, ns = 0; + struct kvm_vcpu *vcpu = apic->vcpu; + unsigned long this_tsc_khz = vcpu_tsc_khz(vcpu); + unsigned long flags; + + if (unlikely(!apic->lapic_timer.tscdeadline || !this_tsc_khz)) + return; + + local_irq_save(flags); + + now = apic->lapic_timer.timer.base->get_time(); + kvm_get_msr(vcpu, MSR_IA32_TSC, &guest_tsc); + guest_tsc_delta = apic->lapic_timer.tscdeadline - guest_tsc; + if (likely(guest_tsc_delta > 0)) + ns = guest_tsc_delta * 1000000UL / this_tsc_khz; + hrtimer_start(&apic->lapic_timer.timer, + ktime_add_ns(now, ns), HRTIMER_MODE_ABS); + + local_irq_restore(flags); + } } static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) @@ -782,7 +824,6 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) case APIC_LVT0: apic_manage_nmi_watchdog(apic, val); - case APIC_LVTT: case APIC_LVTTHMR: case APIC_LVTPC: case APIC_LVT1: @@ -796,7 +837,22 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) break; + case APIC_LVTT: + if ((apic_get_reg(apic, APIC_LVTT) & + apic->lapic_timer.timer_mode_mask) != + (val & apic->lapic_timer.timer_mode_mask)) + 
hrtimer_cancel(&apic->lapic_timer.timer); + + if (!apic_sw_enabled(apic)) + val |= APIC_LVT_MASKED; + val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask); + apic_set_reg(apic, APIC_LVTT, val); + break; + case APIC_TMICT: + if (apic_lvtt_tscdeadline(apic)) + break; + hrtimer_cancel(&apic->lapic_timer.timer); apic_set_reg(apic, APIC_TMICT, val); start_apic_timer(apic); @@ -883,6 +939,28 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) *---------------------------------------------------------------------- */ +u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + return 0; + + return apic->lapic_timer.tscdeadline; +} + +void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data) +{ + struct kvm_lapic *apic = vcpu->arch.apic; + + if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic)) + return; + + hrtimer_cancel(&apic->lapic_timer.timer); + apic->lapic_timer.tscdeadline = data; + start_apic_timer(apic); +} + void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8) { struct kvm_lapic *apic = vcpu->arch.apic; diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 52c9e6b..10c6ee6 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -41,6 +41,9 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu); bool kvm_apic_present(struct kvm_vcpu *vcpu); int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); +u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu); +void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data); + void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu); void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6cb353c..a73c059 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -610,6 +610,16 @@ static void update_cpuid(struct kvm_vcpu *vcpu) if (kvm_read_cr4_bits(vcpu, 
X86_CR4_OSXSAVE)) best->ecx |= bit(X86_FEATURE_OSXSAVE); } + + /* + * When cpu has tsc deadline timer capacibility, use bit 17/18 + * as timer mode mask. Otherwise only use bit 17. + */ + if (cpu_has_tsc_deadline_timer && best->function == 0x1) { + best->ecx |= bit(X86_FEATURE_TSC_DEADLINE_TIMER); + vcpu->arch.apic->lapic_timer.timer_mode_mask = (3 << 17); + } else + vcpu->arch.apic->lapic_timer.timer_mode_mask = (1 << 17); } int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) @@ -819,7 +829,7 @@ static u32 msrs_to_save[] = { #ifdef CONFIG_X86_64 MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif - MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA + MSR_IA32_TSC, MSR_IA32_TSCDEADLINE, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA }; static unsigned num_msrs_to_save; @@ -1000,7 +1010,7 @@ static inline int kvm_tsc_changes_freq(void) return ret; } -static u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) +u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu) { if (vcpu->arch.virtual_tsc_khz) return vcpu->arch.virtual_tsc_khz; @@ -1564,6 +1574,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) break; case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: return kvm_x2apic_msr_write(vcpu, msr, data); + case MSR_IA32_TSCDEADLINE: + kvm_set_lapic_tscdeadline_msr(vcpu, data); + break; case MSR_IA32_MISC_ENABLE: vcpu->arch.ia32_misc_enable_msr = data; break; @@ -1891,6 +1904,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: return kvm_x2apic_msr_read(vcpu, msr, pdata); break; + case MSR_IA32_TSCDEADLINE: + data = kvm_get_lapic_tscdeadline_msr(vcpu); + break; case MSR_IA32_MISC_ENABLE: data = vcpu->arch.ia32_misc_enable_msr; break;