From patchwork Wed May 19 08:34:49 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Sheng Yang X-Patchwork-Id: 52965 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 6E0B6B7D48 for ; Wed, 19 May 2010 18:40:38 +1000 (EST) Received: from localhost ([127.0.0.1]:52995 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1OEepN-0008Iv-TY for incoming@patchwork.ozlabs.org; Wed, 19 May 2010 04:40:33 -0400 Received: from [140.186.70.92] (port=45399 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1OEemk-0006oj-1c for qemu-devel@nongnu.org; Wed, 19 May 2010 04:37:51 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.69) (envelope-from ) id 1OEemg-0003xN-ST for qemu-devel@nongnu.org; Wed, 19 May 2010 04:37:49 -0400 Received: from mga02.intel.com ([134.134.136.20]:43313) by eggs.gnu.org with esmtp (Exim 4.69) (envelope-from ) id 1OEemg-0003wM-B3 for qemu-devel@nongnu.org; Wed, 19 May 2010 04:37:46 -0400 Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga101.jf.intel.com with ESMTP; 19 May 2010 01:34:56 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="4.53,262,1272870000"; d="scan'208";a="622921546" Received: from syang10-desktop.sh.intel.com (HELO syang10-desktop) ([10.239.36.64]) by orsmga001.jf.intel.com with ESMTP; 19 May 2010 01:37:22 -0700 Received: from yasker by syang10-desktop with local (Exim 4.71) (envelope-from ) id 1OEeju-0003C0-7R; Wed, 19 May 2010 16:34:54 +0800 From: Sheng Yang To: Avi Kivity , Marcelo Tosatti Date: Wed, 19 May 2010 16:34:49 +0800 Message-Id: <1274258090-12247-1-git-send-email-sheng@linux.intel.com> X-Mailer: git-send-email 1.7.0.4 X-detected-operating-system: by 
eggs.gnu.org: Genre and OS details not recognized. Cc: Sheng Yang , qemu-devel@nongnu.org, kvm@vger.kernel.org, Dexuan Cui Subject: [Qemu-devel] [PATCH v2] KVM: VMX: Enable XSAVE/XRSTOR for guest X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Dexuan Cui Enable XSAVE/XRSTOR for guest. Changes from V1: 1. Use FPU API. 2. Fix CPUID issue. 3. Save/restore all possible guest xstate fields when switching, because we don't know which fields the guest has already touched. Signed-off-by: Dexuan Cui Signed-off-by: Sheng Yang --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/vmx.c | 28 +++++++++++++ arch/x86/kvm/x86.c | 85 +++++++++++++++++++++++++++++++++++--- 4 files changed, 108 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d08bb4a..78d7b06 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -302,6 +302,7 @@ struct kvm_vcpu_arch { } update_pte; struct fpu guest_fpu; + uint64_t xcr0, host_xcr0; gva_t mmio_fault_cr2; struct kvm_pio_request pio; diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 9e6779f..346ea66 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -266,6 +266,7 @@ enum vmcs_field { #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 /* * Interruption-information format diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 99ae513..2ee8ff6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include "trace.h" @@ -2616,6 +2618,8 @@ static int 
vmx_vcpu_setup(struct vcpu_vmx *vmx) vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS; if (enable_ept) vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE; + if (cpu_has_xsave) + vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_OSXSAVE; vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits); tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc; @@ -3354,6 +3358,29 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu) return 1; } +static int handle_xsetbv(struct kvm_vcpu *vcpu) +{ + u64 new_bv = ((u64)(u32)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | + (u32)kvm_register_read(vcpu, VCPU_REGS_RAX); /* operand is EDX:EAX: widen before shifting, drop the upper register halves */ + + if ((u32)kvm_register_read(vcpu, VCPU_REGS_RCX) != 0) /* index is ECX only */ + goto err; /* only XCR index 0 is accepted */ + if (vmx_get_cpl(vcpu) != 0) + goto err; /* reject unless the guest is at CPL 0 */ + if (!(new_bv & XSTATE_FP) || + (new_bv & ~vcpu->arch.host_xcr0)) + goto err; /* XSTATE_FP must be set and no bit outside host_xcr0 may be */ + if ((new_bv & XSTATE_YMM) && !(new_bv & XSTATE_SSE)) + goto err; /* XSTATE_YMM is only valid together with XSTATE_SSE */ + vcpu->arch.xcr0 = new_bv; + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); + skip_emulated_instruction(vcpu); + return 1; +err: + kvm_inject_gp(vcpu, 0); /* every rejected case injects #GP(0) into the guest */ + return 1; +} + static int handle_apic_access(struct kvm_vcpu *vcpu) { return emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE; @@ -3632,6 +3659,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = { [EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold, [EXIT_REASON_APIC_ACCESS] = handle_apic_access, [EXIT_REASON_WBINVD] = handle_wbinvd, + [EXIT_REASON_XSETBV] = handle_xsetbv, [EXIT_REASON_TASK_SWITCH] = handle_task_switch, [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7be1d36..5e20f37 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -64,6 +64,7 @@ (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR \ + | X86_CR4_OSXSAVE \ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE)) #define CR8_RESERVED_BITS 
(~(unsigned long)X86_CR8_TPR) @@ -149,6 +150,11 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { NULL } }; +static inline u32 bit(int bitno) +{ + return 1 << (bitno & 31); +} + static void kvm_on_user_return(struct user_return_notifier *urn) { unsigned slot; @@ -473,6 +479,17 @@ void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw) } EXPORT_SYMBOL_GPL(kvm_lmsw); +static bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 1, 0); /* may be NULL: kvm_emulate_cpuid checks the same lookup */ + if (best && (best->ecx & bit(X86_FEATURE_XSAVE))) + return true; /* guest CPUID leaf 1 ECX has the XSAVE bit */ + + return false; +} + int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { unsigned long old_cr4 = kvm_read_cr4(vcpu); @@ -481,6 +498,9 @@ int __kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (cr4 & CR4_RESERVED_BITS) return 1; + if (!guest_cpuid_has_xsave(vcpu) && (cr4 & X86_CR4_OSXSAVE)) + return 1; /* fail only when the new CR4 sets OSXSAVE and the guest CPUID lacks XSAVE */ + if (is_long_mode(vcpu)) { if (!(cr4 & X86_CR4_PAE)) return 1; @@ -665,11 +685,6 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) } EXPORT_SYMBOL_GPL(kvm_get_dr); -static inline u32 bit(int bitno) -{ - return 1 << (bitno & 31); -} - /* * List of msr numbers which we expose to userspace through KVM_GET_MSRS * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. @@ -1887,6 +1902,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, unsigned f_lm = 0; #endif unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; + unsigned f_xsave = cpu_has_xsave ? 
F(XSAVE) : 0; /* cpuid 1.edx */ const u32 kvm_supported_word0_x86_features = @@ -1916,7 +1932,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 0 /* Reserved, DCA */ | F(XMM4_1) | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | - 0 /* Reserved, XSAVE, OSXSAVE */; + 0 /* Reserved, AES */ | f_xsave | 0 /* OSXSAVE */; /* cpuid 0x80000001.ecx */ const u32 kvm_supported_word6_x86_features = F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | @@ -1931,7 +1947,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, switch (function) { case 0: - entry->eax = min(entry->eax, (u32)0xb); + entry->eax = min(entry->eax, (u32)0xd); /* clamp leaf 0's EAX to at most 0xd (previously 0xb) */ break; case 1: entry->edx &= kvm_supported_word0_x86_features; @@ -1989,6 +2005,20 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, } break; } + case 0xd: { + int i; + + entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + for (i = 1; *nent < maxnent; ++i) { + if (entry[i - 1].eax == 0 && i != 2) + break; /* stop after a sub-entry whose eax is 0, except that index 2 is always filled */ + do_cpuid_1_ent(&entry[i], function, i); + entry[i].flags |= + KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + ++*nent; /* each sub-entry filled here counts against maxnent */ + } + break; + } case KVM_CPUID_SIGNATURE: { char signature[12] = "KVMKVMKVM\0\0"; u32 *sigptr = (u32 *)signature; @@ -4376,6 +4406,17 @@ not_found: return 36; } +static void kvm_update_cpuid(struct kvm_vcpu *vcpu, + struct kvm_cpuid_entry2 *best) +{ + /* For leaf 1 on an XSAVE-capable host, make ECX.OSXSAVE mirror the guest's current CR4.OSXSAVE */ + if (cpu_has_xsave && best->function == 0x1) { + best->ecx &= ~(bit(X86_FEATURE_OSXSAVE)); + if (kvm_read_cr4(vcpu) & X86_CR4_OSXSAVE) + best->ecx |= bit(X86_FEATURE_OSXSAVE); + } +} + void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) { u32 function, index; @@ -4389,6 +4430,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) kvm_register_write(vcpu, VCPU_REGS_RDX, 0); best = kvm_find_cpuid_entry(vcpu, function, index); if (best) { + kvm_update_cpuid(vcpu, best); /* refresh the entry in place before its registers are copied out */ kvm_register_write(vcpu, VCPU_REGS_RAX, best->eax); kvm_register_write(vcpu, 
VCPU_REGS_RBX, best->ebx); kvm_register_write(vcpu, VCPU_REGS_RCX, best->ecx); @@ -5118,6 +5160,11 @@ void fx_init(struct kvm_vcpu *vcpu) fpu_alloc(&vcpu->arch.guest_fpu); fpu_finit(&vcpu->arch.guest_fpu); + if (cpu_has_xsave) { + vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); + vcpu->arch.xcr0 = vcpu->arch.host_xcr0; /* guest XCR0 starts out equal to the value just read from the host */ + } + vcpu->arch.cr0 |= X86_CR0_ET; } EXPORT_SYMBOL_GPL(fx_init); @@ -5127,14 +5174,30 @@ static void fx_free(struct kvm_vcpu *vcpu) fpu_free(&vcpu->arch.guest_fpu); } +static u64 cpuid_get_possible_xcr0(struct kvm_vcpu *vcpu) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + return eax + ((u64)edx << 32); /* EDX:EAX of leaf 0xd sub-leaf 0 as one 64-bit mask; the vcpu argument is not used */ +} + void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { if (vcpu->guest_fpu_loaded) return; vcpu->guest_fpu_loaded = 1; + if (cpu_has_xsave) + vcpu->arch.host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); /* remember the current XCR0 so kvm_put_guest_fpu can write it back */ unlazy_fpu(current); + /* Restore all possible states in the guest */ + if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu)) + xsetbv(XCR_XFEATURE_ENABLED_MASK, + cpuid_get_possible_xcr0(vcpu)); fpu_restore_checking(&vcpu->arch.guest_fpu); + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0); /* after the restore, switch XCR0 to the guest's own value */ trace_kvm_fpu(1); } @@ -5144,7 +5207,15 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 0; + if (cpu_has_xsave) + vcpu->arch.xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); /* capture the XCR0 value in effect before it is changed below */ + /* Save all possible states in the guest */ + if (cpu_has_xsave && guest_cpuid_has_xsave(vcpu)) + xsetbv(XCR_XFEATURE_ENABLED_MASK, + cpuid_get_possible_xcr0(vcpu)); fpu_save_init(&vcpu->arch.guest_fpu); + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.host_xcr0); /* write back the value recorded in host_xcr0 */ ++vcpu->stat.fpu_reload; set_bit(KVM_REQ_DEACTIVATE_FPU, &vcpu->requests); trace_kvm_fpu(0);