From patchwork Thu Dec 10 16:38:31 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ashok Raj X-Patchwork-Id: 555194 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 52376140082 for ; Fri, 11 Dec 2015 02:39:27 +1100 (AEDT) Received: from localhost ([::1]:42557 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1a73JV-00087y-B6 for incoming@patchwork.ozlabs.org; Thu, 10 Dec 2015 10:39:25 -0500 Received: from eggs.gnu.org ([2001:4830:134:3::10]:55696) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1a73Iv-0007Gv-NS for qemu-devel@nongnu.org; Thu, 10 Dec 2015 10:38:53 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1a73Is-0000X4-H1 for qemu-devel@nongnu.org; Thu, 10 Dec 2015 10:38:49 -0500 Received: from mga01.intel.com ([192.55.52.88]:3040) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1a73Is-0000Wt-77 for qemu-devel@nongnu.org; Thu, 10 Dec 2015 10:38:46 -0500 Received: from orsmga002.jf.intel.com ([10.7.209.21]) by fmsmga101.fm.intel.com with ESMTP; 10 Dec 2015 07:38:44 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.20,408,1444719600"; d="scan'208";a="868907763" Received: from otc-brkl-03.jf.intel.com ([10.54.39.10]) by orsmga002.jf.intel.com with ESMTP; 10 Dec 2015 07:38:44 -0800 From: Ashok Raj To: qemu-devel@nongnu.org Date: Thu, 10 Dec 2015 11:38:31 -0500 Message-Id: <1449765512-24796-1-git-send-email-ashok.raj@intel.com> X-Mailer: git-send-email 2.4.3 X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. 
X-Received-From: 192.55.52.88 Cc: Tony Luck , Ashok Raj , Gleb Natapov , Gong Chen , Andi Kleen , Paolo Bonzini , Boris Petkov Subject: [Qemu-devel] [Patch V1 1/2] x86, mce: Basic support to add LMCE support to QEMU X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org This patch adds the basic enumeration and control MSRs required to support Local Machine Check Exception (LMCE). - Added Local Machine Check definitions and changed MCG_CAP. - Added support for IA32_FEATURE_CONTROL. - When delivering an MCE to the guest, we deliver it to just a single CPU when the guest OS has opted in to local delivery. Also tested: - A legacy guest that doesn't support recovery would panic. - A platform that supports recovery but not LMCE. KVM can emulate LMCE for the guest: the platform MCE is broadcast, but we inject the MCE only into the one thread that encountered the fault. Reviewed-by: Andi Kleen Signed-off-by: Ashok Raj Tested-by: Gong Chen --- V2 patchset: Removed a line based on Paolo's suggestion. Gong verified the new patch works. 
target-i386/cpu.c | 8 ++++++++ target-i386/cpu.h | 8 ++++++-- target-i386/kvm.c | 38 +++++++++++++++++++++++++++++++------- 3 files changed, 45 insertions(+), 9 deletions(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 11e5e39..167669a 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2737,6 +2737,13 @@ static void mce_init(X86CPU *cpu) } } +static void feature_control_init(X86CPU *cpu) +{ + CPUX86State *cenv = &cpu->env; + + cenv->msr_ia32_feature_control = ((1<<20) | (1<<0)); +} + #ifndef CONFIG_USER_ONLY static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) { @@ -2858,6 +2865,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) #endif mce_init(cpu); + feature_control_init(cpu); #ifndef CONFIG_USER_ONLY if (tcg_enabled()) { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 84edfd0..a567d7a 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -282,8 +282,9 @@ #define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ +#define MCG_LMCE_P (1ULL<<27) /* Local Machine Check Supported */ -#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P) +#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P|MCG_LMCE_P) #define MCE_BANKS_DEF 10 #define MCG_CAP_BANKS_MASK 0xff @@ -291,6 +292,7 @@ #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ +#define MCG_STATUS_LMCE (1ULL<<3) /* Local MCE signaled */ #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ @@ -333,6 +335,7 @@ #define MSR_MCG_CAP 0x179 #define MSR_MCG_STATUS 0x17a #define MSR_MCG_CTL 0x17b +#define MSR_MCG_EXT_CTL 0x4d0 #define MSR_P6_EVNTSEL0 0x186 @@ -892,7 +895,6 @@ typedef struct CPUX86State { uint64_t mcg_status; uint64_t msr_ia32_misc_enable; - uint64_t msr_ia32_feature_control; uint64_t msr_fixed_ctr_ctrl; uint64_t 
msr_global_ctrl; @@ -977,8 +979,10 @@ typedef struct CPUX86State { int64_t tsc_khz; void *kvm_xsave_buf; + uint64_t msr_ia32_feature_control; uint64_t mcg_cap; uint64_t mcg_ctl; + uint64_t mcg_ext_ctl; uint64_t mce_banks[MCE_BANKS_DEF*4]; uint64_t tsc_aux; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 6dc9846..c61fe1f 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -72,6 +72,7 @@ static bool has_msr_tsc_aux; static bool has_msr_tsc_adjust; static bool has_msr_tsc_deadline; static bool has_msr_feature_control; +static bool has_msr_ext_mcg_ctl; static bool has_msr_async_pf_en; static bool has_msr_pv_eoi_en; static bool has_msr_misc_enable; @@ -370,18 +371,30 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; uint64_t mcg_status = MCG_STATUS_MCIP; + int flags = 0; + CPUState *cs = CPU(cpu); + + /* + * We need to read back the value of MSR_EXT_MCG_CTL that was set by the + * guest kernel back into Qemu + */ + cpu_synchronize_state(cs); + + flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0; if (code == BUS_MCEERR_AR) { - status |= MCI_STATUS_AR | 0x134; - mcg_status |= MCG_STATUS_EIPV; + status |= MCI_STATUS_AR | 0x134; + mcg_status |= MCG_STATUS_EIPV; + if (env->mcg_ext_ctl & 0x1) { + mcg_status |= MCG_STATUS_LMCE; + flags = 0; /* No Broadcast when LMCE is opted by guest */ + } } else { status |= 0xc0; mcg_status |= MCG_STATUS_RIPV; } cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr, - (MCM_ADDR_PHYS << 6) | 0xc, - cpu_x86_support_mca_broadcast(env) ? 
- MCE_INJECT_BROADCAST : 0); + (MCM_ADDR_PHYS << 6) | 0xc, flags); } static void hardware_memory_error(void) @@ -808,10 +821,14 @@ int kvm_arch_init_vcpu(CPUState *cs) c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0); if (c) { - has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) || - !!(c->ecx & CPUID_EXT_SMX); + has_msr_feature_control = !!((c->ecx & CPUID_EXT_VMX) || + !!(c->ecx & CPUID_EXT_SMX) || + !!(env->mcg_cap & MCG_LMCE_P)); } + if (has_msr_feature_control && (env->mcg_cap & MCG_LMCE_P)) + has_msr_ext_mcg_ctl = true; + c = cpuid_find_entry(&cpuid_data.cpuid, 0x80000007, 0); if (c && (c->edx & 1<<8) && invtsc_mig_blocker == NULL) { /* for migration */ @@ -1557,6 +1574,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl); + kvm_msr_entry_set(&msrs[n++], MSR_MCG_EXT_CTL, env->mcg_ext_ctl); for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]); } @@ -1811,6 +1829,9 @@ static int kvm_get_msrs(X86CPU *cpu) if (has_msr_feature_control) { msrs[n++].index = MSR_IA32_FEATURE_CONTROL; } + if (has_msr_ext_mcg_ctl) { + msrs[n++].index = MSR_MCG_EXT_CTL; + } if (has_msr_bndcfgs) { msrs[n++].index = MSR_IA32_BNDCFGS; } @@ -1981,6 +2002,9 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_IA32_FEATURE_CONTROL: env->msr_ia32_feature_control = msrs[i].data; break; + case MSR_MCG_EXT_CTL: + env->mcg_ext_ctl = msrs[i].data; + break; case MSR_IA32_BNDCFGS: env->msr_bndcfgs = msrs[i].data; break;