From patchwork Tue Feb 15 08:23:33 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jan Kiszka X-Patchwork-Id: 83211 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) by ozlabs.org (Postfix) with ESMTP id 43EACB70FC for ; Tue, 15 Feb 2011 19:37:57 +1100 (EST) Received: from localhost ([127.0.0.1]:35238 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PpGL4-0000QU-8q for incoming@patchwork.ozlabs.org; Tue, 15 Feb 2011 03:32:50 -0500 Received: from [140.186.70.92] (port=54644 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PpGCN-00057A-Jg for qemu-devel@nongnu.org; Tue, 15 Feb 2011 03:27:59 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1PpGCI-0004Hh-Ib for qemu-devel@nongnu.org; Tue, 15 Feb 2011 03:23:48 -0500 Received: from david.siemens.de ([192.35.17.14]:26590) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1PpGCH-0004Gd-RO for qemu-devel@nongnu.org; Tue, 15 Feb 2011 03:23:46 -0500 Received: from mail1.siemens.de (localhost [127.0.0.1]) by david.siemens.de (8.13.6/8.13.6) with ESMTP id p1F8Ndv6023997; Tue, 15 Feb 2011 09:23:39 +0100 Received: from mchn199C.mchp.siemens.de ([139.25.246.60]) by mail1.siemens.de (8.13.6/8.13.6) with ESMTP id p1F8Nb0t027686; Tue, 15 Feb 2011 09:23:39 +0100 From: Jan Kiszka To: Avi Kivity , Marcelo Tosatti Date: Tue, 15 Feb 2011 09:23:33 +0100 Message-Id: <67f48a7cb1aebebb1dc4af0f413379686dcff24b.1297758211.git.jan.kiszka@siemens.com> X-Mailer: git-send-email 1.7.1 In-Reply-To: References: In-Reply-To: References: X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6, seldom 2.4 (older, 4) X-Received-From: 192.35.17.14 Cc: Hidetoshi Seto , Jin Dongming , qemu-devel@nongnu.org, kvm@vger.kernel.org, Huang Ying Subject: [Qemu-devel] [PATCH 09/13] kvm: x86: Consolidate TCG and KVM MCE injection code X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org This switches KVM's MCE injection path to cpu_x86_inject_mce, both for SIGBUS and monitor initiated events. This means we prepare the MCA MSRs in the VCPUState also for KVM. We have to drop the MSRs writeback restrictions for this purpose which is now safe as every uncoordinated MSR injection is removed with this patch. We have to execute every per-VCPU event setup on the target VCPU as we are performing a read-modify-write on its state. Signed-off-by: Jan Kiszka CC: Huang Ying CC: Hidetoshi Seto CC: Jin Dongming --- target-i386/helper.c | 105 ++++++++++++-------- target-i386/kvm.c | 256 +++++++------------------------------------------ target-i386/kvm_x86.h | 25 ----- 3 files changed, 96 insertions(+), 290 deletions(-) delete mode 100644 target-i386/kvm_x86.h diff --git a/target-i386/helper.c b/target-i386/helper.c index e3ef40c..a08309f 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -27,7 +27,6 @@ #include "exec-all.h" #include "qemu-common.h" #include "kvm.h" -#include "kvm_x86.h" #ifndef CONFIG_USER_ONLY #include "sysemu.h" #include "monitor.h" @@ -1067,29 +1066,42 @@ static void breakpoint_handler(CPUState *env) prev_debug_excp_handler(env); } -static void -qemu_inject_x86_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc, - int flags) +typedef struct MCEInjectionParams { + Monitor *mon; + CPUState *env; + int bank; + uint64_t status; + uint64_t mcg_status; + uint64_t addr; + uint64_t misc; + int flags; +} MCEInjectionParams; + +static void do_inject_x86_mce(void *data) { - uint64_t mcg_cap = cenv->mcg_cap; - uint64_t *banks = cenv->mce_banks + 4 * bank; + MCEInjectionParams *params = data; + CPUState *cenv = params->env; + uint64_t *banks = cenv->mce_banks + 4 * params->bank; + + cpu_synchronize_state(cenv); /* * If there is an MCE exception being processed, ignore this SRAO MCE * unless unconditional injection was requested. */ - if (!(flags & MCE_INJECT_UNCOND_AO) && !(status & MCI_STATUS_AR) + if (!(params->flags & MCE_INJECT_UNCOND_AO) + && !(params->status & MCI_STATUS_AR) && (cenv->mcg_status & MCG_STATUS_MCIP)) { return; } - if (status & MCI_STATUS_UC) { + + if (params->status & MCI_STATUS_UC) { /* * if MSR_MCG_CTL is not all 1s, the uncorrected error * reporting is disabled */ - if ((mcg_cap & MCG_CTL_P) && cenv->mcg_ctl != ~(uint64_t)0) { - monitor_printf(mon, + if ((cenv->mcg_cap & MCG_CTL_P) && cenv->mcg_ctl != ~(uint64_t)0) { + monitor_printf(params->mon, "CPU %d: Uncorrected error reporting disabled\n", cenv->cpu_index); return; @@ -1100,35 +1112,39 @@ qemu_inject_x86_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, * reporting is disabled for the bank */ if (banks[0] != ~(uint64_t)0) { - monitor_printf(mon, "CPU %d: Uncorrected error reporting disabled " - "for bank %d\n", cenv->cpu_index, bank); + monitor_printf(params->mon, + "CPU %d: Uncorrected error reporting disabled for" + " bank %d\n", + cenv->cpu_index, params->bank); return; } if ((cenv->mcg_status & MCG_STATUS_MCIP) || !(cenv->cr[4] & CR4_MCE_MASK)) { - monitor_printf(mon, "CPU %d: Previous MCE still in progress, " - "raising triple fault\n", cenv->cpu_index); + monitor_printf(params->mon, + "CPU %d: Previous MCE still in progress, raising" + " triple fault\n", + cenv->cpu_index); qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); qemu_system_reset_request(); return; } if (banks[1] & MCI_STATUS_VAL) { - status |= MCI_STATUS_OVER; + params->status |= MCI_STATUS_OVER; } - banks[2] = addr; - banks[3] = misc; - cenv->mcg_status = mcg_status; - banks[1] = status; + banks[2] = params->addr; + banks[3] = params->misc; + cenv->mcg_status = params->mcg_status; + banks[1] = params->status; cpu_interrupt(cenv, CPU_INTERRUPT_MCE); } else if (!(banks[1] & MCI_STATUS_VAL) || !(banks[1] & MCI_STATUS_UC)) { if (banks[1] & MCI_STATUS_VAL) { - status |= MCI_STATUS_OVER; + params->status |= MCI_STATUS_OVER; } - banks[2] = addr; - banks[3] = misc; - banks[1] = status; + banks[2] = params->addr; + banks[3] = params->misc; + banks[1] = params->status; } else { banks[1] |= MCI_STATUS_OVER; } @@ -1138,9 +1154,18 @@ void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, int flags) { + MCEInjectionParams params = { + .mon = mon, + .env = cenv, + .bank = bank, + .status = status, + .mcg_status = mcg_status, + .addr = addr, + .misc = misc, + .flags = flags, + }; unsigned bank_num = cenv->mcg_cap & 0xff; CPUState *env; - int flag = 0; if (!cenv->mcg_cap) { monitor_printf(mon, "MCE injection not supported\n"); @@ -1160,25 +1185,19 @@ void cpu_x86_inject_mce(Monitor *mon, CPUState *cenv, int bank, return; } - if (kvm_enabled()) { - if (flags & MCE_INJECT_BROADCAST) { - flag |= MCE_BROADCAST; - } - - kvm_inject_x86_mce(cenv, bank, status, mcg_status, addr, misc, flag); - } else { - qemu_inject_x86_mce(mon, cenv, bank, status, mcg_status, addr, misc, - flags); - if (flags & MCE_INJECT_BROADCAST) { - for (env = first_cpu; env != NULL; env = env->next_cpu) { - if (cenv == env) { - continue; - } - qemu_inject_x86_mce(mon, env, 1, - MCI_STATUS_VAL | MCI_STATUS_UC, - MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, - flags); + run_on_cpu(cenv, do_inject_x86_mce, ¶ms); + if (flags & MCE_INJECT_BROADCAST) { + params.bank = 1; + params.status = MCI_STATUS_VAL | MCI_STATUS_UC; + params.mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV; + params.addr = 0; + params.misc = 0; + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (cenv == env) { + continue; } + params.env = env; + run_on_cpu(cenv, do_inject_x86_mce, ¶ms); } } } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 46f45db..8be093b 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -28,7 +28,6 @@ #include "hw/pc.h" #include "hw/apic.h" #include "ioport.h" -#include "kvm_x86.h" #ifdef CONFIG_KVM_PARA #include @@ -193,164 +192,23 @@ static int kvm_setup_mce(CPUState *env, uint64_t *mcg_cap) return kvm_vcpu_ioctl(env, KVM_X86_SETUP_MCE, mcg_cap); } -static int kvm_set_mce(CPUState *env, struct kvm_x86_mce *m) +static void kvm_mce_inject(CPUState *env, target_phys_addr_t paddr, int code) { - return kvm_vcpu_ioctl(env, KVM_X86_SET_MCE, m); -} - -static int kvm_get_msr(CPUState *env, struct kvm_msr_entry *msrs, int n) -{ - struct kvm_msrs *kmsrs = qemu_malloc(sizeof *kmsrs + n * sizeof *msrs); - int r; - - kmsrs->nmsrs = n; - memcpy(kmsrs->entries, msrs, n * sizeof *msrs); - r = kvm_vcpu_ioctl(env, KVM_GET_MSRS, kmsrs); - memcpy(msrs, kmsrs->entries, n * sizeof *msrs); - free(kmsrs); - return r; -} - -/* FIXME: kill this and kvm_get_msr, use env->mcg_status instead */ -static int kvm_mce_in_progress(CPUState *env) -{ - struct kvm_msr_entry msr_mcg_status = { - .index = MSR_MCG_STATUS, - }; - int r; - - r = kvm_get_msr(env, &msr_mcg_status, 1); - if (r == -1 || r == 0) { - fprintf(stderr, "Failed to get MCE status\n"); - return 0; - } - return !!(msr_mcg_status.data & MCG_STATUS_MCIP); -} - -struct kvm_x86_mce_data -{ - CPUState *env; - struct kvm_x86_mce *mce; - int abort_on_error; -}; - -static void kvm_do_inject_x86_mce(void *_data) -{ - struct kvm_x86_mce_data *data = _data; - int r; - - /* If there is an MCE exception being processed, ignore this SRAO MCE */ - if ((data->env->mcg_cap & MCG_SER_P) && - !(data->mce->status & MCI_STATUS_AR)) { - if (kvm_mce_in_progress(data->env)) { - return; - } - } - - r = kvm_set_mce(data->env, data->mce); - if (r < 0) { - perror("kvm_set_mce FAILED"); - if (data->abort_on_error) { - abort(); - } - } -} - -static void kvm_inject_x86_mce_on(CPUState *env, struct kvm_x86_mce *mce, - int flag) -{ - struct kvm_x86_mce_data data = { - .env = env, - .mce = mce, - .abort_on_error = (flag & ABORT_ON_ERROR), - }; - - if (!env->mcg_cap) { - fprintf(stderr, "MCE support is not enabled!\n"); - return; - } - - run_on_cpu(env, kvm_do_inject_x86_mce, &data); -} + uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | + MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; + uint64_t mcg_status = MCG_STATUS_MCIP; -static void kvm_mce_broadcast_rest(CPUState *env) -{ - struct kvm_x86_mce mce = { - .bank = 1, - .status = MCI_STATUS_VAL | MCI_STATUS_UC, - .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, - .addr = 0, - .misc = 0, - }; - CPUState *cenv; - - /* Broadcast MCA signal for processor version 06H_EH and above */ - if (cpu_x86_support_mca_broadcast(env)) { - for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) { - if (cenv == env) { - continue; - } - kvm_inject_x86_mce_on(cenv, &mce, ABORT_ON_ERROR); - } - } -} - -static void kvm_mce_inj_srar_dataload(CPUState *env, target_phys_addr_t paddr) -{ - struct kvm_x86_mce mce = { - .bank = 9, - .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | MCI_STATUS_AR | 0x134, - .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV, - .addr = paddr, - .misc = (MCM_ADDR_PHYS << 6) | 0xc, - }; - int r; - - r = kvm_set_mce(env, &mce); - if (r < 0) { - fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); - abort(); - } - kvm_mce_broadcast_rest(env); -} - -static void kvm_mce_inj_srao_memscrub(CPUState *env, target_phys_addr_t paddr) -{ - struct kvm_x86_mce mce = { - .bank = 9, - .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | 0xc0, - .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, - .addr = paddr, - .misc = (MCM_ADDR_PHYS << 6) | 0xc, - }; - int r; - - r = kvm_set_mce(env, &mce); - if (r < 0) { - fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); - abort(); + if (code == BUS_MCEERR_AR) { + status |= MCI_STATUS_AR | 0x134; + mcg_status |= MCG_STATUS_EIPV; + } else { + status |= 0xc0; + mcg_status |= MCG_STATUS_RIPV; } - kvm_mce_broadcast_rest(env); -} - -static void kvm_mce_inj_srao_memscrub2(CPUState *env, target_phys_addr_t paddr) -{ - struct kvm_x86_mce mce = { - .bank = 9, - .status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN - | MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S - | 0xc0, - .mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV, - .addr = paddr, - .misc = (MCM_ADDR_PHYS << 6) | 0xc, - }; - - kvm_inject_x86_mce_on(env, &mce, ABORT_ON_ERROR); - kvm_mce_broadcast_rest(env); + cpu_x86_inject_mce(NULL, env, 9, status, mcg_status, paddr, + (MCM_ADDR_PHYS << 6) | 0xc, + cpu_x86_support_mca_broadcast(env) ? + MCE_INJECT_BROADCAST : 0); } #endif /* KVM_CAP_MCE */ @@ -360,19 +218,17 @@ static void hardware_memory_error(void) exit(1); } -int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr) +int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *hvaddr) { #ifdef KVM_CAP_MCE - void *vaddr; ram_addr_t ram_addr; - target_phys_addr_t paddr; - - if ((env->mcg_cap & MCG_SER_P) && addr - && (code == BUS_MCEERR_AR - || code == BUS_MCEERR_AO)) { - vaddr = (void *)addr; - if (qemu_ram_addr_from_host(vaddr, &ram_addr) || - !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, &paddr)) { + target_phys_addr_t gpaddr; + + if ((env->mcg_cap & MCG_SER_P) && hvaddr + && (code == BUS_MCEERR_AR || code == BUS_MCEERR_AO)) { + if (qemu_ram_addr_from_host(hvaddr, &ram_addr) || + !kvm_physical_memory_addr_from_ram(env->kvm_state, ram_addr, + &gpaddr)) { fprintf(stderr, "Hardware memory error for memory used by " "QEMU itself instead of guest system!\n"); /* Hope we are lucky for AO MCE */ @@ -382,20 +238,7 @@ int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr) hardware_memory_error(); } } - - if (code == BUS_MCEERR_AR) { - /* Fake an Intel architectural Data Load SRAR UCR */ - kvm_mce_inj_srar_dataload(env, paddr); - } else { - /* - * If there is an MCE excpetion being processed, ignore - * this SRAO MCE - */ - if (!kvm_mce_in_progress(env)) { - /* Fake an Intel architectural Memory scrubbing UCR */ - kvm_mce_inj_srao_memscrub(env, paddr); - } - } + kvm_mce_inject(env, gpaddr, code); } else #endif /* KVM_CAP_MCE */ { @@ -410,24 +253,22 @@ int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr) return 0; } -int kvm_arch_on_sigbus(int code, void *addr) +int kvm_arch_on_sigbus(int code, void *hvaddr) { #ifdef KVM_CAP_MCE - if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) { - void *vaddr; + if ((first_cpu->mcg_cap & MCG_SER_P) && hvaddr && code == BUS_MCEERR_AO) { ram_addr_t ram_addr; - target_phys_addr_t paddr; + target_phys_addr_t gpaddr; /* Hope we are lucky for AO MCE */ - vaddr = addr; - if (qemu_ram_addr_from_host(vaddr, &ram_addr) || + if (qemu_ram_addr_from_host(hvaddr, &ram_addr) || !kvm_physical_memory_addr_from_ram(first_cpu->kvm_state, ram_addr, - &paddr)) { + &gpaddr)) { fprintf(stderr, "Hardware memory error for memory used by " - "QEMU itself instead of guest system!: %p\n", addr); + "QEMU itself instead of guest system!: %p\n", hvaddr); return 0; } - kvm_mce_inj_srao_memscrub2(first_cpu, paddr); + kvm_mce_inject(first_cpu, gpaddr, code); } else #endif /* KVM_CAP_MCE */ { @@ -442,31 +283,6 @@ int kvm_arch_on_sigbus(int code, void *addr) return 0; } -void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc, - int flag) -{ -#ifdef KVM_CAP_MCE - struct kvm_x86_mce mce = { - .bank = bank, - .status = status, - .mcg_status = mcg_status, - .addr = addr, - .misc = misc, - }; - - if (flag & MCE_BROADCAST) { - kvm_mce_broadcast_rest(cenv); - } - - kvm_inject_x86_mce_on(cenv, &mce, flag); -#else /* !KVM_CAP_MCE*/ - if (flag & ABORT_ON_ERROR) { - abort(); - } -#endif /* !KVM_CAP_MCE*/ -} - static int kvm_inject_mce_oldstyle(CPUState *env) { #ifdef KVM_CAP_MCE @@ -1059,14 +875,10 @@ static int kvm_put_msrs(CPUState *env, int level) if (env->mcg_cap) { int i; - if (level == KVM_PUT_RESET_STATE) { - kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); - } else if (level == KVM_PUT_FULL_STATE) { - kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); - kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl); - for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { - kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]); - } + kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); + kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl); + for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { + kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]); } } #endif diff --git a/target-i386/kvm_x86.h b/target-i386/kvm_x86.h deleted file mode 100644 index 9d7b584..0000000 --- a/target-i386/kvm_x86.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * QEMU KVM support - * - * Copyright (C) 2009 Red Hat Inc. - * Copyright IBM, Corp. 2008 - * - * Authors: - * Anthony Liguori - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef __KVM_X86_H__ -#define __KVM_X86_H__ - -#define ABORT_ON_ERROR 0x01 -#define MCE_BROADCAST 0x02 - -void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, - uint64_t mcg_status, uint64_t addr, uint64_t misc, - int flag); - -#endif