From patchwork Mon Nov 1 15:01:27 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alexander Graf X-Patchwork-Id: 69782 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id D17BFB70AF for ; Tue, 2 Nov 2010 02:22:38 +1100 (EST) Received: from localhost ([127.0.0.1]:55890 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PCwCo-00079e-H2 for incoming@patchwork.ozlabs.org; Mon, 01 Nov 2010 11:21:54 -0400 Received: from [140.186.70.92] (port=52899 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PCvts-0003kP-P0 for qemu-devel@nongnu.org; Mon, 01 Nov 2010 11:02:24 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1PCvtW-0000gN-Mj for qemu-devel@nongnu.org; Mon, 01 Nov 2010 11:02:04 -0400 Received: from cantor.suse.de ([195.135.220.2]:60262 helo=mx1.suse.de) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1PCvtW-0000f0-8x for qemu-devel@nongnu.org; Mon, 01 Nov 2010 11:01:58 -0400 Received: from relay2.suse.de (charybdis-ext.suse.de [195.135.221.2]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) by mx1.suse.de (Postfix) with ESMTP id 5E810947B6; Mon, 1 Nov 2010 16:01:54 +0100 (CET) From: Alexander Graf To: qemu-devel Developers Date: Mon, 1 Nov 2010 16:01:27 +0100 Message-Id: <1288623713-28062-15-git-send-email-agraf@suse.de> X-Mailer: git-send-email 1.6.0.2 In-Reply-To: <1288623713-28062-1-git-send-email-agraf@suse.de> References: <1288623713-28062-1-git-send-email-agraf@suse.de> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.4-2.6 Cc: Gerd Hoffmann Subject: [Qemu-devel] [PATCH 14/40] xenner: kernel: 
Instruction emulator X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org In some cases we need to emulate guest instructions. This patch adds code to take care of this. Signed-off-by: Alexander Graf --- pc-bios/xenner/xenner-instr.c | 405 +++++++++++++++++++++++++++++++++++++++++ 1 files changed, 405 insertions(+), 0 deletions(-) create mode 100644 pc-bios/xenner/xenner-instr.c diff --git a/pc-bios/xenner/xenner-instr.c b/pc-bios/xenner/xenner-instr.c new file mode 100644 index 0000000..11be2ce --- /dev/null +++ b/pc-bios/xenner/xenner-instr.c @@ -0,0 +1,405 @@ +/* + * Copyright (C) Red Hat 2007 + * Copyright (C) Novell Inc. 2010 + * + * Author(s): Gerd Hoffmann + * Alexander Graf + * + * Xenner instruction emulator + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . 
+ */
+
+#include "xenner.h"
+#include "msr-index.h"
+#include "cpufeature.h"
+
+/*
+ * Execute CPUID on the host CPU with EAX = entry->function and ECX = index,
+ * and store the resulting EAX/EBX/ECX/EDX values in entry.
+ */
+static void cpuid_count(struct kvm_cpuid_entry *entry, uint32_t index)
+{
+    asm volatile("cpuid"
+                 : "=a" (entry->eax),
+                   "=b" (entry->ebx),
+                   "=c" (entry->ecx),
+                   "=d" (entry->edx)
+                 : "a" (entry->function),
+                   "c" (index));
+}
+
+/*
+ * Execute CPUID for leaf entry->function, sub-leaf 0, and store the result
+ * registers in entry.
+ *
+ * ECX is loaded explicitly so that leaves which take a sub-leaf index do
+ * not depend on whatever value happens to be in ECX at the call site.
+ */
+void real_cpuid(struct kvm_cpuid_entry *entry)
+{
+    cpuid_count(entry, 0);
+}
+
+/*
+ * Return x with one feature bit cleared.
+ *
+ * Only the bit position within a 32-bit CPUID register is used (bit % 32),
+ * and the mask is built from an unsigned long so the shift is well defined.
+ *
+ * NOTE(review): this assumes the X86_FEATURE_* constants from cpufeature.h
+ * are encoded as word * 32 + bit -- confirm against that header.
+ */
+static unsigned long clear_cpuid_bit(unsigned long bit, unsigned long x)
+{
+    return x & ~(1UL << (bit % 32));
+}
+
+/*
+ * Clear the EDX feature bits that are hidden in both the standard feature
+ * leaf (0x00000001) and the extended feature leaf (0x80000001).
+ */
+static void filter_shared_edx(struct kvm_cpuid_entry *entry)
+{
+    entry->edx = clear_cpuid_bit(X86_FEATURE_VME,   entry->edx);
+    entry->edx = clear_cpuid_bit(X86_FEATURE_PSE,   entry->edx);
+    entry->edx = clear_cpuid_bit(X86_FEATURE_PGE,   entry->edx);
+    entry->edx = clear_cpuid_bit(X86_FEATURE_MCE,   entry->edx);
+    entry->edx = clear_cpuid_bit(X86_FEATURE_MCA,   entry->edx);
+    entry->edx = clear_cpuid_bit(X86_FEATURE_MTRR,  entry->edx);
+    entry->edx = clear_cpuid_bit(X86_FEATURE_PSE36, entry->edx);
+}
+
+/*
+ * Mask out of a CPUID result the features that are hidden from the guest.
+ *
+ * entry->function selects which masks apply; leaves not listed here are
+ * passed through unmodified.
+ */
+static void filter_cpuid(struct kvm_cpuid_entry *entry)
+{
+    switch (entry->function) {
+    case 0x00000001:
+        entry->edx = clear_cpuid_bit(X86_FEATURE_SEP, entry->edx);
+        entry->edx = clear_cpuid_bit(X86_FEATURE_DS,  entry->edx);
+        entry->edx = clear_cpuid_bit(X86_FEATURE_ACC, entry->edx);
+        entry->edx = clear_cpuid_bit(X86_FEATURE_PBE, entry->edx);
+
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_DTES64, entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_MWAIT,  entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_DSCPL,  entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_VMXE,   entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_SMXE,   entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_EST,    entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_TM2,    entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_XTPR,   entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_PDCM,   entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_DCA,    entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_XSAVE,  entry->ecx);
+
+        /*
+         * Only the EDX bits shared with leaf 0x80000001 are applied here.
+         * The extended-only masks below must not be applied to this leaf:
+         * the same bit positions describe unrelated features in leaf 1.
+         */
+        filter_shared_edx(entry);
+        break;
+
+    case 0x80000001:
+        filter_shared_edx(entry);
+
+#ifdef CONFIG_32BIT
+        entry->edx = clear_cpuid_bit(X86_FEATURE_LM,      entry->edx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_LAHF_LM, entry->ecx);
+#endif
+        entry->edx = clear_cpuid_bit(X86_FEATURE_PAGE1GB, entry->edx);
+        entry->edx = clear_cpuid_bit(X86_FEATURE_RDTSCP,  entry->edx);
+
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_SVME,   entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_OSVW,   entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_IBS,    entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_SKINIT, entry->ecx);
+        entry->ecx = clear_cpuid_bit(X86_FEATURE_WDT,    entry->ecx);
+        break;
+
+    case 0x00000005: /* MONITOR/MWAIT */
+    case 0x0000000a: /* Architectural Performance Monitor Features */
+    case 0x8000000a: /* SVM revision and features */
+    case 0x8000001b: /* Instruction Based Sampling */
+        /* these leaves are reported as all zeroes */
+        entry->eax = 0;
+        entry->ebx = 0;
+        entry->ecx = 0;
+        entry->edx = 0;
+        break;
+
+    default:
+        /* everything else is passed through unfiltered */
+        break;
+    }
+}
+
+/*
+ * Emulate the cpuid instruction: run CPUID on the host for the leaf in
+ * regs->rax and the sub-leaf in regs->rcx, filter the result and write it
+ * back to regs->rax/rbx/rcx/rdx.
+ */
+static void emulate_cpuid(struct regs *regs)
+{
+    struct kvm_cpuid_entry entry;
+
+    entry.function = regs->rax;
+    cpuid_count(&entry, (uint32_t)regs->rcx);
+    filter_cpuid(&entry);
+    regs->rax = entry.eax;
+    regs->rbx = entry.ebx;
+    regs->rcx = entry.ecx;
+    regs->rdx = entry.edx;
+    printk(2, "cpuid 0x%08x: eax 0x%08x ebx 0x%08x ecx 0x%08x edx 0x%08x\n",
+           entry.function, entry.eax, entry.ebx, entry.ecx, entry.edx);
+}
+
+/*
+ * Emulate the rdmsr instruction for the MSR selected by regs->rcx.
+ *
+ * White-listed MSRs are read with rdmsr() and returned in regs->rax (low
+ * half) and regs->rdx (high half).  Any other MSR is logged and reads as
+ * zero.
+ */
+static void emulate_rdmsr(struct regs *regs)
+{
+    uint32_t ax,dx;
+
+    switch (regs->rcx) {
+    case MSR_EFER:
+    case MSR_FS_BASE:
+    case MSR_GS_BASE:
+    case MSR_KERNEL_GS_BASE:
+        /* white listed */
+        rdmsr(regs->rcx, &ax, &dx);
+        regs->rax = ax;
+        regs->rdx = dx;
+        break;
+    default:
+        printk(1, "%s: ignore: rcx 0x%" PRIxREG "\n", __FUNCTION__, regs->rcx);
+        regs->rax = 0;
+        regs->rdx = 0;
+        break;
+    }
+}
+
+/*
+ * Emulate the wrmsr instruction for the MSR selected by regs->rcx.
+ *
+ * MSR_FS_BASE, MSR_GS_BASE and MSR_KERNEL_GS_BASE are written through
+ * unchanged.  MSR_EFER is written only if the new value sets no bit outside
+ * "known" and leaves every bit in "fixed" at its current value.  Rejected
+ * and unlisted writes are logged and dropped.
+ */
+static void emulate_wrmsr(struct regs *regs)
+{
+    static const uint64_t known = (EFER_NX|EFER_LMA|EFER_LME|EFER_SCE);
+    static const uint64_t fixed = (EFER_LMA|EFER_LME|EFER_SCE);
+    uint64_t val;
+    uint32_t ax,dx;
+
+    switch (regs->rcx) {
+    case MSR_EFER:
+        /*
+         * The value written by wrmsr is EDX:EAX, so validate exactly that:
+         * bits above EAX in rax do not take part, bits in EDX do.
+         */
+        val = ((uint64_t)(uint32_t)regs->rdx << 32) | (uint32_t)regs->rax;
+        if (val & ~known) {
+            printk(1, "%s: efer: unknown bit set\n", __FUNCTION__);
+            goto out;
+        }
+
+        rdmsr(regs->rcx, &ax, &dx);
+        if ((val & fixed) != (ax & fixed)) {
+            printk(1, "%s: efer: modify fixed bit\n", __FUNCTION__);
+            goto out;
+        }
+
+        printk(1, "%s: efer:%s%s%s%s\n", __FUNCTION__,
+               val & EFER_SCE ? " sce" : "",
+               val & EFER_LME ? " lme" : "",
+               val & EFER_LMA ? " lma" : "",
+               val & EFER_NX  ? " nx"  : "");
+        /* fall through */
+    case MSR_FS_BASE:
+    case MSR_GS_BASE:
+    case MSR_KERNEL_GS_BASE:
+        wrmsr(regs->rcx, regs->rax, regs->rdx);
+        return;
+    default:
+        break;
+    }
+
+out:
+    printk(1, "%s: ignore: 0x%" PRIxREG " 0x%" PRIxREG ":0x%" PRIxREG "\n",
+           __FUNCTION__, regs->rcx, regs->rdx, regs->rax);
+}
+
+/*
+ * Log the address of an instruction and its first eight bytes.
+ *
+ * level and prefix are handed to printk(); instr must point at eight or
+ * more readable bytes.
+ */
+void print_emu_instr(int level, const char *prefix, uint8_t *instr)
+{
+    printk(level, "%s: rip %p bytes %02x %02x %02x %02x %02x %02x %02x %02x\n",
+           prefix, instr,
+           instr[0], instr[1], instr[2], instr[3],
+           instr[4], instr[5], instr[6], instr[7]);
+}
+
+/*
+ * Map a 3-bit register number taken from a ModRM byte to the matching
+ * general purpose register slot in regs.
+ *
+ * If rm is non-zero the number is taken from bits 0-2 of modrm, otherwise
+ * from bits 3-5.  All eight encodings are handled, so the result is never
+ * NULL.
+ */
+static ureg_t *decode_reg(struct regs *regs, uint8_t modrm, int rm)
+{
+    int shift = rm ? 0 : 3;
+    ureg_t *reg = NULL;
+
+    switch ((modrm >> shift) & 0x07) {
+    case 0: reg = (ureg_t*)&regs->rax; break;
+    case 1: reg = (ureg_t*)&regs->rcx; break;
+    case 2: reg = (ureg_t*)&regs->rdx; break;
+    case 3: reg = (ureg_t*)&regs->rbx; break;
+    case 4: reg = (ureg_t*)&regs->rsp; break;
+    case 5: reg = (ureg_t*)&regs->rbp; break;
+    case 6: reg = (ureg_t*)&regs->rsi; break;
+    case 7: reg = (ureg_t*)&regs->rdi; break;
+    }
+    return reg;
+}
+
+/*
+ * Advance a write position inside a buffer of "size" bytes by the value
+ * returned from snprintf(), never moving past the terminating NUL slot.
+ */
+static int print_bits_advance(int pos, int size, int len)
+{
+    if (len < 0) {
+        return pos;
+    }
+    if (len >= size - pos) {
+        /* output was truncated: the buffer is full */
+        return size - 1;
+    }
+    return pos + len;
+}
+
+/*
+ * Log which bits are set in "new" and how they differ from "old".
+ *
+ * The line starts with "msg:" followed by one name per bit: a bare name for
+ * a bit set in both values, "+name" for a bit only set in new and "-name"
+ * for a bit only set in old.  names[] is indexed by bit number 0..31; a NULL
+ * entry is printed as "???".  Output that does not fit the internal buffer
+ * is truncated.
+ */
+void print_bits(int level, const char *msg, uint32_t old, uint32_t new,
+                const char *names[])
+{
+    char buf[128];
+    const int size = sizeof(buf);
+    int pos = 0;
+    uint32_t mask;
+    const char *mod;
+    int i;
+
+    buf[0] = 0;
+    pos = print_bits_advance(pos, size,
+                             snprintf(buf+pos, size-pos, "%s:", msg));
+    for (i = 0; i < 32 && pos < size - 1; i++) {
+        mask = (uint32_t)1 << i;
+        if (new&mask) {
+            if (old&mask) {
+                /* bit present */
+                mod = "";
+            } else {
+                /* bit added */
+                mod = "+";
+            }
+        } else {
+            if (old&mask) {
+                /* bit removed */
+                mod = "-";
+            } else {
+                /* bit not present */
+                continue;
+            }
+        }
+        pos = print_bits_advance(pos, size,
+                                 snprintf(buf+pos, size-pos, " %s%s",
+                                          mod, names[i] ? names[i] : "???"));
+    }
+    buf[pos] = 0;
+
+    /* the newline is added here so a truncated line is still terminated */
+    printk(level, "%s\n", buf);
+}
+
+/*
+ * Emulate "mov %crN,%reg" (0f 20 /r).
+ *
+ * The control register number is taken from bits 3-5 of modrm, the
+ * destination register from bits 0-2.  Returns 1 if the control register
+ * is handled (cr0, cr3, cr4) and 0 otherwise.
+ */
+static int emulate_read_cr(struct xen_cpu *cpu, struct regs *regs,
+                           uint8_t modrm)
+{
+    ureg_t *reg = decode_reg(regs, modrm, 1);
+
+    switch ((modrm >> 3) & 0x07) {
+    case 0:
+        *reg = read_cr0();
+        return 1;
+    case 3:
+        *reg = frame_to_addr(read_cr3_mfn(cpu));
+        return 1;
+    case 4:
+        *reg = read_cr4();
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/*
+ * Emulate "mov %reg,%crN" (0f 22 /r).
+ *
+ * The control register number is taken from bits 3-5 of modrm, the source
+ * register from bits 0-2.  A new value is written only if it agrees with
+ * the current value in all bits of the respective *_fixed mask; otherwise
+ * the write is logged and dropped.  Returns 1 if the control register is
+ * handled (cr0, cr4) and 0 otherwise.
+ */
+static int emulate_write_cr(struct regs *regs, uint8_t modrm)
+{
+    static const ureg_t cr0_fixed = ~(X86_CR0_TS);
+    static const ureg_t cr4_fixed = X86_CR4_TSD;
+    ureg_t *reg = decode_reg(regs, modrm, 1);
+    ureg_t cr;
+
+    switch ((modrm >> 3) & 0x07) {
+    case 0:
+        cr = read_cr0();
+        if (cr != *reg) {
+            if ((cr & cr0_fixed) == (*reg & cr0_fixed)) {
+                print_bits(2, "apply cr0 update", cr, *reg, cr0_bits);
+                write_cr0(*reg);
+            } else {
+                print_bits(1, "IGNORE cr0 update", cr, *reg, cr0_bits);
+            }
+        }
+        return 1;
+    case 4:
+        cr = read_cr4();
+        if (cr != *reg) {
+            if ((cr & cr4_fixed) == (*reg & cr4_fixed)) {
+                print_bits(1, "apply cr4 update", cr, *reg, cr4_bits);
+                write_cr4(*reg);
+            } else {
+                print_bits(1, "IGNORE cr4 update", cr, *reg, cr4_bits);
+            }
+        }
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/*
+ * Emulate the instruction at regs->rip.
+ *
+ * Handled: clts, wbinvd, mov from/to control registers, wrmsr, rdmsr,
+ * cpuid, in, out, cli and sti, each optionally preceded by a 0x66 prefix
+ * and/or the five byte sequence ud2a 'x' 'e' 'n'.  When that sequence is
+ * found regs->rip is advanced past it before decoding continues.
+ *
+ * Port input reads as all ones in the width of the access; port output is
+ * discarded.
+ *
+ * Returns the number of instruction bytes consumed starting at regs->rip,
+ * 0 for a plain ud2a (nothing is emulated), or -1 if the instruction is not
+ * recognised.
+ */
+int emulate(struct xen_cpu *cpu, struct regs *regs)
+{
+    static const uint8_t xen_emu_prefix[5] = {0x0f, 0x0b, 'x','e','n'};
+    uint8_t *instr;
+    int prefix_len;
+    int opsize16;
+    int in_bits;
+    int skip;
+
+restart:
+    instr = (void*)regs->rip;
+    opsize16 = 0;
+    in_bits = 0;
+    skip = 0;
+
+    /* prefixes */
+    if (instr[skip] == 0x66) {
+        opsize16 = 1;
+        skip++;
+    }
+    prefix_len = skip;
+
+    /* instructions */
+    switch (instr[skip]) {
+    case 0x0f:
+        switch (instr[skip+1]) {
+        case 0x06:
+            /* clts */
+            clts();
+            skip += 2;
+            break;
+        case 0x09:
+            /* wbinvd */
+            __asm__("wbinvd" ::: "memory");
+            skip += 2;
+            break;
+        case 0x0b:
+            /* ud2a */
+            if (xen_emu_prefix[2] == instr[skip+2] &&
+                xen_emu_prefix[3] == instr[skip+3] &&
+                xen_emu_prefix[4] == instr[skip+4]) {
+                printk(2, "%s: xen emu prefix\n", __FUNCTION__);
+                /* step over what was decoded so far and start again */
+                regs->rip += skip + 5;
+                goto restart;
+            }
+            printk(1, "%s: ud2a -- linux kernel BUG()?\n", __FUNCTION__);
+            /* bounce to guest, hoping it prints more info */
+            return 0;
+        case 0x20:
+            /* read control registers */
+            if (emulate_read_cr(cpu, regs, instr[skip+2])) {
+                skip += 3;
+            }
+            break;
+        case 0x22:
+            /* write control registers */
+            if (emulate_write_cr(regs, instr[skip+2])) {
+                skip += 3;
+            }
+            break;
+        case 0x30:
+            /* wrmsr */
+            emulate_wrmsr(regs);
+            skip += 2;
+            break;
+        case 0x32:
+            /* rdmsr */
+            emulate_rdmsr(regs);
+            skip += 2;
+            break;
+        case 0xa2:
+            /* cpuid */
+            emulate_cpuid(regs);
+            skip += 2;
+            break;
+        default:
+            break;
+        }
+        break;
+
+    case 0xe4: /* in $imm8,%al */
+        in_bits = 8;
+        skip += 2;
+        break;
+    case 0xe5: /* in $imm8,%ax or %eax */
+        in_bits = opsize16 ? 16 : 32;
+        skip += 2;
+        break;
+    case 0xec: /* in (%dx),%al */
+        in_bits = 8;
+        skip += 1;
+        break;
+    case 0xed: /* in (%dx),%ax or %eax */
+        in_bits = opsize16 ? 16 : 32;
+        skip += 1;
+        break;
+    case 0xe6: /* out %al,$imm8 */
+    case 0xe7:
+        skip += 2;
+        break;
+    case 0xee: /* out %al,(%dx) */
+    case 0xef:
+        skip += 1;
+        break;
+
+    case 0xfa:
+        /* cli */
+        guest_cli(cpu);
+        skip += 1;
+        break;
+    case 0xfb:
+        /* sti */
+        guest_sti(cpu);
+        skip += 1;
+        break;
+
+    default:
+        break;
+    }
+
+    /* unknown instruction: nothing beyond the prefix was consumed */
+    if (skip == prefix_len) {
+        print_emu_instr(0, "instr emu failed", instr);
+        return -1;
+    }
+
+    /* I/O instruction: input reads as all ones in the accessed width */
+    switch (in_bits) {
+    case 8:
+        regs->rax |= 0xff;
+        break;
+    case 16:
+        regs->rax |= 0xffff;
+        break;
+    case 32:
+        regs->rax |= 0xffffffff;
+        break;
+    default:
+        break;
+    }
+
+    return skip;
+}