From patchwork Mon Nov 1 15:01:25 2010
X-Patchwork-Submitter: Alexander Graf
X-Patchwork-Id: 69790
From: Alexander Graf <agraf@suse.de>
To: qemu-devel Developers <qemu-devel@nongnu.org>
Cc: Gerd Hoffmann
Date: Mon, 1 Nov 2010 16:01:25 +0100
Message-Id: <1288623713-28062-13-git-send-email-agraf@suse.de>
X-Mailer: git-send-email 1.6.0.2
In-Reply-To: <1288623713-28062-1-git-send-email-agraf@suse.de>
References: <1288623713-28062-1-git-send-email-agraf@suse.de>
Subject: [Qemu-devel] [PATCH 12/40] xenner: kernel: Hypercall handler (generic)

Xenner handles guest hypercalls itself. This patch adds all the handling
code that is shared between i386 and x86_64.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 pc-bios/xenner/xenner-hcall.c | 1031 +++++++++++++++++++++++++++++++++++++++++
 1 files changed, 1031 insertions(+), 0 deletions(-)
 create mode 100644 pc-bios/xenner/xenner-hcall.c

diff --git a/pc-bios/xenner/xenner-hcall.c b/pc-bios/xenner/xenner-hcall.c
new file mode 100644
index 0000000..30b574f
--- /dev/null
+++ b/pc-bios/xenner/xenner-hcall.c
@@ -0,0 +1,1031 @@
+/*
+ * Copyright (C) Red Hat 2007
+ * Copyright (C) Novell Inc. 2010
+ *
+ * Author(s): Gerd Hoffmann
+ *            Alexander Graf <agraf@suse.de>
+ *
+ * Xenner hypercall handlers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <inttypes.h>  /* PRIx64/PRId64 */
+
+#include "config-host.h"
+#include "xenner.h"
+
+sreg_t error_noop(struct xen_cpu *cpu, ureg_t *args)
+{
+    /* ignore */
+    return 0;
+}
+
+sreg_t error_noperm(struct xen_cpu *cpu, ureg_t *args)
+{
+    /* we don't do dom0 hypercalls */
+    return -EPERM;
+}
+
+sreg_t console_io(struct xen_cpu *cpu, ureg_t *args)
+{
+    int count = args[1];
+    void *ptr = (void*)args[2];
+    uint8_t buf[128];
+
+    switch (args[0]) {
+    case CONSOLEIO_write:
+        if (count > sizeof(buf)-1) {
+            count = sizeof(buf)-1;
+        }
+        if (0 != memcpy_pf(&buf, ptr, count)) {
+            return -EFAULT;
+        }
+        buf[count] = 0;
+        while (count > 0 && (buf[count-1] == '\r' || buf[count-1] == '\n')) {
+            buf[--count] = 0;
+        }
+        printk(1, "guest: \"%s\"\n", buf);
+        return count;
+    case CONSOLEIO_read:
+        return 0;
+    default:
+        printk(1, "console: unknown: %s\n", consoleio_name(args[0]));
+        return -ENOSYS;
+    }
+}
+
+sreg_t stack_switch(struct xen_cpu *cpu, ureg_t *args)
+{
+    cpu->kernel_ss = fix_sel(args[0]);
+    cpu->kernel_sp = args[1];
+
+#ifdef CONFIG_32BIT
+    cpu->tss.ss1 = cpu->kernel_ss;
+    cpu->tss.esp1 = cpu->kernel_sp;
+#endif
+    return 0;
+}
+
+sreg_t update_descriptor(struct xen_cpu *cpu, ureg_t *args)
+{
+#ifdef CONFIG_64BIT
+    uint64_t pa = args[0];
+    struct descriptor_32 desc = {
+        .a = args[1] & 0xffffffff,
+        .b = args[1] >> 32,
+    };
+#else
+    uint64_t pa = args[0] | (uint64_t)args[1] << 32;
+    struct descriptor_32 desc = {
+        .a = args[2],
+        .b = args[3],
+    };
+#endif
+    struct descriptor_32 *guest_gdt;
+    int p, index;
+    uint64_t mfn;
+
+    fix_desc(&desc);
+
+    mfn = addr_to_frame(pa);
+    for (p = 0; p < 16; p++) {
+        if (mfn == cpu->gdt_mfns[p]) {
+            break;
+        }
+    }
+    if (p == 16) {
+        printk(1, "%s: not found in gdt: pa %" PRIx64 " (ldt update?)\n",
+               __FUNCTION__, pa);
+    } else {
+        /* update emu gdt shadow */
+        index = addr_offset(pa) / sizeof(struct descriptor_32);
+        cpu->gdt[p * 512 + index] = desc;
+    }
+
+    /* update guest gdt/ldt */
+    guest_gdt = map_page(pa);
+    *guest_gdt = desc;
+    free_page(guest_gdt);
+    return 0;
+}
+
+sreg_t fpu_taskswitch(struct xen_cpu *cpu, ureg_t *args)
+{
+    if (args[0]) {
+        write_cr0(X86_CR0_TS|read_cr0());
+    } else {
+        clts();
+    }
+    return 0;
+}
+
+sreg_t grant_table_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    struct gnttab_setup_table *st;
+    struct gnttab_query_size *qs;
+    unsigned long *frames;
+    int i, rc = 0;
+
+    switch (args[0]) {
+    case GNTTABOP_setup_table:
+        st = (void*)args[1];
+        printk(1, "%s: setup_table %d\n", __FUNCTION__, st->nr_frames);
+        if (st->nr_frames > GRANT_FRAMES_MAX) {
+            st->status = GNTST_general_error;
+        } else {
+            grant_frames = st->nr_frames;
+            frames = (unsigned long *)st->frame_list.p;
+            for (i = 0; i < grant_frames; i++) {
+                frames[i] = EMU_MFN(grant_table) + i;
+            }
+            st->status = GNTST_okay;
+        }
+        break;
+    case GNTTABOP_query_size:
+        printk(1, "%s: query_size\n", __FUNCTION__);
+        qs = (void*)args[1];
+        qs->nr_frames = grant_frames;
+        qs->max_nr_frames = GRANT_FRAMES_MAX;
+        qs->status = GNTST_okay;
+        break;
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]);
+        rc = -ENOSYS;
+    }
+    return rc;
+}
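+
+/*
+ * xen_version: advertise a Xen 3.1 hypercall interface to the guest
+ * and answer the extraversion/capabilities/features subqueries.
+ */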
+sreg_t xen_version(struct xen_cpu *cpu, ureg_t *args)
+{
+    static const char extra[XEN_EXTRAVERSION_LEN] =
+        "-qemu-" QEMU_VERSION QEMU_PKGVERSION;
+
+    switch (args[0]) {
+    case XENVER_version:
+        return (3 << 16) | 1;
+    case XENVER_extraversion:
+        if (memcpy_pf((void*)args[1], extra, sizeof(extra))) {
+            return -EFAULT;
+        }
+        return 0;
+    case XENVER_capabilities:
+    {
+        char caps[] = CAP_VERSION_STRING;
+        if (memcpy_pf((void*)args[1], caps, sizeof(caps))) {
+            return -EFAULT;
+        }
+        break;
+    }
+
+    case XENVER_get_features:
+    {
+        xen_feature_info_t *fi = (void*)args[1];
+        fi->submap = 0;
+        if (!fi->submap_idx) {
+            fi->submap |= (1 << XENFEAT_pae_pgdir_above_4gb);
+        }
+        break;
+    }
+
+#ifdef CONFIG_32BIT
+    case XENVER_platform_parameters:
+    {
+        uint32_t *ptr32 = (void*)args[1];
+        *ptr32 = XEN_M2P;
+        break;
+    }
+#endif
+
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+sreg_t vm_assist(struct xen_cpu *cpu, ureg_t *args)
+{
+    int type = args[1];
+
+    switch (args[0]) {
+    case VMASST_CMD_enable:
+        printk(1, "%s: enable %d (%s)\n", __FUNCTION__,
+               type, vmasst_type_name(type));
+        if (type == VMASST_TYPE_writable_pagetables) {
+            wrpt = 1;
+        }
+        break;
+    case VMASST_CMD_disable:
+        printk(1, "%s: disable %d (%s)\n", __FUNCTION__,
+               type, vmasst_type_name(type));
+        if (type == VMASST_TYPE_writable_pagetables) {
+            wrpt = 0;
+        }
+        break;
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+sreg_t sched_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    switch (args[0]) {
+    case SCHEDOP_yield:
+        /* Hmm, some day, on SMP, we'll probably want to do something else ... */
+        sti(); pause(); cli();
+        break;
+
+    case SCHEDOP_block:
+        guest_sti(cpu);
+        if (!evtchn_pending(cpu)) {
+            halt_i(cpu->id);
+            pv_clock_update(1);
+        }
+        break;
+
+    case SCHEDOP_shutdown:
+    {
+        struct sched_shutdown sh;
+
+        if (memcpy_pf(&sh, (void*)(args[1]), sizeof(sh))) {
+            return -EFAULT;
+        }
+        emudev_cmd(EMUDEV_CMD_GUEST_SHUTDOWN, sh.reason);
+        break;
+    }
+
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+sreg_t sched_op_compat(struct xen_cpu *cpu, ureg_t *args)
+{
+    switch (args[0]) {
+    case SCHEDOP_yield:
+    case SCHEDOP_block:
+        return sched_op(cpu, args);
+    case SCHEDOP_shutdown:
+        emudev_cmd(EMUDEV_CMD_GUEST_SHUTDOWN, args[1]);
+        return 0;
+    default:
+        return -ENOSYS;
+    }
+}
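+
+/*
+ * memory_op: balloon-style reservation changes plus the machphys
+ * mapping query.  Reservation changes currently only maintain the
+ * m2p table; telling the host to actually free/reclaim the pages is
+ * still a FIXME.
+ */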
+sreg_t memory_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    int cmd = args[0] & 0x0f /* MEMOP_CMD_MASK */;
+
+    switch (cmd) {
+    case XENMEM_increase_reservation:
+    {
+        struct xen_memory_reservation res;
+
+        if (memcpy_pf(&res, (void*)(args[1]), sizeof(res))) {
+            return -EFAULT;
+        }
+        if (res.domid != DOMID_SELF) {
+            return -EPERM;
+        }
+        printk(0, "%s: increase_reservation: nr %ld, order %d (not implemented)\n",
+               __FUNCTION__, res.nr_extents, res.extent_order);
+        /* FIXME: not implemented yet, thus say "no pages allocated" */
+        return 0;
+    }
+    case XENMEM_decrease_reservation:
+    {
+        struct xen_memory_reservation res;
+        xen_pfn_t *ptr, gmfn;
+        int i, p, count = 0;
+
+        if (memcpy_pf(&res, (void*)(args[1]), sizeof(res))) {
+            return -EFAULT;
+        }
+        if (res.domid != DOMID_SELF) {
+            return -EPERM;
+        }
+        ptr = (xen_pfn_t *)res.extent_start.p;
+        for (i = 0; i < res.nr_extents; i++) {
+            if (memcpy_pf(&gmfn, ptr + i, sizeof(gmfn))) {
+                break;
+            }
+            for (p = 0; p < (1 << res.extent_order); p++) {
+                m2p[gmfn+p] = INVALID_M2P_ENTRY;
+            }
+            /* FIXME: make host free pages */
+            count++;
+        }
+        printk(2, "%s: decrease_reservation: nr %ld, order %d"
+               " (max %" PRIx64 "/%" PRIx64 ") -> rc %d\n", __FUNCTION__,
+               res.nr_extents, res.extent_order,
+               vmconf.pg_guest, vmconf.pg_total,
+               count);
+        /* FIXME: signal to userspace */
+        return count;
+    }
+    case XENMEM_populate_physmap:
+    {
+        struct xen_memory_reservation res;
+        xen_pfn_t *ptr, gpfn, gmfn;
+        int i, p, count = 0;
+
+        if (memcpy_pf(&res, (void*)(args[1]), sizeof(res))) {
+            return -EFAULT;
+        }
+        if (res.domid != DOMID_SELF) {
+            return -EPERM;
+        }
+        ptr = (xen_pfn_t *)res.extent_start.p;
+        gmfn = vmconf.mfn_guest;
+        for (i = 0; i < res.nr_extents; i++) {
+            if (memcpy_pf(&gpfn, ptr + i, sizeof(gpfn))) {
+                break;
+            }
+            for (p = 0; p < (1 << res.extent_order); p++) {
+                /* check the bound before reading m2p[gmfn] */
+                while (gmfn < vmconf.pg_total &&
+                       m2p[gmfn] != INVALID_M2P_ENTRY) {
+                    gmfn++;
+                }
+                if (gmfn == vmconf.pg_total) {
+                    break;
+                }
+                m2p[gmfn] = gpfn+p;
+            }
+            if (p != (1 << res.extent_order)) {
+                break;
+            }
+            /* FIXME: make host reclaim pages */
+            if (memcpy_pf(ptr + i, &gmfn, sizeof(gmfn))) {
+                break;
+            }
+            count++;
+        }
+        printk(2, "%s: populate_physmap: nr %ld, order %d -> rc %d\n",
+               __FUNCTION__, res.nr_extents, res.extent_order, count);
+        /* FIXME: signal to userspace */
+        return count;
+    }
+    case XENMEM_machphys_mapping:
+    {
+        struct xen_machphys_mapping map;
+        uint32_t pg_m2p = 1024 /* pages (4 MB) */;
+        void *dest = (void*)(args[1]);
+#ifdef CONFIG_64BIT
+        map.v_start = XEN_M2P_64;
+#else
+        map.v_start = XEN_M2P;
+#endif
+        map.v_end = map.v_start + frame_to_addr(pg_m2p);
+        map.max_mfn = pg_m2p << (PAGE_SHIFT-3);
+        if (memcpy_pf(dest, &map, sizeof(map))) {
+            return -EFAULT;
+        }
+        return 0;
+    }
+
+    case XENMEM_memory_map:
+        /* we have no e820 map */
+        return -ENOSYS;
+
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, cmd);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+sreg_t set_trap_table(struct xen_cpu *cpu, ureg_t *args)
+{
+    struct trap_info *traps;
+    struct trap_info trap;
+    int i;
+
+    if (!args[0]) {
+        memset(&xentr, 0, sizeof(xentr));
+        return -EINVAL;
+    }
+
+    traps = (void*)args[0];
+    for (i = 0;; i++) {
+        if (memcpy_pf(&trap, traps+i, sizeof(trap))) {
+            return -EFAULT;
+        }
+        if (!trap.address) {
+            break;
+        }
+        trap.cs = fix_sel32(trap.cs);
+        /* use the copied entry, not the guest pointer */
+        xentr[trap.vector] = trap;
+#ifdef CONFIG_32BIT
+        if (trap.vector >= 0x80) {
+            /* route directly */
+            uint32_t dpl = trap.flags & 0x03;
+            xen_idt[trap.vector] =
+                mkgate32(trap.cs, trap.address, 0x8f | (dpl << 5));
+        }
+#endif
+    }
+    return 0;
+}
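+
+/*
+ * Guest callback management: the event, failsafe and (on 64-bit)
+ * syscall entry points the guest kernel registers with the
+ * hypervisor.
+ */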
"OK" : "unsupported"); + if (!ok) { + return -1; + } + +#ifdef CONFIG_32BIT + cb->cs = fix_sel32(cb->cs); +#endif + xencb[type] = *cb; + return 0; +} + +static void callback_clear(int type) +{ +#ifdef CONFIG_64BIT + xencb[type] = 0; +#else + xencb[type].cs = 0; + xencb[type].eip = 0; +#endif +} + +sreg_t set_callbacks(struct xen_cpu *cpu, ureg_t *args) +{ +#ifdef CONFIG_64BIT + callback_setup(CALLBACKTYPE_event, &args[0]); + callback_setup(CALLBACKTYPE_failsafe, &args[1]); + callback_setup(CALLBACKTYPE_syscall, &args[2]); +#else + xen_callback_t cb; + + cb.cs = args[0]; + cb.eip = args[1]; + callback_setup(CALLBACKTYPE_event, &cb); + cb.cs = args[2]; + cb.eip = args[3]; + callback_setup(CALLBACKTYPE_failsafe, &cb); +#endif + return 0; +} + +sreg_t callback_op(struct xen_cpu *cpu, ureg_t *args) +{ + struct callback_register cb; + + memcpy_pf(&cb, (void*)(args[1]), sizeof(cb)); + if (cb.type >= 8) { + return -EINVAL; + } + + switch (args[0]) { + case CALLBACKOP_register: + if (callback_setup(cb.type, &cb.address)) { + return -EINVAL; + } + break; + case CALLBACKOP_unregister: + callback_clear(cb.type); + break; + default: + printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]); + return -ENOSYS; + } + return 0; +} + +void guest_gdt_copy_page(struct descriptor_32 *src, + struct descriptor_32 *dst) +{ + struct descriptor_32 tmp; + int e; + + for (e = 0; e < 512; e++) { + tmp = src[e]; + fix_desc(&tmp); + dst[e] = tmp; + } +} + +int guest_gdt_init(struct xen_cpu *cpu, uint32_t entries, ureg_t *mfns) +{ + uint32_t pages = (entries + 511) / 512; + struct descriptor_32 *src, *dst; + uint32_t p; + + for (p = 0; p < pages; p++) { + cpu->gdt_mfns[p] = mfns[p]; + src = map_page(frame_to_addr(mfns[p])); + dst = cpu->gdt + p * 512; + guest_gdt_copy_page(src, dst); + free_page(src); + } + return 0; +} + +sreg_t set_gdt(struct xen_cpu *cpu, ureg_t *args) +{ + ureg_t mfns[16]; + uint32_t entries = args[1]; + + if (memcpy_pf(mfns, (void*)(args[0]), sizeof(mfns))) { + return -EFAULT; + } + if (entries > (0xe000 >> 3)) { + return -EINVAL; + } + + return guest_gdt_init(cpu, entries, mfns); +} + +sreg_t vcpu_op(struct xen_cpu *cpu, ureg_t *args) +{ + if (args[1] >= vmconf.nr_cpus) { + return -EINVAL; + } + + switch (args[0]) { + case VCPUOP_register_runstate_memory_area: + /* FIXME */ + return 0; + + case VCPUOP_is_up: + { + struct xen_cpu *vcpu; + + vcpu = cpu_find(args[1]); + return vcpu->online; + } + + case VCPUOP_set_periodic_timer: + { + struct vcpu_set_periodic_timer ticks; + + if (memcpy_pf(&ticks, (void*)args[2], sizeof(ticks))) { + return -EFAULT; + } + + cpu->periodic = ticks.period_ns; + printk(1, "%s/%d: periodic %" PRId64 " (%d Hz)\n", __FUNCTION__, + cpu->id, cpu->periodic, + 1000000000 / (unsigned int)cpu->periodic); + lapic_timer(cpu); + break; + } + case VCPUOP_stop_periodic_timer: + cpu->periodic = 0; + printk(1, "%s/%d: periodic off\n", __FUNCTION__, cpu->id); + lapic_timer(cpu); + break; + case VCPUOP_set_singleshot_timer: + { + struct vcpu_set_singleshot_timer single; + + if (memcpy_pf(&single, (void*)args[2], sizeof(single))) { + return -EFAULT; + } + cpu->oneshot = single.timeout_abs_ns; + printk(3, "%s/%d: oneshot %" PRId64 "\n", __FUNCTION__, cpu->id, + cpu->oneshot); + lapic_timer(cpu); + break; + } + case VCPUOP_stop_singleshot_timer: + cpu->oneshot = 0; + printk(1, "%s/%d: oneshot off\n", __FUNCTION__, cpu->id); + lapic_timer(cpu); + break; + case VCPUOP_initialise: + { + struct xen_cpu *vcpu; + + printk(0, "%s: initialise cpu %d\n", __FUNCTION__, (int)args[1]); + vcpu = 
+sreg_t vcpu_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    if (args[1] >= vmconf.nr_cpus) {
+        return -EINVAL;
+    }
+
+    switch (args[0]) {
+    case VCPUOP_register_runstate_memory_area:
+        /* FIXME */
+        return 0;
+
+    case VCPUOP_is_up:
+    {
+        struct xen_cpu *vcpu;
+
+        vcpu = cpu_find(args[1]);
+        return vcpu->online;
+    }
+
+    case VCPUOP_set_periodic_timer:
+    {
+        struct vcpu_set_periodic_timer ticks;
+
+        if (memcpy_pf(&ticks, (void*)args[2], sizeof(ticks))) {
+            return -EFAULT;
+        }
+
+        cpu->periodic = ticks.period_ns;
+        printk(1, "%s/%d: periodic %" PRId64 " (%d Hz)\n", __FUNCTION__,
+               cpu->id, cpu->periodic,
+               1000000000 / (unsigned int)cpu->periodic);
+        lapic_timer(cpu);
+        break;
+    }
+    case VCPUOP_stop_periodic_timer:
+        cpu->periodic = 0;
+        printk(1, "%s/%d: periodic off\n", __FUNCTION__, cpu->id);
+        lapic_timer(cpu);
+        break;
+    case VCPUOP_set_singleshot_timer:
+    {
+        struct vcpu_set_singleshot_timer single;
+
+        if (memcpy_pf(&single, (void*)args[2], sizeof(single))) {
+            return -EFAULT;
+        }
+        cpu->oneshot = single.timeout_abs_ns;
+        printk(3, "%s/%d: oneshot %" PRId64 "\n", __FUNCTION__, cpu->id,
+               cpu->oneshot);
+        lapic_timer(cpu);
+        break;
+    }
+    case VCPUOP_stop_singleshot_timer:
+        cpu->oneshot = 0;
+        printk(1, "%s/%d: oneshot off\n", __FUNCTION__, cpu->id);
+        lapic_timer(cpu);
+        break;
+    case VCPUOP_initialise:
+    {
+        struct xen_cpu *vcpu;
+
+        printk(0, "%s: initialise cpu %d\n", __FUNCTION__, (int)args[1]);
+        vcpu = cpu_find(args[1]);
+        if (!vcpu->init_ctxt) {
+            vcpu->init_ctxt = get_memory(sizeof(*(vcpu->init_ctxt)), "init_ctxt");
+        }
+        if (memcpy_pf(vcpu->init_ctxt, (void*)args[2],
+                      sizeof(*(vcpu->init_ctxt)))) {
+            return -EFAULT;
+        }
+        break;
+    }
+    case VCPUOP_up:
+    {
+        struct xen_cpu *vcpu;
+
+        printk(0, "%s: up cpu %d\n", __FUNCTION__, (int)args[1]);
+        vcpu = cpu_find(args[1]);
+        lapic_ipi_boot(cpu, vcpu);
+        break;
+    }
+    case VCPUOP_down:
+    {
+        return -ENOSYS;
+    }
+    case VCPUOP_register_vcpu_info:
+    {
+        struct vcpu_register_vcpu_info reg;
+        struct xen_cpu *vcpu;
+        struct vcpu_info *new_info;
+        uint64_t new_info_pa;
+
+        vcpu = cpu_find(args[1]);
+        if (memcpy_pf(&reg, (void*)args[2], sizeof(reg))) {
+            return -EFAULT;
+        }
+        if (reg.offset + sizeof(struct vcpu_info) > PAGE_SIZE) {
+            return -EINVAL;
+        }
+        if (vcpu->v.vcpu_page) {
+            return -EINVAL;
+        }
+
+        vcpu->v.vcpu_page = fixmap_page(cpu, frame_to_addr(reg.mfn));
+        new_info = vcpu->v.vcpu_page + reg.offset;
+        new_info_pa = frame_to_addr(reg.mfn) + reg.offset;
+        printk(1, "%s/%d: vcpu_info: mfn 0x%" PRIx64 ", offset 0x%x, pa %" PRIx64 " mapped to %p\n",
+               __FUNCTION__, vcpu->id, reg.mfn, reg.offset, new_info_pa, new_info);
+
+        memcpy(new_info, vcpu->v.vcpu_info, sizeof(struct vcpu_info));
+        vcpu->v.vcpu_info = new_info;
+        vcpu->v.vcpu_info_pa = new_info_pa;
+        pv_clock_sys(vcpu);
+        break;
+    }
+    default:
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+sreg_t set_timer_op(struct xen_cpu *cpu, ureg_t *args)
+{
+#ifdef CONFIG_64BIT
+    uint64_t time = args[0];
+#else
+    uint64_t time = args[0] | (uint64_t)args[1] << 32;
+#endif
+
+    cpu->oneshot = time;
+    lapic_timer(cpu);
+    return 0;
+}
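+
+/*
+ * event_channel_op: allocate, bind, send and close event channel
+ * ports.  Sends that are not handled internally are forwarded to
+ * qemu via EMUDEV_CMD_EVTCHN_SEND.
+ */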
+sreg_t event_channel_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    switch (args[0]) {
+    case EVTCHNOP_alloc_unbound:
+    {
+        struct evtchn_alloc_unbound alloc;
+        struct xen_cpu *vcpu;
+
+        if (memcpy_pf(&alloc, (void*)args[1], sizeof(alloc))) {
+            return -EFAULT;
+        }
+        if (alloc.dom != DOMID_SELF || alloc.remote_dom != 0) {
+            return -EINVAL;
+        }
+        alloc.port = evtchn_alloc(cpu->id);
+        vcpu = cpu_find(0);
+        evtchn_route_interdomain(vcpu, alloc.port, NULL);
+        if (memcpy_pf((void*)args[1], &alloc, sizeof(alloc))) {
+            return -EFAULT;
+        }
+        return 0;
+    }
+    case EVTCHNOP_bind_vcpu:
+    {
+        struct evtchn_bind_vcpu bind;
+        struct xen_cpu *vcpu;
+
+        if (memcpy_pf(&bind, (void*)args[1], sizeof(bind))) {
+            return -EFAULT;
+        }
+        vcpu = cpu_find(bind.vcpu);
+        if (evtchn_route_interdomain(vcpu, bind.port, NULL)) {
+            return -EINVAL;
+        }
+        return 0;
+    }
+    case EVTCHNOP_bind_virq:
+    {
+        struct evtchn_bind_virq bind;
+        struct xen_cpu *vcpu;
+
+        if (memcpy_pf(&bind, (void*)args[1], sizeof(bind))) {
+            return -EFAULT;
+        }
+        switch (bind.virq) {
+        case VIRQ_TIMER:
+            vcpu = cpu_find(bind.vcpu);
+            if (!vcpu->timerport) {
+                vcpu->timerport = evtchn_alloc(cpu->id);
+                evtchn_route_virq(vcpu, VIRQ_TIMER, vcpu->timerport, "timer");
+                if (cpu == vcpu) {
+                    lapic_timer(cpu);
+                }
+            }
+            bind.port = vcpu->timerport;
+            break;
+        default:
+            bind.port = evtchn_alloc(cpu->id);
+            break;
+        }
+        if (memcpy_pf((void*)args[1], &bind, sizeof(bind))) {
+            return -EFAULT;
+        }
+        return 0;
+    }
+    case EVTCHNOP_bind_ipi:
+    {
+        struct evtchn_bind_ipi bind;
+        struct xen_cpu *vcpu;
+
+        if (memcpy_pf(&bind, (void*)args[1], sizeof(bind))) {
+            return -EFAULT;
+        }
+        bind.port = evtchn_alloc(cpu->id);
+        vcpu = cpu_find(bind.vcpu);
+        evtchn_route_ipi(vcpu, bind.port);
+        if (memcpy_pf((void*)args[1], &bind, sizeof(bind))) {
+            return -EFAULT;
+        }
+        return 0;
+    }
+    case EVTCHNOP_bind_pirq:
+        return -EPERM;
+    case EVTCHNOP_send:
+    {
+        struct evtchn_send send;
+
+        if (memcpy_pf(&send, (void*)args[1], sizeof(send))) {
+            return -EFAULT;
+        }
+        if (evtchn_send(cpu, send.port)) {
+            /* handled internally */
+            return 0;
+        } else {
+            emudev_cmd(EMUDEV_CMD_EVTCHN_SEND, send.port);
+            return 0;
+        }
+    }
+    case EVTCHNOP_unmask:
+    {
+        struct evtchn_unmask unmask;
+
+        if (memcpy_pf(&unmask, (void*)args[1], sizeof(unmask))) {
+            return -EFAULT;
+        }
+        evtchn_unmask(cpu, unmask.port);
+        return 0;
+    }
+    case EVTCHNOP_close:
+    {
+        struct evtchn_close cl;
+
+        if (memcpy_pf(&cl, (void*)args[1], sizeof(cl))) {
+            return -EFAULT;
+        }
+        evtchn_close(cpu, cl.port);
+        return 0;
+    }
+    default:
+        return -ENOSYS;
+    }
+}
+
+sreg_t event_channel_op_compat(struct xen_cpu *cpu, ureg_t *args)
+{
+    struct evtchn_op op;
+    ureg_t nargs[2];
+
+    if (memcpy_pf(&op, (void*)args[0], sizeof(op))) {
+        return -EFAULT;
+    }
+    nargs[0] = op.cmd;
+    nargs[1] = args[0] + offsetof(struct evtchn_op, u);
+    return event_channel_op(cpu, nargs);
+}
+
+sreg_t mmuext_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    struct mmuext_op *uops = (void*)args[0];
+    ureg_t count = args[1];
+#if 0
+    ureg_t *done = (void*)args[2];
+#endif
+    ureg_t dom = args[3];
+    ureg_t cpumask;
+    int i;
+
+    if (dom != DOMID_SELF) {
+        printk(1, "%s: foreigndom not supported\n", __FUNCTION__);
+        return -ENOSYS;
+    }
+
+    for (i = 0; i < count; i++, uops++) {
+        switch (uops->cmd) {
+        case MMUEXT_PIN_L1_TABLE:
+        case MMUEXT_PIN_L2_TABLE:
+        case MMUEXT_PIN_L3_TABLE:
+        case MMUEXT_PIN_L4_TABLE:
+            /* ignore */
+            break;
+        case MMUEXT_UNPIN_TABLE:
+            /* KVM_MMU_OP_RELEASE_PT ??? */
+            break;
+        case MMUEXT_INVLPG_LOCAL:
+            flush_tlb_addr(uops->arg1.linear_addr);
+            break;
+        case MMUEXT_INVLPG_MULTI:
+            if (memcpy_pf(&cpumask, (void*)uops->arg2.vcpumask.p,
+                          sizeof(cpumask))) {
+                return -EFAULT;
+            }
+            flush_tlb_addr(uops->arg1.linear_addr);
+            flush_tlb_remote(cpu, cpumask, uops->arg1.linear_addr);
+            break;
+        case MMUEXT_INVLPG_ALL:
+            flush_tlb_addr(uops->arg1.linear_addr);
+            flush_tlb_remote(cpu, vminfo.vcpus_online, uops->arg1.linear_addr);
+            break;
+        case MMUEXT_TLB_FLUSH_LOCAL:
+            flush_tlb();
+            break;
+        case MMUEXT_TLB_FLUSH_MULTI:
+            /* fetch the vcpu mask; it was left uninitialized here before */
+            if (memcpy_pf(&cpumask, (void*)uops->arg2.vcpumask.p,
+                          sizeof(cpumask))) {
+                return -EFAULT;
+            }
+            flush_tlb();
+            flush_tlb_remote(cpu, cpumask, 0);
+            break;
+        case MMUEXT_TLB_FLUSH_ALL:
+            flush_tlb();
+            flush_tlb_remote(cpu, vminfo.vcpus_online, 0);
+            break;
+
+        case MMUEXT_SET_LDT:
+            printk(2, "%s: SET_LDT (va %lx, nr %d)\n", __FUNCTION__,
+                   uops->arg1.linear_addr, uops->arg2.nr_ents);
+            if (uops->arg2.nr_ents) {
+                struct descriptor_32 *gdt = cpu->gdt;
+                int idx = ldt(cpu);
+                gdt[idx+0] = mkdesc32(uops->arg1.linear_addr & 0xffffffff,
+                                      uops->arg2.nr_ents * 8 - 1,
+                                      0x82, 0);
+#ifdef CONFIG_64BIT
+                gdt[idx+1].a = uops->arg1.linear_addr >> 32;
+                gdt[idx+1].b = 0;
+#endif
+                lldt(idx << 3);
+            } else {
+                lldt(0);
+            }
+            break;
+
+        case MMUEXT_NEW_BASEPTR:
+            update_emu_mappings(uops->arg1.mfn);
+            pv_write_cr3(cpu, uops->arg1.mfn);
+            break;
+#ifdef CONFIG_64BIT
+        case MMUEXT_NEW_USER_BASEPTR:
+            update_emu_mappings(uops->arg1.mfn);
+            cpu->user_cr3_mfn = uops->arg1.mfn;
+            break;
+#endif
+        default:
+            printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, uops->cmd);
+            return -ENOSYS;
+        }
+    }
+    return 0;
+}
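+
+/*
+ * physdev_op: only set_iopl and set_iobitmap are implemented; real
+ * physical device access is not available to xenner guests.
+ */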
+sreg_t physdev_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    switch (args[0]) {
+    case PHYSDEVOP_set_iopl:
+    {
+        struct physdev_set_iopl iopl;
+
+        if (memcpy_pf(&iopl, (void*)args[1], sizeof(iopl))) {
+            return -EFAULT;
+        }
+        printk(2, "%s: set iopl: %d\n", __FUNCTION__, iopl.iopl);
+        cpu->iopl = iopl.iopl;
+        return 0;
+    }
+    case PHYSDEVOP_set_iobitmap:
+    {
+        struct physdev_set_iobitmap iobitmap;
+
+        if (memcpy_pf(&iobitmap, (void*)args[1], sizeof(iobitmap))) {
+            return -EFAULT;
+        }
+        printk(2, "%s: set iobitmap: %d\n", __FUNCTION__, iobitmap.nr_ports);
+        cpu->nr_ports = iobitmap.nr_ports;
+        return 0;
+    }
+    default:
+        printk(1, "%s: not implemented (#%d)\n", __FUNCTION__, (int)args[0]);
+        return -EPERM;
+    }
+}
+
+static ureg_t read_debugreg(int nr)
+{
+    ureg_t val;
+
+    switch (nr) {
+    case 0: asm volatile("mov %%db0,%0" : "=r" (val)); break;
+    case 1: asm volatile("mov %%db1,%0" : "=r" (val)); break;
+    case 2: asm volatile("mov %%db2,%0" : "=r" (val)); break;
+    case 3: asm volatile("mov %%db3,%0" : "=r" (val)); break;
+    case 4: asm volatile("mov %%db4,%0" : "=r" (val)); break;
+    case 5: asm volatile("mov %%db5,%0" : "=r" (val)); break;
+    case 6: asm volatile("mov %%db6,%0" : "=r" (val)); break;
+    case 7: asm volatile("mov %%db7,%0" : "=r" (val)); break;
+    default: val = -EINVAL; break;
+    }
+    return val;
+}
+
+static void write_debugreg(int nr, ureg_t val)
+{
+    switch (nr) {
+    case 0: asm volatile("mov %0,%%db0" : : "r" (val) : "memory"); break;
+    case 1: asm volatile("mov %0,%%db1" : : "r" (val) : "memory"); break;
+    case 2: asm volatile("mov %0,%%db2" : : "r" (val) : "memory"); break;
+    case 3: asm volatile("mov %0,%%db3" : : "r" (val) : "memory"); break;
+    case 4: asm volatile("mov %0,%%db4" : : "r" (val) : "memory"); break;
+    case 5: asm volatile("mov %0,%%db5" : : "r" (val) : "memory"); break;
+    case 6: asm volatile("mov %0,%%db6" : : "r" (val) : "memory"); break;
+    case 7: asm volatile("mov %0,%%db7" : : "r" (val) : "memory"); break;
+    }
+}
+
+sreg_t get_debugreg(struct xen_cpu *cpu, ureg_t *args)
+{
+    ureg_t val = read_debugreg(args[0]);
+
+    printk(2, "%s: %" PRIxREG " = %" PRIxREG "\n", __FUNCTION__, args[0], val);
+    return val;
+}
+
+sreg_t set_debugreg(struct xen_cpu *cpu, ureg_t *args)
+{
+    int nr = args[0];
+    ureg_t val = args[1];
+
+    switch (nr) {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+        /* TODO: check address */
+        break;
+    case 6:
+        val &= 0xffffefff; /* reserved bits => 0 */
+        val |= 0xffff0ff0; /* reserved bits => 1 */
+        break;
+    case 7:
+        if (val) {
+            val &= 0xffff27ff; /* reserved bits => 0 */
+            val |= 0x00000400; /* reserved bits => 1 */
+        }
+        break;
+    default:
+        return -EINVAL;
+    }
+
+    printk(0, "%s: %d = %" PRIxREG "\n", __FUNCTION__, nr, val);
+    write_debugreg(nr, val);
+    return 0;
+}
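
A note for reviewers: all handlers above share one calling convention --
they receive the vcpu state plus the already-fetched hypercall argument
array and return a signed status that is written back to the guest's
return register. To make the control flow concrete, here is a minimal
sketch of how a number-indexed table can dispatch guest hypercalls to
these handlers. The names hcall_table and do_hypercall, the table
contents, and the use of an ARRAY_SIZE macro are illustrative
assumptions, not part of this patch; the actual wiring lives in the
arch-specific entry code elsewhere in this series.

typedef sreg_t (*hcall_fn)(struct xen_cpu *cpu, ureg_t *args);

/* Indexed by the public Xen hypercall numbers (__HYPERVISOR_*);
 * gaps stay NULL and fail with -ENOSYS in do_hypercall() below. */
static const hcall_fn hcall_table[] = {
    [__HYPERVISOR_set_trap_table]    = set_trap_table,
    [__HYPERVISOR_set_gdt]           = set_gdt,
    [__HYPERVISOR_stack_switch]      = stack_switch,
    [__HYPERVISOR_set_callbacks]     = set_callbacks,
    [__HYPERVISOR_fpu_taskswitch]    = fpu_taskswitch,
    [__HYPERVISOR_sched_op_compat]   = sched_op_compat,
    [__HYPERVISOR_set_debugreg]      = set_debugreg,
    [__HYPERVISOR_get_debugreg]      = get_debugreg,
    [__HYPERVISOR_update_descriptor] = update_descriptor,
    [__HYPERVISOR_memory_op]         = memory_op,
    [__HYPERVISOR_set_timer_op]      = set_timer_op,
    [__HYPERVISOR_xen_version]       = xen_version,
    [__HYPERVISOR_console_io]        = console_io,
    [__HYPERVISOR_grant_table_op]    = grant_table_op,
    [__HYPERVISOR_vm_assist]         = vm_assist,
    [__HYPERVISOR_vcpu_op]           = vcpu_op,
    [__HYPERVISOR_mmuext_op]         = mmuext_op,
    [__HYPERVISOR_sched_op]          = sched_op,
    [__HYPERVISOR_callback_op]       = callback_op,
    [__HYPERVISOR_event_channel_op]  = event_channel_op,
    [__HYPERVISOR_physdev_op]        = physdev_op,
    /* dom0-only calls can be wired to error_noperm the same way */
};

static sreg_t do_hypercall(struct xen_cpu *cpu, ureg_t nr, ureg_t *args)
{
    /* reject numbers beyond the table or without a handler */
    if (nr >= ARRAY_SIZE(hcall_table) || !hcall_table[nr]) {
        return -ENOSYS;
    }
    return hcall_table[nr](cpu, args);
}

Designated initializers keep the table sparse and readable: adding a
handler is a one-line change, and every unimplemented slot keeps
failing cleanly with -ENOSYS.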