Patchwork [12/40] xenner: kernel: Hypercall handler (generic)

login
register
mail settings
Submitter Alexander Graf
Date Nov. 1, 2010, 3:01 p.m.
Message ID <1288623713-28062-13-git-send-email-agraf@suse.de>
Download mbox | patch
Permalink /patch/69790/
State New
Headers show

Comments

Alexander Graf - Nov. 1, 2010, 3:01 p.m.
Xenner handles guest hypercalls itself. This patch adds all the handling
code that is shared between i386 and x86_64.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 pc-bios/xenner/xenner-hcall.c | 1031 +++++++++++++++++++++++++++++++++++++++++
 1 files changed, 1031 insertions(+), 0 deletions(-)
 create mode 100644 pc-bios/xenner/xenner-hcall.c

Patch

diff --git a/pc-bios/xenner/xenner-hcall.c b/pc-bios/xenner/xenner-hcall.c
new file mode 100644
index 0000000..30b574f
--- /dev/null
+++ b/pc-bios/xenner/xenner-hcall.c
@@ -0,0 +1,1031 @@ 
+/*
+ *  Copyright (C) Red Hat 2007
+ *  Copyright (C) Novell Inc. 2010
+ *
+ *  Author(s): Gerd Hoffmann <kraxel@redhat.com>
+ *             Alexander Graf <agraf@suse.de>
+ *
+ *  Xenner hypercall handlers
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+
+#include "config-host.h"
+#include "xenner.h"
+
+/* Stub handler: silently accept and ignore the hypercall. */
+sreg_t error_noop(struct xen_cpu *cpu, ureg_t *args)
+{
+    /* ignore */
+    return 0;
+}
+
+/* Stub handler: reject hypercalls reserved for privileged (dom0) guests. */
+sreg_t error_noperm(struct xen_cpu *cpu, ureg_t *args)
+{
+    /* we don't do dom0 hypercalls */
+    return -EPERM;
+}
+
+/*
+ * HYPERVISOR_console_io: emergency console.
+ * args[0] = CONSOLEIO_* command, args[1] = length, args[2] = guest buffer.
+ * Writes are copied (truncated to 127 bytes), stripped of trailing
+ * newlines, and forwarded to the xenner log; reads always return 0 bytes.
+ */
+sreg_t console_io(struct xen_cpu *cpu, ureg_t *args)
+{
+    int count = args[1];
+    void *ptr = (void*)args[2];
+    uint8_t buf[128];
+
+    switch (args[0]) {
+    case CONSOLEIO_write:
+        /* clamp so we can always NUL-terminate in buf */
+        if (count > sizeof(buf)-1) {
+            count = sizeof(buf)-1;
+        }
+        /* memcpy_pf copies from guest memory, returns non-zero on fault */
+        if (0 != memcpy_pf(&buf, ptr, count)) {
+            return -EFAULT;
+        }
+        buf[count] = 0;
+        /* strip trailing CR/LF so the log line stays on one line */
+        while (count > 0 && (buf[count-1] == '\r' || buf[count-1] == '\n')) {
+            buf[--count] = 0;
+        }
+        printk(1, "guest: \"%s\"\n", buf);
+        return count;
+    case CONSOLEIO_read:
+        return 0;
+    default:
+        printk(1, "console: unknown: %s\n", consoleio_name(args[0]));
+        return -ENOSYS;
+    }
+}
+
+/*
+ * HYPERVISOR_stack_switch: record the guest kernel stack (ss:sp) to load
+ * on ring transitions.  args[0] = ss selector, args[1] = stack pointer.
+ */
+sreg_t stack_switch(struct xen_cpu *cpu, ureg_t *args)
+{
+    cpu->kernel_ss = fix_sel(args[0]);
+    cpu->kernel_sp = args[1];
+
+#ifdef CONFIG_32BIT
+    /* on 32-bit the guest kernel runs in ring 1, so mirror into tss.ss1/esp1 */
+    cpu->tss.ss1  = cpu->kernel_ss;
+    cpu->tss.esp1 = cpu->kernel_sp;
+#endif
+    return 0;
+}
+
+/*
+ * HYPERVISOR_update_descriptor: update a single GDT/LDT entry.
+ * 64-bit ABI: args[0] = physical address, args[1] = 64-bit descriptor.
+ * 32-bit ABI: args[0..1] = physical address halves, args[2..3] = descriptor.
+ * The entry is sanitized (fix_desc), written into xenner's shadow GDT if
+ * the page is a registered GDT frame, and always written back to the
+ * guest's own table.
+ */
+sreg_t update_descriptor(struct xen_cpu *cpu, ureg_t *args)
+{
+#ifdef CONFIG_64BIT
+    uint64_t pa = args[0];
+    struct descriptor_32 desc = {
+        .a = args[1] & 0xffffffff,
+        .b = args[1] >> 32,
+    };
+#else
+    uint64_t pa   = args[0] | (uint64_t)args[1] << 32;
+    struct descriptor_32 desc = {
+        .a = args[2],
+        .b = args[3],
+    };
+#endif
+    struct descriptor_32 *guest_gdt;
+    int p, index;
+    uint64_t mfn;
+
+    fix_desc(&desc);
+
+    /* is the target page one of the (up to 16) registered GDT frames? */
+    mfn = addr_to_frame(pa);
+    for (p = 0; p < 16; p++) {
+        if (mfn == cpu->gdt_mfns[p]) {
+            break;
+        }
+    }
+    if (p == 16) {
+        printk(1, "%s: not found in gdt: pa %" PRIx64 " (ldt update?)\n",
+               __FUNCTION__, pa);
+    } else {
+        /* update emu gdt shadow */
+        index = addr_offset(pa) / sizeof(struct descriptor_32);
+        cpu->gdt[p * 512 + index] = desc;
+    }
+
+    /* update guest gdt/ldt */
+    guest_gdt = map_page(pa);
+    *guest_gdt = desc;
+    free_page(guest_gdt);
+    return 0;
+}
+
+/*
+ * HYPERVISOR_fpu_taskswitch: set (args[0] != 0) or clear (args[0] == 0)
+ * CR0.TS on behalf of the guest kernel for lazy FPU context switching.
+ */
+sreg_t fpu_taskswitch(struct xen_cpu *cpu, ureg_t *args)
+{
+    if (args[0]) {
+        write_cr0(X86_CR0_TS|read_cr0());
+    } else {
+        clts();
+    }
+    return 0;
+}
+
+/*
+ * HYPERVISOR_grant_table_op: minimal grant table support.
+ * Only table setup and size queries are implemented; the frame list is
+ * filled with xenner's own emulated grant-table frames.
+ * NOTE(review): st/qs are guest pointers dereferenced directly rather
+ * than copied via memcpy_pf like other handlers — confirm they are
+ * guaranteed mapped at this point.
+ */
+sreg_t grant_table_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    struct gnttab_setup_table *st;
+    struct gnttab_query_size  *qs;
+    unsigned long *frames;
+    int i, rc = 0;
+
+    switch (args[0]) {
+    case GNTTABOP_setup_table:
+        st = (void*)args[1];
+        printk(1, "%s: setup_table %d\n", __FUNCTION__, st->nr_frames);
+        if (st->nr_frames > GRANT_FRAMES_MAX) {
+            st->status = GNTST_general_error;
+        } else {
+            grant_frames = st->nr_frames;
+            frames = (unsigned long *)st->frame_list.p;
+            for (i = 0; i < grant_frames; i++) {
+                frames[i] = EMU_MFN(grant_table) + i;
+            }
+            st->status = GNTST_okay;
+        }
+        break;
+    case GNTTABOP_query_size:
+        printk(1, "%s: query_size\n", __FUNCTION__);
+        qs = (void*)args[1];
+        qs->nr_frames = grant_frames;
+        qs->max_nr_frames = GRANT_FRAMES_MAX;
+        qs->status = GNTST_okay;
+        break;
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]);
+        rc = -ENOSYS;
+    }
+    return rc;
+}
+
+/*
+ * HYPERVISOR_xen_version: report interface version and capabilities.
+ * Pretends to be Xen 3.1 with a qemu-version-tagged extraversion string.
+ */
+sreg_t xen_version(struct xen_cpu *cpu, ureg_t *args)
+{
+    static const char extra[XEN_EXTRAVERSION_LEN] =
+        "-qemu-" QEMU_VERSION QEMU_PKGVERSION;
+
+    switch (args[0]) {
+    case XENVER_version:
+        /* major << 16 | minor, i.e. Xen 3.1 */
+        return (3 << 16) | 1;
+    case XENVER_extraversion:
+        if (memcpy_pf((void*)args[1], extra, sizeof(extra))) {
+            return -EFAULT;
+        }
+        return 0;
+    case XENVER_capabilities:
+    {
+        char caps[] = CAP_VERSION_STRING;
+        if (memcpy_pf((void*)args[1], caps, sizeof(caps))) {
+            return -EFAULT;
+        }
+        break;
+    }
+
+    case XENVER_get_features:
+    {
+        /* NOTE(review): fi is written through the guest pointer directly,
+         * not via memcpy_pf — a bad pointer would fault xenner here. */
+        xen_feature_info_t *fi = (void*)args[1];
+        fi->submap = 0;
+        if (!fi->submap_idx) {
+            fi->submap |= (1 << XENFEAT_pae_pgdir_above_4gb);
+        }
+        break;
+    }
+
+#ifdef CONFIG_32BIT
+    case XENVER_platform_parameters:
+    {
+        /* start of the hypervisor hole / m2p table on 32-bit */
+        uint32_t *ptr32 = (void*)args[1];
+        *ptr32 = XEN_M2P;
+        break;
+    }
+#endif
+
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+/*
+ * HYPERVISOR_vm_assist: enable/disable guest assist features.
+ * Only writable-pagetables is tracked (global 'wrpt' flag); all other
+ * types are logged and accepted as no-ops.
+ */
+sreg_t vm_assist(struct xen_cpu *cpu, ureg_t *args)
+{
+    int type = args[1];
+
+    switch (args[0]) {
+    case VMASST_CMD_enable:
+        printk(1, "%s: enable %d (%s)\n", __FUNCTION__,
+               type, vmasst_type_name(type));
+        if (type == VMASST_TYPE_writable_pagetables) {
+            wrpt = 1;
+        }
+        break;
+    case VMASST_CMD_disable:
+        printk(1, "%s: disable %d (%s)\n", __FUNCTION__,
+               type, vmasst_type_name(type));
+        if (type == VMASST_TYPE_writable_pagetables) {
+            wrpt = 0;
+        }
+        break;
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+/*
+ * HYPERVISOR_sched_op: yield, block and shutdown.
+ * args[0] = SCHEDOP_* command, args[1] = guest pointer to command args.
+ */
+sreg_t sched_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    switch (args[0]) {
+    case SCHEDOP_yield:
+        /* Hmm, some day, on SMP, we want probably do something else ... */
+        sti(); pause(); cli();
+        break;
+
+    case SCHEDOP_block:
+        /* enable guest events, then halt until an event channel fires */
+        guest_sti(cpu);
+        if (!evtchn_pending(cpu)) {
+            halt_i(cpu->id);
+            pv_clock_update(1);
+        }
+        break;
+
+    case SCHEDOP_shutdown:
+    {
+        struct sched_shutdown sh;
+
+        if (memcpy_pf(&sh, (void*)(args[1]), sizeof(sh))) {
+            return -EFAULT;
+        }
+        /* forward shutdown reason (poweroff/reboot/crash) to the host */
+        emudev_cmd(EMUDEV_CMD_GUEST_SHUTDOWN, sh.reason);
+        break;
+    }
+
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+/*
+ * Old-style sched_op ABI: the shutdown reason is passed directly in
+ * args[1] instead of through a struct in guest memory.  yield/block
+ * are identical and delegated to sched_op().
+ */
+sreg_t sched_op_compat(struct xen_cpu *cpu, ureg_t *args)
+{
+    switch (args[0]) {
+    case SCHEDOP_yield:
+    case SCHEDOP_block:
+        return sched_op(cpu, args);
+    case SCHEDOP_shutdown:
+        emudev_cmd(EMUDEV_CMD_GUEST_SHUTDOWN, args[1]);
+        return 0;
+    default:
+        return -ENOSYS;
+    }
+}
+
+/*
+ * HYPERVISOR_memory_op: guest memory reservation handling.
+ * args[0] = subcommand (low 4 bits), args[1] = guest pointer to the
+ * per-command argument structure.  Reservation changes only update the
+ * m2p table; actual host page alloc/free is still a FIXME.
+ */
+sreg_t memory_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    int cmd = args[0] & 0x0f /* MEMOP_CMD_MASK */;
+
+    switch (cmd) {
+    case XENMEM_increase_reservation:
+    {
+        struct xen_memory_reservation res;
+
+        if (memcpy_pf(&res, (void*)(args[1]), sizeof(res))) {
+            return -EFAULT;
+        }
+        if (res.domid != DOMID_SELF) {
+            return -EPERM;
+        }
+        printk(0, "%s: increase_reservation: nr %ld, order %d (not implemented)\n",
+               __FUNCTION__, res.nr_extents, res.extent_order);
+        /* FIXME: not implemented yet, thus say "no pages allocated" */
+        return 0;
+    }
+    case XENMEM_decrease_reservation:
+    {
+        struct xen_memory_reservation res;
+        xen_pfn_t *ptr, gmfn;
+        int i, p, count = 0;
+
+        if (memcpy_pf(&res, (void*)(args[1]), sizeof(res))) {
+            return -EFAULT;
+        }
+        if (res.domid != DOMID_SELF) {
+            return -EPERM;
+        }
+        ptr = (xen_pfn_t *)res.extent_start.p;
+        for (i = 0; i < res.nr_extents; i++) {
+            if (memcpy_pf(&gmfn, ptr + i, sizeof(gmfn))) {
+                break;
+            }
+            /* mark the whole extent invalid in the m2p table
+             * NOTE(review): gmfn comes from the guest and is not
+             * range-checked against pg_total here — confirm callers
+             * can't use it to write outside m2p[]. */
+            for (p = 0; p < (1 << res.extent_order); p++) {
+                m2p[gmfn+p] = INVALID_M2P_ENTRY;
+            }
+            /* FIXME: make host free pages */
+            count++;
+        }
+        printk(2, "%s: decrease_reservation: nr %ld, order %d"
+               " (max %" PRIx64 "/%" PRIx64 ") -> rc %d\n", __FUNCTION__,
+               res.nr_extents, res.extent_order,
+               vmconf.pg_guest, vmconf.pg_total,
+               count);
+        /* FIXME: signal to userspace */
+        return count;
+    }
+    case XENMEM_populate_physmap:
+    {
+        struct xen_memory_reservation res;
+        xen_pfn_t *ptr, gpfn, gmfn;
+        int i, p, count = 0;
+
+        if (memcpy_pf(&res, (void*)(args[1]), sizeof(res))) {
+            return -EFAULT;
+        }
+        if (res.domid != DOMID_SELF) {
+            return -EPERM;
+        }
+        ptr = (xen_pfn_t *)res.extent_start.p;
+        gmfn = vmconf.mfn_guest;
+        for (i = 0; i < res.nr_extents; i++) {
+            if (memcpy_pf(&gpfn, ptr + i, sizeof(gpfn))) {
+                break;
+            }
+            for (p = 0; p < (1 << res.extent_order); p++) {
+                /* scan for the next free mfn; check the bound BEFORE
+                 * indexing m2p[] so we never read one past the table
+                 * (the old order read m2p[pg_total] at the boundary) */
+                while (gmfn < vmconf.pg_total &&
+                       m2p[gmfn] != INVALID_M2P_ENTRY) {
+                    gmfn++;
+                }
+                if (gmfn >= vmconf.pg_total) {
+                    break;
+                }
+                m2p[gmfn] = gpfn+p;
+            }
+            if (p != (1 << res.extent_order)) {
+                /* ran out of free mfns, give up on remaining extents */
+                break;
+            }
+            /* FIXME: make host reclaim pages */
+            if (memcpy_pf(ptr + i, &gmfn, sizeof(gmfn))) {
+                break;
+            }
+            count++;
+        }
+        printk(2, "%s: populate_physmap: nr %ld, order %d -> rc %d\n",
+               __FUNCTION__, res.nr_extents, res.extent_order, count);
+        /* FIXME: signal to userspace */
+        return count;
+    }
+    case XENMEM_machphys_mapping:
+    {
+        /* describe where the machine-to-phys table is mapped */
+        struct xen_machphys_mapping map;
+        uint32_t pg_m2p = 1024 /* pages (4 MB) */;
+        void *dest = (void*)(args[1]);
+#ifdef CONFIG_64BIT
+        map.v_start = XEN_M2P_64;
+#else
+        map.v_start = XEN_M2P;
+#endif
+        map.v_end   = map.v_start + frame_to_addr(pg_m2p);
+        map.max_mfn = pg_m2p << (PAGE_SHIFT-3);
+        if (memcpy_pf(dest, &map, sizeof(map))) {
+            return -EFAULT;
+        }
+        return 0;
+    }
+
+    case XENMEM_memory_map:
+        /* we have no e820 map */
+        return -ENOSYS;
+
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, cmd);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+/*
+ * HYPERVISOR_set_trap_table: install the guest's virtual IDT.
+ * args[0] = guest pointer to a trap_info array terminated by an entry
+ * with address == 0; a NULL pointer clears the whole table.
+ * Each entry is copied via memcpy_pf and then only the local copy is
+ * used — the old code re-read traps[i].vector straight from guest
+ * memory after the safe copy (unguarded deref + TOCTOU window).
+ */
+sreg_t set_trap_table(struct xen_cpu *cpu, ureg_t *args)
+{
+    struct trap_info *traps;
+    struct trap_info trap;
+    int i;
+
+    if (!args[0]) {
+        memset(&xentr, 0, sizeof(xentr));
+        return -EINVAL;
+    }
+
+    traps = (void*)args[0];
+    for (i = 0;; i++) {
+        if (memcpy_pf(&trap, traps+i, sizeof(trap))) {
+            return -EFAULT;
+        }
+        if (!trap.address) {
+            break;
+        }
+        trap.cs = fix_sel32(trap.cs);
+        /* NOTE(review): assumes trap.vector is a uint8_t per the Xen ABI,
+         * so it cannot index past xentr[] — confirm against trap_info. */
+        xentr[trap.vector] = trap;
+#ifdef CONFIG_32BIT
+        if (trap.vector >= 0x80) {
+            /* route directly */
+            uint32_t dpl = trap.flags & 0x03;
+            xen_idt[trap.vector] =
+                mkgate32(trap.cs, trap.address, 0x8f | (dpl << 5));
+        }
+#endif
+    }
+    return 0;
+}
+
+/*
+ * Register a guest callback (event, failsafe, and on 64-bit syscall).
+ * Returns 0 on success, -1 for unsupported callback types.  On 32-bit
+ * the code selector is translated through fix_sel32() before storing.
+ */
+static int callback_setup(int type, xen_callback_t *cb)
+{
+    int ok = 0;
+
+    switch (type) {
+    case CALLBACKTYPE_event:
+    case CALLBACKTYPE_failsafe:
+#ifdef CONFIG_64BIT
+    case CALLBACKTYPE_syscall:
+#endif
+        ok = 1;
+        break;
+    }
+
+    printk(1, "%s: %s (#%d) -> %s\n", __FUNCTION__,
+           callbacktype_name(type), type,
+           ok ? "OK" : "unsupported");
+    if (!ok) {
+        return -1;
+    }
+
+#ifdef CONFIG_32BIT
+    cb->cs = fix_sel32(cb->cs);
+#endif
+    xencb[type] = *cb;
+    return 0;
+}
+
+/* Unregister a callback: on 64-bit the entry is a plain address, on
+ * 32-bit it is a cs:eip pair — zero whichever representation applies. */
+static void callback_clear(int type)
+{
+#ifdef CONFIG_64BIT
+    xencb[type] = 0;
+#else
+    xencb[type].cs = 0;
+    xencb[type].eip = 0;
+#endif
+}
+
+/*
+ * HYPERVISOR_set_callbacks (legacy ABI).
+ * 64-bit: args[0..2] = event/failsafe/syscall entry addresses.
+ * 32-bit: args[0..3] = event cs,eip then failsafe cs,eip.
+ */
+sreg_t set_callbacks(struct xen_cpu *cpu, ureg_t *args)
+{
+#ifdef CONFIG_64BIT
+    callback_setup(CALLBACKTYPE_event,    &args[0]);
+    callback_setup(CALLBACKTYPE_failsafe, &args[1]);
+    callback_setup(CALLBACKTYPE_syscall,  &args[2]);
+#else
+    xen_callback_t cb;
+
+    cb.cs  = args[0];
+    cb.eip = args[1];
+    callback_setup(CALLBACKTYPE_event, &cb);
+    cb.cs  = args[2];
+    cb.eip = args[3];
+    callback_setup(CALLBACKTYPE_failsafe, &cb);
+#endif
+    return 0;
+}
+
+/*
+ * HYPERVISOR_callback_op: register/unregister a guest callback.
+ * args[0] = CALLBACKOP_* command, args[1] = guest pointer to a
+ * struct callback_register.  The old code ignored the memcpy_pf
+ * result and could act on an uninitialized cb on a guest fault;
+ * now a failed copy returns -EFAULT like every other handler.
+ */
+sreg_t callback_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    struct callback_register cb;
+
+    if (memcpy_pf(&cb, (void*)(args[1]), sizeof(cb))) {
+        return -EFAULT;
+    }
+    /* xencb[] has 8 slots; reject anything beyond that */
+    if (cb.type >= 8) {
+        return -EINVAL;
+    }
+
+    switch (args[0]) {
+    case CALLBACKOP_register:
+        if (callback_setup(cb.type, &cb.address)) {
+            return -EINVAL;
+        }
+        break;
+    case CALLBACKOP_unregister:
+        callback_clear(cb.type);
+        break;
+    default:
+        printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, (int)args[0]);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+/* Copy one page worth (512 entries) of guest GDT descriptors into the
+ * shadow GDT, sanitizing each entry with fix_desc() along the way. */
+void guest_gdt_copy_page(struct descriptor_32 *src,
+                         struct descriptor_32 *dst)
+{
+    int idx;
+
+    for (idx = 0; idx < 512; idx++) {
+        struct descriptor_32 entry = src[idx];
+
+        fix_desc(&entry);
+        dst[idx] = entry;
+    }
+}
+
+/*
+ * Build the shadow GDT from the guest's GDT frames.
+ * entries = number of descriptors, mfns = machine frames backing the
+ * guest GDT (512 descriptors per page).  Each frame number is recorded
+ * in cpu->gdt_mfns so update_descriptor() can find it later.
+ * Always returns 0.
+ */
+int guest_gdt_init(struct xen_cpu *cpu, uint32_t entries, ureg_t *mfns)
+{
+    uint32_t pages = (entries + 511) / 512;
+    struct descriptor_32 *src, *dst;
+    uint32_t p;
+
+    for (p = 0; p < pages; p++) {
+        cpu->gdt_mfns[p] = mfns[p];
+        src = map_page(frame_to_addr(mfns[p]));
+        dst = cpu->gdt + p * 512;
+        guest_gdt_copy_page(src, dst);
+        free_page(src);
+    }
+    return 0;
+}
+
+/*
+ * HYPERVISOR_set_gdt: load a new guest GDT.
+ * args[0] = guest pointer to an array of frame numbers,
+ * args[1] = number of descriptor entries (at most 0xe000 >> 3, which
+ * also caps the frame count at 14 < 16).
+ * Validate the entry count BEFORE touching guest memory, and copy only
+ * the frames actually needed — the old code always copied all 16 slots,
+ * reading past the end of a smaller guest array.
+ */
+sreg_t set_gdt(struct xen_cpu *cpu, ureg_t *args)
+{
+    ureg_t mfns[16];
+    uint32_t entries = args[1];
+    uint32_t pages;
+
+    if (entries > (0xe000 >> 3)) {
+        return -EINVAL;
+    }
+    pages = (entries + 511) / 512;   /* 512 descriptors per frame */
+    if (memcpy_pf(mfns, (void*)(args[0]), pages * sizeof(mfns[0]))) {
+        return -EFAULT;
+    }
+
+    return guest_gdt_init(cpu, entries, mfns);
+}
+
+/*
+ * HYPERVISOR_vcpu_op: per-vcpu operations.
+ * args[0] = VCPUOP_* command, args[1] = target vcpu id (validated
+ * against nr_cpus), args[2] = guest pointer to command arguments.
+ */
+sreg_t vcpu_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    if (args[1] >= vmconf.nr_cpus) {
+        return -EINVAL;
+    }
+
+    switch (args[0]) {
+    case VCPUOP_register_runstate_memory_area:
+        /* FIXME */
+        return 0;
+
+    case VCPUOP_is_up:
+    {
+        struct xen_cpu *vcpu;
+
+        vcpu = cpu_find(args[1]);
+        return vcpu->online;
+    }
+
+    case VCPUOP_set_periodic_timer:
+    {
+        struct vcpu_set_periodic_timer ticks;
+
+        if (memcpy_pf(&ticks, (void*)args[2], sizeof(ticks))) {
+            return -EFAULT;
+        }
+
+        /* NOTE(review): applies to the calling cpu, not args[1];
+         * a zero period_ns would divide by zero in the printk below. */
+        cpu->periodic = ticks.period_ns;
+        printk(1, "%s/%d: periodic %" PRId64 " (%d Hz)\n", __FUNCTION__,
+               cpu->id, cpu->periodic,
+               1000000000 / (unsigned int)cpu->periodic);
+        lapic_timer(cpu);
+        break;
+    }
+    case VCPUOP_stop_periodic_timer:
+        cpu->periodic = 0;
+        printk(1, "%s/%d: periodic off\n", __FUNCTION__, cpu->id);
+        lapic_timer(cpu);
+        break;
+    case VCPUOP_set_singleshot_timer:
+    {
+        struct vcpu_set_singleshot_timer single;
+
+        if (memcpy_pf(&single, (void*)args[2], sizeof(single))) {
+            return -EFAULT;
+        }
+        cpu->oneshot = single.timeout_abs_ns;
+        printk(3, "%s/%d: oneshot %" PRId64 "\n", __FUNCTION__, cpu->id,
+               cpu->oneshot);
+        lapic_timer(cpu);
+        break;
+    }
+    case VCPUOP_stop_singleshot_timer:
+        cpu->oneshot = 0;
+        printk(1, "%s/%d: oneshot off\n", __FUNCTION__, cpu->id);
+        lapic_timer(cpu);
+        break;
+    case VCPUOP_initialise:
+    {
+        /* stash the initial context; the vcpu boots via VCPUOP_up */
+        struct xen_cpu *vcpu;
+
+        printk(0, "%s: initialise cpu %d\n", __FUNCTION__, (int)args[1]);
+        vcpu = cpu_find(args[1]);
+        if (!vcpu->init_ctxt) {
+            vcpu->init_ctxt = get_memory(sizeof(*(vcpu->init_ctxt)), "init_ctxt");
+        }
+        if (memcpy_pf(vcpu->init_ctxt, (void*)args[2],
+                           sizeof(*(vcpu->init_ctxt)))) {
+            return -EFAULT;
+        }
+        break;
+    }
+    case VCPUOP_up:
+    {
+        struct xen_cpu *vcpu;
+
+        printk(0, "%s: up cpu %d\n", __FUNCTION__, (int)args[1]);
+        vcpu = cpu_find(args[1]);
+        lapic_ipi_boot(cpu, vcpu);
+        break;
+    }
+    case VCPUOP_down:
+    {
+        return -ENOSYS;
+    }
+    case VCPUOP_register_vcpu_info:
+    {
+        /* move the vcpu_info struct into guest-chosen memory */
+        struct vcpu_register_vcpu_info reg;
+        struct xen_cpu *vcpu;
+        struct vcpu_info *new_info;
+        uint64_t new_info_pa;
+
+        vcpu = cpu_find(args[1]);
+        if (memcpy_pf(&reg, (void*)args[2], sizeof(reg))) {
+            return -EFAULT;
+        }
+        if (reg.offset + sizeof(struct vcpu_info) > PAGE_SIZE) {
+            return -EINVAL;
+        }
+        /* only one registration per vcpu is allowed */
+        if (vcpu->v.vcpu_page) {
+            return -EINVAL;
+        }
+
+        vcpu->v.vcpu_page = fixmap_page(cpu, frame_to_addr(reg.mfn));
+        new_info = vcpu->v.vcpu_page + reg.offset;
+        new_info_pa = frame_to_addr(reg.mfn) + reg.offset;
+        printk(1,"%s/%d: vcpu_info: mfn 0x%" PRIx64 ", offset 0x%x, pa %" PRIx64 " mapped to %p\n",
+               __FUNCTION__, vcpu->id, reg.mfn, reg.offset, new_info_pa, new_info);
+
+        /* carry over current state, then switch the live pointer */
+        memcpy(new_info, vcpu->v.vcpu_info, sizeof(struct vcpu_info));
+        vcpu->v.vcpu_info = new_info;
+        vcpu->v.vcpu_info_pa = new_info_pa;
+        pv_clock_sys(vcpu);
+        break;
+    }
+    default:
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+/*
+ * HYPERVISOR_set_timer_op: arm a one-shot timer at an absolute time.
+ * On 32-bit the 64-bit deadline is split across args[0] (low) and
+ * args[1] (high).
+ */
+sreg_t set_timer_op(struct xen_cpu *cpu, ureg_t *args)
+{
+#ifdef CONFIG_64BIT
+    uint64_t time = args[0];
+#else
+    uint64_t time = args[0] | (uint64_t)args[1] << 32;
+#endif
+
+    cpu->oneshot = time;
+    lapic_timer(cpu);
+    return 0;
+}
+
+/*
+ * HYPERVISOR_event_channel_op: event channel management.
+ * args[0] = EVTCHNOP_* command, args[1] = guest pointer to the command
+ * struct.  Results (allocated ports) are written back to guest memory.
+ */
+sreg_t event_channel_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    switch (args[0]) {
+    case EVTCHNOP_alloc_unbound:
+    {
+        struct evtchn_alloc_unbound alloc;
+        struct xen_cpu *vcpu;
+
+        if (memcpy_pf(&alloc, (void*)args[1], sizeof(alloc))) {
+            return -EFAULT;
+        }
+        /* only self<->host (dom 0) channels are supported */
+        if (alloc.dom != DOMID_SELF || alloc.remote_dom != 0) {
+            return -EINVAL;
+        }
+        alloc.port = evtchn_alloc(cpu->id);
+        vcpu = cpu_find(0);
+        evtchn_route_interdomain(vcpu, alloc.port, NULL);
+        if (memcpy_pf((void*)args[1], &alloc, sizeof(alloc))) {
+            return -EFAULT;
+        }
+        return 0;
+    }
+    case EVTCHNOP_bind_vcpu:
+    {
+        struct evtchn_bind_vcpu bind;
+        struct xen_cpu *vcpu;
+
+        if (memcpy_pf(&bind, (void*)args[1], sizeof(bind))) {
+            return -EFAULT;
+        }
+        vcpu = cpu_find(bind.vcpu);
+        if (evtchn_route_interdomain(vcpu, bind.port, NULL)) {
+            return -EINVAL;
+        }
+        return 0;
+    }
+    case EVTCHNOP_bind_virq:
+    {
+        struct evtchn_bind_virq bind;
+        struct xen_cpu *vcpu;
+
+        if (memcpy_pf(&bind, (void*)args[1], sizeof(bind))) {
+            return -EFAULT;
+        }
+        switch (bind.virq) {
+        case VIRQ_TIMER:
+            /* one timer port per vcpu, created on first bind */
+            vcpu = cpu_find(bind.vcpu);
+            if (!vcpu->timerport) {
+                vcpu->timerport = evtchn_alloc(cpu->id);
+                evtchn_route_virq(vcpu, VIRQ_TIMER, vcpu->timerport, "timer");
+                if (cpu == vcpu) {
+                    lapic_timer(cpu);
+                }
+            }
+            bind.port = vcpu->timerport;
+            break;
+        default:
+            /* other virqs: hand out a port, events never fire */
+            bind.port = evtchn_alloc(cpu->id);
+            break;
+        }
+        if (memcpy_pf((void*)args[1], &bind, sizeof(bind))) {
+            return -EFAULT;
+        }
+        return 0;
+    }
+    case EVTCHNOP_bind_ipi:
+    {
+        struct evtchn_bind_ipi bind;
+        struct xen_cpu *vcpu;
+
+        if (memcpy_pf(&bind, (void*)args[1], sizeof(bind))) {
+            return -EFAULT;
+        }
+        bind.port = evtchn_alloc(cpu->id);
+        vcpu = cpu_find(bind.vcpu);
+        evtchn_route_ipi(vcpu, bind.port);
+        if (memcpy_pf((void*)args[1], &bind, sizeof(bind))) {
+            return -EFAULT;
+        }
+        return 0;
+    }
+    case EVTCHNOP_bind_pirq:
+        /* no physical irq passthrough */
+        return -EPERM;
+    case EVTCHNOP_send:
+    {
+        struct evtchn_send send;
+
+        if (memcpy_pf(&send, (void*)args[1], sizeof(send))) {
+            return -EFAULT;
+        }
+        if (evtchn_send(cpu, send.port)) {
+            /* handled internally */
+            return 0;
+        } else {
+            /* not a local port, forward the notification to the host */
+            emudev_cmd(EMUDEV_CMD_EVTCHN_SEND, send.port);
+            return 0;
+        }
+    }
+    case EVTCHNOP_unmask:
+    {
+        struct evtchn_unmask unmask;
+
+        if (memcpy_pf(&unmask, (void*)args[1], sizeof(unmask))) {
+            return -EFAULT;
+        }
+        evtchn_unmask(cpu, unmask.port);
+        return 0;
+    }
+    case EVTCHNOP_close:
+    {
+        struct evtchn_close cl;
+
+        if (memcpy_pf(&cl, (void*)args[1], sizeof(cl))) {
+            return -EFAULT;
+        }
+        evtchn_close(cpu, cl.port);
+        return 0;
+    }
+    default:
+        return -ENOSYS;
+    }
+}
+
+/*
+ * Old-style event channel ABI: the command and its arguments arrive in
+ * a single struct evtchn_op.  Translate into the new two-argument form
+ * (cmd + guest pointer to the union) and delegate.
+ */
+sreg_t event_channel_op_compat(struct xen_cpu *cpu, ureg_t *args)
+{
+    struct evtchn_op op;
+    ureg_t nargs[2];
+
+    if (memcpy_pf(&op, (void*)args[0], sizeof(op))) {
+        return -EFAULT;
+    }
+    nargs[0] = op.cmd;
+    /* point at the embedded union inside the guest's struct */
+    nargs[1] = args[0] + offsetof(struct evtchn_op, u);
+    return event_channel_op(cpu, nargs);
+}
+
+/*
+ * HYPERVISOR_mmuext_op: extended MMU operations (TLB flushes, LDT,
+ * pagetable base switches).  args[0] = guest pointer to an array of
+ * struct mmuext_op, args[1] = count, args[2] = done-count output
+ * (unused), args[3] = foreign domid (only DOMID_SELF supported).
+ * NOTE(review): uops entries are read through the guest pointer
+ * directly, not via memcpy_pf — consistent with other batched ops here
+ * but worth confirming.
+ */
+sreg_t mmuext_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    struct mmuext_op *uops = (void*)args[0];
+    ureg_t count = args[1];
+#if 0
+    ureg_t *done = (void*)args[2];
+#endif
+    ureg_t dom   = args[3];
+    ureg_t cpumask;
+    int i;
+
+    if (dom != DOMID_SELF) {
+        printk(1, "%s: foreigndom not supported\n", __FUNCTION__);
+        return -ENOSYS;
+    }
+
+    for (i = 0; i < count; i++, uops++) {
+        switch (uops->cmd) {
+        case MMUEXT_PIN_L1_TABLE:
+        case MMUEXT_PIN_L2_TABLE:
+        case MMUEXT_PIN_L3_TABLE:
+        case MMUEXT_PIN_L4_TABLE:
+            /* ignore */
+            break;
+        case MMUEXT_UNPIN_TABLE:
+            /* KVM_MMU_OP_RELEASE_PT ??? */
+            break;
+        case MMUEXT_INVLPG_LOCAL:
+            flush_tlb_addr(uops->arg1.linear_addr);
+            break;
+        case MMUEXT_INVLPG_MULTI:
+            if (memcpy_pf(&cpumask, (void*)uops->arg2.vcpumask.p,
+                sizeof(cpumask))) {
+                return -EFAULT;
+            }
+            flush_tlb_addr(uops->arg1.linear_addr);
+            flush_tlb_remote(cpu, cpumask, uops->arg1.linear_addr);
+            break;
+        case MMUEXT_INVLPG_ALL:
+            flush_tlb_addr(uops->arg1.linear_addr);
+            flush_tlb_remote(cpu, vminfo.vcpus_online, uops->arg1.linear_addr);
+            break;
+        case MMUEXT_TLB_FLUSH_LOCAL:
+            flush_tlb();
+            break;
+        case MMUEXT_TLB_FLUSH_MULTI:
+            /* fetch the target vcpu mask from the op — the old code
+             * used 'cpumask' without ever reading it here, flushing
+             * whatever mask a previous op (or garbage) left behind */
+            if (memcpy_pf(&cpumask, (void*)uops->arg2.vcpumask.p,
+                sizeof(cpumask))) {
+                return -EFAULT;
+            }
+            flush_tlb();
+            flush_tlb_remote(cpu, cpumask, 0);
+            break;
+        case MMUEXT_TLB_FLUSH_ALL:
+            flush_tlb();
+            flush_tlb_remote(cpu, vminfo.vcpus_online, 0);
+            break;
+
+        case MMUEXT_SET_LDT:
+            printk(2, "%s: SET_LDT (va %lx, nr %d)\n", __FUNCTION__,
+                   uops->arg1.linear_addr, uops->arg2.nr_ents);
+            if (uops->arg2.nr_ents) {
+                /* build an LDT descriptor in the shadow GDT and load it */
+                struct descriptor_32 *gdt = cpu->gdt;
+                int idx = ldt(cpu);
+                gdt[ idx +0 ] = mkdesc32(uops->arg1.linear_addr & 0xffffffff,
+                                         uops->arg2.nr_ents * 8 - 1,
+                                         0x82, 0);
+#ifdef CONFIG_64BIT
+                /* 64-bit system descriptors span two GDT slots */
+                gdt[ idx+1 ].a = uops->arg1.linear_addr >> 32;
+                gdt[ idx+1 ].b = 0;
+#endif
+                lldt(idx << 3);
+            } else {
+                lldt(0);
+            }
+            break;
+
+        case MMUEXT_NEW_BASEPTR:
+            update_emu_mappings(uops->arg1.mfn);
+            pv_write_cr3(cpu, uops->arg1.mfn);
+            break;
+#ifdef CONFIG_64BIT
+        case MMUEXT_NEW_USER_BASEPTR:
+            /* remember the user pagetable; loaded on kernel->user switch */
+            update_emu_mappings(uops->arg1.mfn);
+            cpu->user_cr3_mfn = uops->arg1.mfn;
+            break;
+#endif
+        default:
+            printk(0, "%s: FIXME: unknown %d\n", __FUNCTION__, uops->cmd);
+            return -ENOSYS;
+        }
+    }
+    return 0;
+}
+
+/*
+ * HYPERVISOR_physdev_op: only iopl and iobitmap settings are honored
+ * (stored per-vcpu); all real physical device ops are refused.
+ */
+sreg_t physdev_op(struct xen_cpu *cpu, ureg_t *args)
+{
+    switch (args[0]) {
+    case PHYSDEVOP_set_iopl:
+    {
+        struct physdev_set_iopl iopl;
+
+        if (memcpy_pf(&iopl, (void*)args[1], sizeof(iopl))) {
+            return -EFAULT;
+        }
+        printk(2, "%s: set iopl: %d\n", __FUNCTION__, iopl.iopl);
+        cpu->iopl = iopl.iopl;
+        return 0;
+    }
+    case PHYSDEVOP_set_iobitmap:
+    {
+        /* only the port count is kept; the bitmap pointer is ignored */
+        struct physdev_set_iobitmap iobitmap;
+        if (memcpy_pf(&iobitmap, (void*)args[1], sizeof(iobitmap))) {
+            return -EFAULT;
+        }
+        printk(2, "%s: set iobitmap: %d\n", __FUNCTION__, iobitmap.nr_ports);
+        cpu->nr_ports = iobitmap.nr_ports;
+        return 0;
+    }
+    default:
+        printk(1, "%s: not implemented (#%d)\n", __FUNCTION__, (int)args[0]);
+        return -EPERM;
+    }
+}
+
+/*
+ * Read hardware debug register DRn (0-7).  The register number must be
+ * an immediate in the mov encoding, hence the per-register switch.
+ * Returns -EINVAL cast to ureg_t for out-of-range register numbers.
+ */
+static ureg_t read_debugreg(int nr)
+{
+    ureg_t val;
+    switch (nr) {
+    case 0: asm volatile("mov %%db0,%0" : "=r" (val)); break;
+    case 1: asm volatile("mov %%db1,%0" : "=r" (val)); break;
+    case 2: asm volatile("mov %%db2,%0" : "=r" (val)); break;
+    case 3: asm volatile("mov %%db3,%0" : "=r" (val)); break;
+    case 4: asm volatile("mov %%db4,%0" : "=r" (val)); break;
+    case 5: asm volatile("mov %%db5,%0" : "=r" (val)); break;
+    case 6: asm volatile("mov %%db6,%0" : "=r" (val)); break;
+    case 7: asm volatile("mov %%db7,%0" : "=r" (val)); break;
+    default: val = -EINVAL; break;
+    }
+    return val;
+}
+
+/*
+ * Write hardware debug register DRn (0-7); out-of-range numbers are
+ * silently ignored (callers validate first, see set_debugreg).
+ */
+static void write_debugreg(int nr, ureg_t val)
+{
+    switch (nr) {
+    case 0: asm volatile("mov %0,%%db0" : : "r" (val) : "memory"); break;
+    case 1: asm volatile("mov %0,%%db1" : : "r" (val) : "memory"); break;
+    case 2: asm volatile("mov %0,%%db2" : : "r" (val) : "memory"); break;
+    case 3: asm volatile("mov %0,%%db3" : : "r" (val) : "memory"); break;
+    case 4: asm volatile("mov %0,%%db4" : : "r" (val) : "memory"); break;
+    case 5: asm volatile("mov %0,%%db5" : : "r" (val) : "memory"); break;
+    case 6: asm volatile("mov %0,%%db6" : : "r" (val) : "memory"); break;
+    case 7: asm volatile("mov %0,%%db7" : : "r" (val) : "memory"); break;
+    }
+}
+
+/* HYPERVISOR_get_debugreg: return the value of debug register args[0]. */
+sreg_t get_debugreg(struct xen_cpu *cpu, ureg_t *args)
+{
+    ureg_t val = read_debugreg(args[0]);
+    printk(2, "%s: %" PRIxREG" = %" PRIxREG "\n", __FUNCTION__, args[0], val);
+    return val;
+}
+
+/*
+ * HYPERVISOR_set_debugreg: set debug register args[0] to args[1].
+ * DR6/DR7 reserved bits are forced to their architectural values before
+ * the write.  DR4/DR5 are rejected here (while get_debugreg allows
+ * reading them) — intentional asymmetry, they alias DR6/DR7 anyway.
+ */
+sreg_t set_debugreg(struct xen_cpu *cpu, ureg_t *args)
+{
+    int nr = args[0];
+    ureg_t val = args[1];
+
+    switch (nr) {
+    case 0:
+    case 1:
+    case 2:
+    case 3:
+        /* TODO: check address */
+        break;
+    case 6:
+        val &= 0xffffefff; /* reserved bits => 0 */
+        val |= 0xffff0ff0; /* reserved bits => 1 */
+        break;
+    case 7:
+        if (val) {
+            val &= 0xffff27ff; /* reserved bits => 0 */
+            val |= 0x00000400; /* reserved bits => 1 */
+        }
+        break;
+    default:
+        return -EINVAL;
+    }
+
+    printk(0, "%s: %d = %" PRIxREG "\n", __FUNCTION__, nr, val);
+    write_debugreg(nr,val);
+    return 0;
+}
+