diff mbox

[18/40] xenner: kernel: Main

Message ID 1288623713-28062-19-git-send-email-agraf@suse.de
State New
Headers show

Commit Message

Alexander Graf Nov. 1, 2010, 3:01 p.m. UTC
This patch adds the platform agnostic piece of xenner's main loop.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 pc-bios/xenner/xenner-main.c |  875 ++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 875 insertions(+), 0 deletions(-)
 create mode 100644 pc-bios/xenner/xenner-main.c
diff mbox

Patch

diff --git a/pc-bios/xenner/xenner-main.c b/pc-bios/xenner/xenner-main.c
new file mode 100644
index 0000000..c63f447
--- /dev/null
+++ b/pc-bios/xenner/xenner-main.c
@@ -0,0 +1,875 @@ 
+/*
+ *  Copyright (C) Red Hat 2007
+ *  Copyright (C) Novell Inc. 2010
+ *
+ *  Author(s): Gerd Hoffmann <kraxel@redhat.com>
+ *             Alexander Graf <agraf@suse.de>
+ *
+ *  Xenner generic main functions
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config-host.h"
+
+static void set_up_context(void *ctxt, unsigned long boot_cr3,
+                           unsigned long init_pt_len);
+static void guest_hypercall_page(struct xen_cpu *cpu);
+
+/* Fill the first n bytes at s with the byte value c; returns s. */
+void *memset(void *s, int c, size_t n)
+{
+    uint8_t *p = s;
+    size_t i; /* same width as n: a 32-bit index would wrap for n > 4G */
+    for (i = 0; i < n; i++) {
+        p[i] = (uint8_t)c;
+    }
+    return s;
+}
+
+/* Copy n bytes from src to dest (front to back); returns dest. */
+void *memcpy(void *dest, const void *src, size_t n)
+{
+    const uint8_t *s = src;
+    uint8_t *d = dest;
+    size_t i; /* same width as n: a 32-bit index would wrap for n > 4G */
+    for (i = 0; i < n; i++) {
+        d[i] = s[i];
+    }
+    return dest;
+}
+
+/*
+ * Compare the first n bytes of s1 and s2 as unsigned bytes.
+ * Returns 0 if equal, else -1 or 1 by the first differing byte.
+ */
+int memcmp(const void *s1, const void *s2, size_t n)
+{
+    const uint8_t *a = s1;
+    const uint8_t *b = s2;
+    size_t i; /* unsigned like n: an int index overflows for n > INT_MAX */
+
+    for (i = 0; i < n; i++) {
+        if (a[i] != b[i]) {
+            return (a[i] < b[i]) ? -1 : 1;
+        }
+    }
+    return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+static void print_gpf_info(int level, struct xen_cpu *cpu, struct regs *regs)
+{
+    uint8_t *code = (void*)regs->rip;
+    /* log error code (bit 0 EXT, bit 1 IDT, bit 2 TI, index from bit 3), cs:rip, 8 bytes at rip */
+    printk(level, "%s: vcpu %d, index 0x%x%s%s%s, "
+           "rflags %" PRIxREG ", cs:rip %" PRIxREG ":%" PRIxREG " "
+           "-> 0x%02x, 0x%02x, 0x%02x, 0x%02x,  0x%02x, 0x%02x, 0x%02x, 0x%02x\n",
+           __FUNCTION__, cpu->id, (int)(regs->error >> 3),
+           (regs->error & 0x04) ? ", TI"  : "",
+           (regs->error & 0x02) ? ", IDT" : "",
+           (regs->error & 0x01) ? ", EXT" : "",
+           regs->rflags, regs->cs, regs->rip,
+           code[0], code[1], code[2], code[3],
+           code[4], code[5], code[6], code[7]);
+}
+
+static void print_page_fault_info(int level, struct xen_cpu *cpu, struct regs *regs, ureg_t cr2)
+{
+    printk(level, "%s:%s%s%s%s%s%s, rip %" PRIxREG ", cr2 %" PRIxREG ", vcpu %d\n",
+           __FUNCTION__,
+#ifdef CONFIG_64BIT
+           is_kernel(cpu) ? " [kernel-mode]" : " [user-mode]",
+#else
+           "",
+#endif
+           regs->error & 0x01 ? " preset"  : " nopage",
+           regs->error & 0x02 ? " write"   : " read",
+           regs->error & 0x04 ? " user"    : " kernel",
+           regs->error & 0x08 ? " reserved-bit"  : "",
+           regs->error & 0x10 ? " instr-fetch"   : "",
+           regs->rip, cr2, cpu->id);
+}
+
+/* Redirect rip via the (fault addr, fixup addr) pairs in _estart.._estop. */
+static int fixup_extable(struct regs *regs)
+{
+    uintptr_t *entry;
+
+    for (entry = _estart; entry < _estop; entry += 2) {
+        if (entry[0] == regs->rip) {
+            printk(2, "fixup: %" PRIxPTR " -> %" PRIxPTR "\n", entry[0], entry[1]);
+            regs->rip = entry[1];
+            vminfo.faults[XEN_FAULT_PAGE_FAULT_FIX_EXTAB]++;
+            return 1; /* handled: rip now points at the fixup address */
+        }
+    }
+    return 0; /* no entry for this rip */
+}
+
+int panic(const char *message, struct regs *regs)
+{
+    printk(0, "panic: %s\n", message); /* the newline is added here */
+    if (regs) {
+        print_state(regs); /* registers and stack, only when regs are given */
+    }
+    emudev_cmd(EMUDEV_CMD_GUEST_SHUTDOWN, -1); /* issue the GUEST_SHUTDOWN command */
+    return 0; /* NOTE(review): callers continue if the command comes back */
+}
+
+/* --------------------------------------------------------------------- */
+
+#ifdef CONFIG_64BIT
+# define DR "%016" PRIxREG /* register-sized values, 16 hex digits */
+# define DC "%08"  PRIxREG /* control registers and rflags */
+# define DS "%04"  PRIxREG /* segment selectors */
+#else
+# define DR "%08"  PRIxREG /* register-sized values, 8 hex digits */
+# define DC "%08"  PRIxREG /* control registers and rflags */
+# define DS "%04"  PRIxREG /* segment selectors */
+#endif
+
+void print_registers(int level, struct regs *regs)
+{
+    ureg_t ds,es,fs,gs,cr0,cr2,cr3,cr4;
+    /* ds/es/fs/gs and cr0/cr2/cr3/cr4 are read live from the cpu, not taken from regs */
+    asm volatile("mov %%ds, %[ds]  \n"
+                 "mov %%es, %[es]  \n"
+                 "mov %%fs, %[fs]  \n"
+                 "mov %%gs, %[gs]  \n"
+                 : [ds] "=r" (ds),
+                   [es] "=r" (es),
+                   [fs] "=r" (fs),
+                   [gs] "=r" (gs)
+                 : /* no inputs */);
+    asm volatile("mov %%cr0, %[cr0]  \n"
+                 "mov %%cr2, %[cr2]  \n"
+                 "mov %%cr3, %[cr3]  \n"
+                 "mov %%cr4, %[cr4]  \n"
+                 : [cr0] "=r" (cr0),
+                   [cr2] "=r" (cr2),
+                   [cr3] "=r" (cr3),
+                   [cr4] "=r" (cr4)
+                 : /* no inputs */);
+    /* everything below is printed at the caller's log level */
+    printk(level, "printing registers\n");
+    printk(level, "  code   cs:rip " DS ":" DR "\n", regs->cs, regs->rip);
+    printk(level, "  stack  ss:rsp " DS ":" DR "\n", regs->ss, regs->rsp);
+    printk(level, "  rax " DR " rbx " DR " rcx " DR " rdx " DR "\n",
+           regs->rax, regs->rbx, regs->rcx, regs->rdx);
+    printk(level, "  rsi " DR " rdi " DR " rsp " DR " rbp " DR "\n",
+           regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+#ifdef CONFIG_64BIT
+    printk(level, "  r8  " DR " r9  " DR " r10 " DR " r11 " DR "\n",
+           regs->r8, regs->r9, regs->r10, regs->r11);
+    printk(level, "  r12 " DR " r13 " DR " r14 " DR " r15 " DR "\n",
+           regs->r12, regs->r13, regs->r14, regs->r15);
+#endif
+    printk(level, "  cs " DS " ds " DS " es " DS " fs " DS " gs " DS " ss " DS "\n",
+           regs->cs, ds, es, fs, gs, regs->ss);
+    printk(level, "  cr0 " DC " cr2 " DC " cr3 " DC " cr4 " DC " rflags " DC "\n",
+           cr0, cr2, cr3, cr4, regs->rflags);
+    print_bits(level, "  cr0", cr0, cr0, cr0_bits);
+    print_bits(level, "  cr4", cr4, cr4, cr4_bits);
+    print_bits(level, "  rflags", regs->rflags, regs->rflags, rflags_bits);
+
+}
+
+/* Dump ureg_t words from rsp up to the last word below the next page boundary. */
+void print_stack(int level, ureg_t rsp)
+{
+    const ureg_t last = ((rsp + PAGE_SIZE) & PAGE_MASK) - sizeof(ureg_t);
+    ureg_t slot;
+
+    printk(level, "printing stack " DR " - " DR "\n", rsp, last);
+    for (slot = rsp; slot <= last; slot += sizeof(ureg_t)) {
+        printk(level, "  " DR ": " DR "\n", slot, *((ureg_t*)slot));
+    }
+}
+
+void print_state(struct regs *regs)
+{
+    print_registers(0, regs);   /* always at log level 0 */
+    print_stack(0, regs->rsp);  /* stack words starting at the saved rsp */
+}
+
+#undef DR
+
+/* --------------------------------------------------------------------- */
+
+/* Pack a kvm_segment into the a/b words of a descriptor_32. */
+static struct descriptor_32 mkdesc(const struct kvm_segment *seg)
+{
+    const int shift = seg->g ? 12 : 0; /* g set: limit is kept in 4k units */
+    struct descriptor_32 desc;
+
+    /* word a: limit 15:0 in the low half, base 15:0 in the high half */
+    desc.a  = (seg->limit >> shift) & 0xffff;
+    desc.a |= (seg->base & 0xffff) << 16;
+
+    /* word b: limit 19:16, the remaining base bits, type, dpl and flags */
+    desc.b  = (seg->limit >> shift) & 0x000f0000;
+    desc.b |= seg->base & 0xff000000;
+    desc.b |= (seg->base & 0xff0000) >> 16;
+    desc.b |= (seg->type & 0x0f) << 8;
+    desc.b |= (seg->dpl & 0x03) << 13;
+    desc.b |= seg->s       ? (1 << 12) : 0;
+    desc.b |= seg->present ? (1 << 15) : 0;
+    desc.b |= seg->avl     ? (1 << 20) : 0;
+    desc.b |= seg->l       ? (1 << 21) : 0;
+    desc.b |= seg->db      ? (1 << 22) : 0;
+    desc.b |= seg->g       ? (1 << 23) : 0;
+    return desc;
+}
+
+static inline void gdt_set(struct descriptor_32 *gdt, const struct kvm_segment *seg)
+{
+    gdt[ seg->selector >> 3 ] = mkdesc(seg); /* selector >> 3 is the table slot */
+}
+
+static void cr_init(struct xen_cpu *cpu)
+{
+    ureg_t cr0, cr4;
+    /* Adjust cr0 and cr4; old and new values are passed to print_bits() first. */
+    printk(2, "%s: cpu %d\n", __FUNCTION__, cpu->id);
+    /* cr0: force on PE MP ET NE WP AM PG, force off TS CD NW */
+    cr0  = read_cr0();
+    cr0 |= X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | X86_CR0_NE | \
+        X86_CR0_WP | X86_CR0_AM | X86_CR0_PG;
+    cr0 &= ~(X86_CR0_TS|X86_CR0_CD|X86_CR0_NW);
+    print_bits(2, "cr0", read_cr0(), cr0, cr0_bits);
+    write_cr0(cr0);
+    /* cr4: additionally set OSFXSR and OSXMMEXCPT, other bits are kept */
+    cr4  = read_cr4();
+    cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+    print_bits(2, "cr4", read_cr4(), cr4, cr4_bits);
+    write_cr4(cr4);
+}
+
+static void stack_init(struct xen_cpu *cpu)
+{
+    uintptr_t *ptr;
+    int pages;
+    /* One-time per-cpu stack setup: nothing is done once stack_low is set. */
+    if (cpu->stack_low) {
+        return;
+    }
+
+    /* allocate stack */
+    pages = (boot_stack_high - boot_stack_low + PAGE_SIZE -1) / PAGE_SIZE;
+    cpu->stack_low  = get_pages(pages, "stack");
+    cpu->stack_high = cpu->stack_low + pages * PAGE_SIZE;
+
+    /* set per-cpu data pointer */
+    ptr = STACK_PTR(cpu, cpu_ptr);
+    *ptr = (uintptr_t)cpu;
+
+    /* set per-cpu data pointer for boot stack */
+    if (!cpu->id) {
+        ptr = (void*)(&cpu_ptr);
+        *ptr = (uintptr_t)cpu;
+    }
+
+#ifdef CONFIG_64BIT
+    /* copy and setup syscall trampoline from boot stack */
+    memcpy(STACK_PTR(cpu, trampoline_start),
+           trampoline_start, trampoline_stop - trampoline_start);
+    ptr = STACK_PTR(cpu, trampoline_patch);
+    *ptr = (uintptr_t)trampoline_syscall;
+    /* NOTE(review): `pages` pages are allocated but irqstack_high is low + PAGE_SIZE -- confirm */
+    /* allocate irq stack */
+    cpu->irqstack_low  = get_pages(pages, "irqstack");
+    cpu->irqstack_high = cpu->irqstack_low + PAGE_SIZE;
+
+    /* set per-cpu data pointer */
+    ptr = IRQSTACK_PTR(cpu, cpu_ptr);
+    *ptr = (uintptr_t)cpu;
+#endif
+}
+
+void gdt_load(struct xen_cpu *cpu)
+{
+    /* lgdt operand: 16-bit limit (16 pages of gdt) followed by the table address */
+    struct {
+        uint16_t  len;
+        uintptr_t ptr;
+    } __attribute__((packed)) desc;
+
+    desc.len = (16 * PAGE_SIZE)-1;
+    desc.ptr = (uintptr_t)cpu->gdt;
+    asm volatile("lgdt %0" : : "m" (desc) : "memory");
+}
+
+void idt_load(void)
+{
+    /* lidt operand: 16-bit limit (size of xen_idt) followed by the table address */
+    struct {
+        uint16_t  len;
+        uintptr_t ptr;
+    } __attribute__((packed)) desc;
+
+    desc.len = sizeof(xen_idt)-1;
+    desc.ptr = (uintptr_t)xen_idt;
+    asm volatile("lidt %0" : : "m" (desc) : "memory");
+}
+
+/* Apply cpu->init_ctxt: guest gdt (if any), fixed-up selectors, kernel stack. */
+void guest_cpu_init(struct xen_cpu *cpu)
+{
+    struct vcpu_guest_context *init = cpu->init_ctxt;
+    ureg_t frames[16];
+    int n;
+
+    if (init->gdt_ents != 0) {
+        for (n = 0; n < 16; n++) {
+            frames[n] = init->gdt_frames[n];
+        }
+        guest_gdt_init(cpu, init->gdt_ents, frames);
+    }
+    /* every selector in the context is rewritten through fix_sel() */
+    init->kernel_ss    = fix_sel(init->kernel_ss);
+    init->user_regs.cs = fix_sel(init->user_regs.cs);
+    init->user_regs.ds = fix_sel(init->user_regs.ds);
+    init->user_regs.es = fix_sel(init->user_regs.es);
+    init->user_regs.fs = fix_sel(init->user_regs.fs);
+    init->user_regs.gs = fix_sel(init->user_regs.gs);
+    init->user_regs.ss = fix_sel(init->user_regs.ss);
+    cpu->kernel_ss = init->kernel_ss;
+    cpu->kernel_sp = init->kernel_sp;
+}
+
+/*
+ * Translate a machine address depending on the region it falls into:
+ * the m2p table, xenner's own memory, or guest memory.
+ */
+static uint64_t maddr_to_paddr(uint64_t _maddr)
+{
+    unsigned long virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0);
+    uint64_t frame = addr_to_frame(_maddr);
+
+    if ((frame >= vmconf.mfn_m2p) && (frame < (vmconf.mfn_m2p + vmconf.pg_m2p))) {
+        /* M2P: offset into the XEN_M2P window */
+        return XEN_M2P + _maddr - frame_to_addr(vmconf.mfn_m2p);
+    }
+
+    if (_maddr < frame_to_addr(vmconf.mfn_guest)) {
+        /* xenner: everything below the first guest frame, relative to _vstart */
+        return (uintptr_t)_vstart + _maddr;
+    }
+
+    /* guest: rebase from the first guest frame onto virt_base */
+    return _maddr - frame_to_addr(vmconf.mfn_guest) + virt_base;
+}
+
+static void *pfn_to_ptr(xen_pfn_t pfn)
+{
+    /* map_page() the frame found vmconf.mfn_guest frames above the given pfn */
+    unsigned long guest_off = frame_to_addr(vmconf.mfn_guest);
+    unsigned long maddr = frame_to_addr(pfn) + guest_off;
+    return map_page(maddr);
+}
+
+/*
+ * Fill in the guest's start_info page, the m2p table and the guest's mfn
+ * list, and store the start_info pointer in regs->rsi.
+ */
+static void guest_start_info(struct xen_cpu *cpu, struct regs *regs,
+                             unsigned long init_pt_len, unsigned long boot_cr3)
+{
+    struct start_info *start_info;
+    uint64_t i;
+    uint64_t virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0);
+    uint64_t initrd_len;
+    uint64_t cmdline_pfn = emudev_get(EMUDEV_CONF_PFN_CMDLINE, 0);
+    unsigned long *mfn_list;
+    uint64_t mfn_list_pfn = emudev_get(EMUDEV_CONF_PFN_MFN_LIST, 0);
+    char cap_ver[] = CAP_VERSION_STRING;
+    char *cmdline = NULL;
+
+    start_info = pfn_to_ptr(emudev_get(EMUDEV_CONF_PFN_START_INFO, 0));
+    printk(1, "%s: called\n", __FUNCTION__);
+
+    memset(start_info, 0, sizeof(*start_info));
+    memcpy(start_info->magic, cap_ver, sizeof(cap_ver));
+    start_info->magic[sizeof(start_info->magic) - 1] = '\0';
+
+    start_info->pt_base = maddr_to_paddr(boot_cr3);
+    start_info->nr_pt_frames = addr_to_frame(init_pt_len + (PAGE_SIZE - 1));
+    start_info->shared_info = (unsigned long)EMU_PA(&shared_info);
+    start_info->nr_pages = emudev_get(EMUDEV_CONF_GUEST_PAGE_COUNT, 0);
+    start_info->store_mfn = emudev_get(EMUDEV_CONF_MFN_XENSTORE, 0);
+    start_info->store_evtchn = emudev_get(EMUDEV_CONF_EVTCH_XENSTORE, 0);
+    start_info->console.domU.mfn = emudev_get(EMUDEV_CONF_MFN_CONSOLE, 0);
+    start_info->console.domU.evtchn = emudev_get(EMUDEV_CONF_EVTCH_CONSOLE, 0);
+
+    initrd_len = emudev_get(EMUDEV_CONF_INITRD_LEN, 0);
+    if (initrd_len) {
+        start_info->mod_start = virt_base +
+            frame_to_addr(emudev_get(EMUDEV_CONF_PFN_INITRD, 0));
+        start_info->mod_len = initrd_len;
+    }
+
+    if (cmdline_pfn) {
+        cmdline = pfn_to_ptr(cmdline_pfn); /* map once; released below */
+        memcpy(start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
+        start_info->cmd_line[MAX_GUEST_CMDLINE - 1] = '\0'; /* keep %s bounded */
+        printk(1, "guest cmdline: %s\n", start_info->cmd_line);
+    }
+
+    /* set up m2p page table */
+    for (i = 0; i < vmconf.pg_total; i++) {
+        m2p[i + vmconf.mfn_guest] = i;
+    }
+
+    /* fill mfn list */
+    start_info->mfn_list = virt_base + frame_to_addr(mfn_list_pfn);
+    mfn_list = (void*)start_info->mfn_list;
+    for (i = 0; i < start_info->nr_pages; i++) {
+        mfn_list[i] = i + vmconf.mfn_guest;
+    }
+
+    /* NOTE(review): rsi gets the pfn_to_ptr() mapping that is freed right below -- confirm */
+    regs->rsi = (unsigned long)start_info;
+    free_page(start_info);
+    if (cmdline) {
+        free_page(cmdline);
+    }
+}
+
+static void cpu_set_cr3(struct xen_cpu *cpu, unsigned long boot_cr3)
+{ /* Record the frame of page table root boot_cr3 in cpu; no other field is touched. */
+#ifdef CONFIG_64BIT
+    cpu->user_mode = 0;
+    cpu->kernel_cr3_mfn = addr_to_frame(boot_cr3);
+#else
+    cpu->cr3_mfn = addr_to_frame(boot_cr3);
+#endif
+}
+
+static uint64_t count_pgtables(uint64_t max_pfn)
+{
+    const uint64_t align = addr_to_frame(4 * 1024 * 1024); /* frames per 4mb */
+
+    /*
+     * XXX this should become a real calculation, for now assume we need max
+     *     200 page table pages.  NOTE(review): the result includes max_pfn
+     *     itself and map_guest() adds it to max_pfn again -- confirm intended.
+     */
+    uint64_t total = max_pfn + 200;
+
+    /* pad to 4mb */
+    return (total + align - 1) & ~(align - 1);
+}
+
+/*
+ * Maps the guest linearly at virt_base in a new page table. Returns the table
+ * length (heap_size() after mapping); the root's address goes to *boot_cr3.
+ */
+static unsigned long map_guest(unsigned long *boot_cr3)
+{
+    uint64_t virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0);
+    struct xen_cpu tmp_cpu;
+    uint64_t max_pfn = emudev_get(EMUDEV_CONF_PFN_INIT_PT, 0);
+    unsigned long init_pt_len;
+    /* NOTE(review): count_pgtables() already includes max_pfn, so it is added twice -- confirm */
+    max_pfn += count_pgtables(max_pfn);
+
+    /* create initial page table that maps the guest virt_base linearly
+       to host physical memory. This has to happen in guest visible mem */
+    switch_heap(HEAP_HIGH);
+    /* tmp_cpu is a scratch struct: only the fields set by cpu_set_cr3() are initialised */
+    *boot_cr3 = (unsigned long)EMU_PA(get_pages(1, "pt root"));
+    cpu_set_cr3(&tmp_cpu, *boot_cr3);
+    printk(3, "init guest pt map mfn %lx len %lx\n", (unsigned long)vmconf.mfn_guest,
+           (unsigned long)max_pfn);
+
+    map_region(&tmp_cpu, virt_base, EMU_PGFLAGS, vmconf.mfn_guest, max_pfn);
+
+    /* save the pt len for start_info */
+    init_pt_len = heap_size();
+
+    switch_heap(HEAP_EMU);
+
+    return init_pt_len;
+}
+
+
+/* --------------------------------------------------------------------- */
+
+static struct xen_cpu *cpu_alloc(int id)
+{
+    struct xen_cpu *cpu;
+    ureg_t cr3;
+    /* Allocate per-cpu data for vcpu `id`, add it to `cpus`, set up gdt, stack and tss. */
+    printk(1, "%s: cpu %d\n", __FUNCTION__, id);
+
+    cpu = get_memory(sizeof(*cpu), "per-cpu data"); /* NOTE(review): used unchecked */
+    cpu->id = id;
+    cpu->periodic = XEN_DEFAULT_PERIOD;
+    cpu->v.vcpu_info = (void*)&shared_info.vcpu_info[id];
+    cpu->v.vcpu_info_pa = EMU_PA(cpu->v.vcpu_info);
+    guest_cli(cpu);
+    list_add_tail(&cpu->next, &cpus);
+    /* hand the frame of the cr3 that is live right now to pv_write_cr3() */
+    asm volatile("mov %%cr3,%0" : "=r" (cr3));
+    pv_write_cr3(cpu, addr_to_frame(cr3));
+
+    gdt_init(cpu);
+    stack_init(cpu);
+    tss_init(cpu);
+    return cpu;
+}
+
+/* Return the cpu with the given id, creating it via cpu_alloc() if unknown. */
+struct xen_cpu *cpu_find(int id)
+{
+    struct list_head *pos;
+
+    list_for_each(pos, &cpus) {
+        struct xen_cpu *cand = list_entry(pos, struct xen_cpu, next);
+        if (cand->id == id) {
+            return cand;
+        }
+    }
+    return cpu_alloc(id); /* not on the list yet */
+}
+
+static void cpu_init(struct xen_cpu *cpu)
+{
+    printk(1, "%s: cpu %d\n", __FUNCTION__, cpu->id);
+    /* load gdt, task register and idt, then set up control registers, msrs and pv state */
+    gdt_load(cpu);
+    ltr(tss(cpu) << 3);
+    idt_load();
+    cr_init(cpu);
+    msrs_init(cpu);
+    pv_init(cpu);
+    /* account the cpu as online and running in vminfo (one bit per cpu id) */
+    vminfo.vcpus_online  |= (1 << cpu->id);
+    vminfo.vcpus_running |= (1 << cpu->id);
+    vminfo.vcpus++;
+    cpu->online = 1;
+}
+
+static void userspace_config(void)
+{
+    uint32_t pfn;
+    int i;
+    /* Exchange configuration through the emu device: read vmconf, publish our pfns. */
+    /* read config */
+    vmconf.debug_level = emudev_get(EMUDEV_CONF_DEBUG_LEVEL, 0);
+    vmconf.mfn_emu     = emudev_get(EMUDEV_CONF_EMU_START_PFN, 0);
+    vmconf.pg_emu      = emudev_get(EMUDEV_CONF_EMU_PAGE_COUNT, 0);
+    vmconf.mfn_m2p     = emudev_get(EMUDEV_CONF_M2P_START_PFN, 0);
+    vmconf.pg_m2p      = emudev_get(EMUDEV_CONF_M2P_PAGE_COUNT, 0);
+    vmconf.mfn_guest   = emudev_get(EMUDEV_CONF_GUEST_START_PFN, 0);
+    vmconf.pg_guest    = emudev_get(EMUDEV_CONF_GUEST_PAGE_COUNT, 0);
+    vmconf.pg_total    = emudev_get(EMUDEV_CONF_TOTAL_PAGE_COUNT, 0);
+    vmconf.nr_cpus     = emudev_get(EMUDEV_CONF_NR_VCPUS, 0);
+
+    /* write config */
+    pfn = addr_to_frame(EMU_PA(&boot_ctxt));
+    emudev_set(EMUDEV_CONF_BOOT_CTXT_PFN, 0, pfn);
+    pfn = addr_to_frame(EMU_PA(&vminfo));
+    emudev_set(EMUDEV_CONF_VMINFO_PFN, 0, pfn);
+    pfn = addr_to_frame(EMU_PA(&grant_table));
+    for (i = 0; i < GRANT_FRAMES_MAX; i++) /* consecutive frames from grant_table on */
+        emudev_set(EMUDEV_CONF_GRANT_TABLE_PFNS, i, pfn+i);
+
+    /* commands */
+    emudev_cmd(EMUDEV_CMD_CONFIGURATION_DONE, 0);
+}
+
+/* --------------------------------------------------------------------- */
+/* called from assembler                                                 */
+
+asmlinkage void do_boot(struct regs *regs)
+{
+    struct xen_cpu *cpu;
+    struct xen_cpu boot_cpu;
+    unsigned long init_pt_len, boot_cr3;
+    /* Boot cpu entry: configure, build page tables, bring up cpu 0, set up the guest. */
+    printk(0, "this is %s (qemu-xenner %s), boot cpu #0\n", EMUNAME,
+                QEMU_VERSION QEMU_PKGVERSION);
+
+    userspace_config();
+    printk(1, "%s: configuration done\n", EMUNAME);
+    /* boot_cpu is a scratch struct: only the fields set by cpu_set_cr3() are initialised */
+    cpu_set_cr3(&boot_cpu, EMU_PA(emu_pgd));
+    paging_init(&boot_cpu);
+    init_pt_len = map_guest(&boot_cr3);
+
+    set_up_context(&boot_ctxt, boot_cr3, init_pt_len);
+
+    cpu = cpu_alloc(0);
+    cpu->init_ctxt = &boot_ctxt;
+    idt_init();
+    cpu_init(cpu);
+    printk(1, "%s: boot cpu setup done\n", EMUNAME);
+
+#ifdef CONFIG_64BIT
+    paging_init(cpu);
+#endif
+    paging_start(cpu);
+    printk(1, "%s: paging setup done\n", EMUNAME);
+
+    irq_init(cpu);
+    printk(1, "%s: irq setup done\n", EMUNAME);
+    /* prepare the guest's initial context; regs presumably becomes the guest entry state */
+    guest_cpu_init(cpu);
+    guest_regs_init(cpu, regs);
+    guest_start_info(cpu, regs, init_pt_len, boot_cr3);
+    guest_hypercall_page(cpu);
+    printk(1, "%s: booting guest kernel (entry %" PRIxREG ":%" PRIxREG ") ...\n",
+           EMUNAME, regs->cs, regs->rip);
+}
+
+asmlinkage void do_boot_secondary(ureg_t id, struct regs *regs)
+{
+    struct xen_cpu *cpu;
+    /* Secondary cpu entry: per-cpu bring-up only, no config or page table construction. */
+    printk(0, "this is cpu #%d\n", (int)id);
+    cpu = cpu_find(id); /* allocates the cpu if it is not known yet */
+    cpu_init(cpu);
+    paging_start(cpu);
+    irq_init(cpu);
+#if 0
+    if (cpu->virq_to_vector[VIRQ_TIMER])
+        lapic_timer(cpu);
+#endif
+
+    guest_cpu_init(cpu);
+    guest_regs_init(cpu, regs);
+
+    print_registers(2, regs);
+    printk(1, "%s: secondary entry: %" PRIxREG ":%" PRIxREG ", jumping ...\n",
+           EMUNAME, regs->cs, regs->rip);
+}
+
+/* Illegal instruction (trap 6): panic for emu code, bounce guest user code, else emulate. */
+asmlinkage void do_illegal_instruction(struct regs *regs)
+{
+    struct xen_cpu *cpu = get_cpu();
+    int skip;
+
+    vminfo.faults[XEN_FAULT_ILLEGAL_INSTRUCTION]++;
+    if (context_is_emu(regs)) {
+        panic("ring0 (emu) illegal instruction", regs);
+    }
+    if (context_is_user(cpu, regs)) {
+        uint8_t *i = (void*)regs->rip;
+        printk(1, "user ill: at %p"
+               "  0x%02x, 0x%02x, 0x%02x, 0x%02x,"
+               "  0x%02x, 0x%02x, 0x%02x, 0x%02x\n",
+               i, i[0], i[1], i[2], i[3], i[4], i[5], i[6], i[7]);
+        bounce_trap(cpu, regs, 6, -1);
+        return;
+    }
+    skip = emulate(cpu, regs); /* number of bytes to skip, 0 or -1 */
+    switch (skip) {
+    case -1: /* error */
+        panic("instruction emulation failed (ill)", regs); /* panic() adds "\n" */
+        break;
+    case 0:  /* bounce to guest */
+        bounce_trap(cpu, regs, 6, -1);
+        break;
+    default: /* handled */
+        regs->rip += skip;
+        break;
+    }
+}
+
+/*
+ * Decide from the opcode byte at rip whether the faulting instruction is
+ * I/O or cli/sti that the guest may execute: returns 1 if so, else 0.
+ */
+static int is_allowed_io(struct xen_cpu *cpu, struct regs *regs)
+{
+    const uint8_t *insn = (void*)regs->rip;
+    int pl, by_iopl;
+
+#ifdef CONFIG_64BIT
+    pl = context_is_user(cpu, regs) ? 3 : 1;
+#else
+    pl = regs->cs & 0x03;
+#endif
+    by_iopl = (pl <= cpu->iopl);
+    switch (*insn) {
+    case 0xe4 ... 0xe7:
+    case 0xec ... 0xef:
+        /* I/O instructions: yes by iopl, or by bitmap (FIXME: check port) */
+        return by_iopl || cpu->nr_ports;
+    case 0xfa:
+    case 0xfb:
+        /* cli, sti: yes by iopl only */
+        return by_iopl;
+    default:
+        return 0; /* no */
+    }
+}
+
+asmlinkage void do_general_protection(struct regs *regs)
+{
+    struct xen_cpu *cpu = get_cpu();
+    int skip;
+    /* GPF (trap 13): extable fixup or panic for emu code, bounce or emulate for the guest. */
+    vminfo.faults[XEN_FAULT_GENERAL_PROTECTION]++;
+    if (context_is_emu(regs)) {
+        if (fixup_extable(regs)) {
+            return;
+        }
+        print_gpf_info(0, cpu, regs);
+        panic("ring0 (emu) general protection fault", regs);
+    }
+    if (is_allowed_io(cpu, regs)) {
+        goto emulate;
+    }
+    if (context_is_user(cpu, regs)) {
+        vminfo.faults[XEN_FAULT_GENERAL_PROTECTION_GUEST]++;
+        print_gpf_info(1, cpu, regs);
+        bounce_trap(cpu, regs, 13, -1);
+        return;
+    }
+    /* neither emu nor guest user context from here on */
+    if (regs->error) {
+        print_gpf_info(0, cpu, regs);
+        panic("unhandled kernel gpf", regs);
+    }
+    /* also reached directly for permitted I/O and cli/sti, see is_allowed_io() */
+emulate:
+    skip = emulate(cpu, regs);
+    switch (skip) {
+    case -1: /* error */
+        print_gpf_info(0, cpu, regs);
+        panic("instruction emulation failed (gpf)", regs);
+        break;
+    case 0:  /* bounce to guest */
+        vminfo.faults[XEN_FAULT_GENERAL_PROTECTION_GUEST]++;
+        bounce_trap(cpu, regs, 13, -1);
+        break;
+    default: /* handled */
+        vminfo.faults[XEN_FAULT_GENERAL_PROTECTION_EMUINS]++;
+        regs->rip += skip;
+        evtchn_try_forward(cpu, regs); /* sti */
+        break;
+    }
+}
+
+asmlinkage void do_double_fault(struct regs *regs)
+{
+    panic("double fault", regs); /* always a panic; nothing is forwarded to the guest */
+}
+
+/* Log a trap and forward it to the guest via bounce_trap(). */
+asmlinkage void do_guest_forward(struct regs *regs)
+{
+    struct xen_cpu *cpu = get_cpu();
+    const int known = regs->trapno < sizeof(trapinfo)/sizeof(trapinfo[0]);
+    const struct trapinfo *info = known ? trapinfo + regs->trapno : NULL;
+
+    /* traps without a trapinfo entry are logged at level 0 with name "-" */
+    printk(info ? info->lvl : 0,
+           "%s: trap %d [%s], error 0x%" PRIxREG ","
+           " cs:rip %" PRIxREG ":%" PRIxREG ","
+           " forwarding to guest\n",
+           __FUNCTION__, (int)regs->trapno,
+           info && info->name ? info->name : "-",
+           info && info->ec   ? regs->error : 0,
+           regs->cs, regs->rip);
+    bounce_trap(cpu, regs, regs->trapno, -1);
+}
+
+asmlinkage void do_lazy_fpu(struct regs *regs)
+{
+    struct xen_cpu *cpu = get_cpu();
+    /* Count the fault, run clts() and bounce the trap to the guest. */
+    vminfo.faults[XEN_FAULT_LAZY_FPU]++;
+    clts();
+    bounce_trap(cpu, regs, regs->trapno, -1);
+}
+
+asmlinkage void do_int1(struct regs *regs)
+{
+    if (!context_is_emu(regs)) {
+        do_guest_forward(regs); /* not emu context: forward to the guest */
+        return;
+    }
+    printk(0, "%s: emu context\n", __FUNCTION__);
+    print_registers(0, regs); /* emu context: only logged, nothing forwarded */
+}
+
+asmlinkage void do_int3(struct regs *regs)
+{
+    if (!context_is_emu(regs)) {
+        do_guest_forward(regs); /* not emu context: forward to the guest */
+        return;
+    }
+    printk(0, "%s: emu context\n", __FUNCTION__);
+    print_registers(0, regs); /* emu context: only logged, nothing forwarded */
+}
+
+/* --------------------------------------------------------------------- */
+
+static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED; /* held by flush_tlb_remote() for a whole request */
+static atomic_t   flush_cnt;  /* raised by the requester, decremented by each do_smp_flush_tlb() */
+static ureg_t     flush_addr; /* address to flush, or 0 for a full tlb flush */
+
+asmlinkage void do_smp_flush_tlb(struct regs *regs)
+{
+    struct xen_cpu *cpu = get_cpu();
+    /* Flush IPI handler: flush as requested in flush_addr, then report back via flush_cnt. */
+    lapic_eoi(cpu);
+    if (flush_addr) {
+        flush_tlb_addr(flush_addr);
+    } else {
+        flush_tlb();
+    }
+    atomic_dec(&flush_cnt);
+}
+
+void flush_tlb_remote(struct xen_cpu *cpu, ureg_t mask, ureg_t addr)
+{
+    int cpus;
+    /* Ask the other cpus to flush addr (0 = everything) and busy-wait until all have done so. */
+    mask &= ~(1 << cpu->id);
+    if (!mask) {
+        vminfo.faults[XEN_FAULT_OTHER_FLUSH_TLB_NONE]++;
+        return;
+    }
+
+    /*
+     * we must be able to process ipi while waiting for the lock,
+     * otherwise we deadlock in case another cpu busy-waits for us
+     * doing the tlb flush.
+     */
+    sti();
+    spin_lock(&flush_lock);
+
+    cpus = vminfo.vcpus-1; /* FIXME: not using mask, sending to all */
+    flush_addr = addr;
+    if (flush_addr) {
+        vminfo.faults[XEN_FAULT_OTHER_FLUSH_TLB_PAGE]++;
+    } else {
+        vminfo.faults[XEN_FAULT_OTHER_FLUSH_TLB_ALL]++;
+    }
+
+    atomic_add(cpus, &flush_cnt);
+    lapic_ipi_flush_tlb(cpu);
+    while (atomic_read(&flush_cnt)) {
+        pause();
+    }
+    /* NOTE(review): interrupts end up disabled on exit whatever their state on entry */
+    spin_unlock(&flush_lock);
+    cli();
+}