new file mode 100644
@@ -0,0 +1,875 @@
+/*
+ * Copyright (C) Red Hat 2007
+ * Copyright (C) Novell Inc. 2010
+ *
+ * Author(s): Gerd Hoffmann <kraxel@redhat.com>
+ * Alexander Graf <agraf@suse.de>
+ *
+ * Xenner generic main functions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config-host.h"
+
+static void set_up_context(void *ctxt, unsigned long boot_cr3,
+ unsigned long init_pt_len);
+static void guest_hypercall_page(struct xen_cpu *cpu);
+
/*
 * Minimal freestanding memset(): fill the first n bytes of s with the
 * byte value c.  Returns s, as required by the C standard.
 */
void *memset(void *s, int c, size_t n)
{
    uint8_t *p = s;
    size_t i;

    /* index must be size_t: a uint32_t counter would truncate n and
     * loop forever / stop early for n >= 4G on 64-bit builds */
    for (i = 0; i < n; i++) {
        p[i] = c;
    }
    return s;
}
+
/*
 * Minimal freestanding memcpy(): copy n bytes from src to dest.
 * Regions must not overlap (use memmove semantics are NOT provided).
 * Returns dest, as required by the C standard.
 */
void *memcpy(void *dest, const void *src, size_t n)
{
    const uint8_t *s = src;
    uint8_t *d = dest;
    size_t i;

    /* index must be size_t: a uint32_t counter would truncate n
     * for copies >= 4G on 64-bit builds */
    for (i = 0; i < n; i++) {
        d[i] = s[i];
    }
    return dest;
}
+
/*
 * Minimal freestanding memcmp(): lexicographically compare the first
 * n bytes of s1 and s2 as unsigned chars.  Returns 0 if equal, -1 if
 * s1 sorts first, 1 if s2 sorts first.
 */
int memcmp(const void *s1, const void *s2, size_t n)
{
    const uint8_t *a = s1;
    const uint8_t *b = s2;
    size_t i;

    /* index must be size_t: the old "int i < size_t n" comparison was
     * signed/unsigned mismatched and truncated large n */
    for (i = 0; i < n; i++) {
        if (a[i] == b[i]) {
            continue;
        }
        if (a[i] < b[i]) {
            return -1;
        }
        return 1;
    }
    return 0;
}
+
+/* --------------------------------------------------------------------- */
+
/*
 * Log the details of a general protection fault: the decoded selector
 * error code plus the first eight opcode bytes at the faulting rip.
 * NOTE(review): assumes regs->rip is mapped and readable from the
 * current context -- confirm for all call sites.
 */
static void print_gpf_info(int level, struct xen_cpu *cpu, struct regs *regs)
{
    uint8_t *code = (void*)regs->rip;

    /* x86 selector error code: bit0 EXT, bit1 IDT, bit2 TI (LDT),
     * bits 3..15 the selector index */
    printk(level, "%s: vcpu %d, index 0x%x%s%s%s, "
           "rflags %" PRIxREG ", cs:rip %" PRIxREG ":%" PRIxREG " "
           "-> 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x, 0x%02x\n",
           __FUNCTION__, cpu->id, (int)(regs->error >> 3),
           (regs->error & 0x04) ? ", TI" : "",
           (regs->error & 0x02) ? ", IDT" : "",
           (regs->error & 0x01) ? ", EXT" : "",
           regs->rflags, regs->cs, regs->rip,
           code[0], code[1], code[2], code[3],
           code[4], code[5], code[6], code[7]);
}
+
/*
 * Log a decoded page fault: the error-code bits (present/write/user/
 * reserved/instruction-fetch), the faulting rip and the fault address
 * (cr2).  On 64-bit also reports whether the guest vcpu was in
 * kernel or user mode.
 */
static void print_page_fault_info(int level, struct xen_cpu *cpu, struct regs *regs, ureg_t cr2)
{
    printk(level, "%s:%s%s%s%s%s%s, rip %" PRIxREG ", cr2 %" PRIxREG ", vcpu %d\n",
           __FUNCTION__,
#ifdef CONFIG_64BIT
           is_kernel(cpu) ? " [kernel-mode]" : " [user-mode]",
#else
           "",
#endif
           /* x86 page fault error code bits 0..4 */
           regs->error & 0x01 ? " preset" : " nopage",
           regs->error & 0x02 ? " write" : " read",
           regs->error & 0x04 ? " user" : " kernel",
           regs->error & 0x08 ? " reserved-bit" : "",
           regs->error & 0x10 ? " instr-fetch" : "",
           regs->rip, cr2, cpu->id);
}
+
+static int fixup_extable(struct regs *regs)
+{
+ uintptr_t *ptr;
+
+ for (ptr = _estart; ptr < _estop; ptr += 2) {
+ if (ptr[0] != regs->rip) {
+ continue;
+ }
+ printk(2, "fixup: %" PRIxPTR " -> %" PRIxPTR "\n", ptr[0], ptr[1]);
+ regs->rip = ptr[1];
+ vminfo.faults[XEN_FAULT_PAGE_FAULT_FIX_EXTAB]++;
+ return 1;
+ }
+ return 0;
+}
+
/*
 * Fatal error: log the message (and register/stack state when regs is
 * non-NULL), then ask the emulated device to shut the guest down.
 * Always returns 0 so callers may use it inside expressions.
 */
int panic(const char *message, struct regs *regs)
{
    printk(0, "panic: %s\n", message);
    if (regs) {
        print_state(regs);
    }
    emudev_cmd(EMUDEV_CMD_GUEST_SHUTDOWN, -1);
    return 0;
}
+
+/* --------------------------------------------------------------------- */
+
+#ifdef CONFIG_64BIT
+# define DR "%016" PRIxREG
+# define DC "%08" PRIxREG
+# define DS "%04" PRIxREG
+#else
+# define DR "%08" PRIxREG
+# define DC "%08" PRIxREG
+# define DS "%04" PRIxREG
+#endif
+
/*
 * Dump the full register state at the given log level: the saved
 * frame in *regs plus the live segment and control registers, which
 * are read directly from the CPU with inline asm.
 */
void print_registers(int level, struct regs *regs)
{
    ureg_t ds,es,fs,gs,cr0,cr2,cr3,cr4;

    /* segment registers are not part of struct regs -- read the
     * current (emulator context) values */
    asm volatile("mov %%ds, %[ds] \n"
                 "mov %%es, %[es] \n"
                 "mov %%fs, %[fs] \n"
                 "mov %%gs, %[gs] \n"
                 : [ds] "=r" (ds),
                   [es] "=r" (es),
                   [fs] "=r" (fs),
                   [gs] "=r" (gs)
                 : /* no inputs */);
    /* same for the control registers */
    asm volatile("mov %%cr0, %[cr0] \n"
                 "mov %%cr2, %[cr2] \n"
                 "mov %%cr3, %[cr3] \n"
                 "mov %%cr4, %[cr4] \n"
                 : [cr0] "=r" (cr0),
                   [cr2] "=r" (cr2),
                   [cr3] "=r" (cr3),
                   [cr4] "=r" (cr4)
                 : /* no inputs */);

    printk(level, "printing registers\n");
    printk(level, "  code  cs:rip " DS ":" DR "\n", regs->cs, regs->rip);
    printk(level, "  stack ss:rsp " DS ":" DR "\n", regs->ss, regs->rsp);
    printk(level, "  rax " DR "  rbx " DR "  rcx " DR "  rdx " DR "\n",
           regs->rax, regs->rbx, regs->rcx, regs->rdx);
    printk(level, "  rsi " DR "  rdi " DR "  rsp " DR "  rbp " DR "\n",
           regs->rsi, regs->rdi, regs->rsp, regs->rbp);
#ifdef CONFIG_64BIT
    printk(level, "  r8  " DR "  r9  " DR "  r10 " DR "  r11 " DR "\n",
           regs->r8, regs->r9, regs->r10, regs->r11);
    printk(level, "  r12 " DR "  r13 " DR "  r14 " DR "  r15 " DR "\n",
           regs->r12, regs->r13, regs->r14, regs->r15);
#endif
    printk(level, "  cs " DS " ds " DS " es " DS " fs " DS " gs " DS " ss " DS "\n",
           regs->cs, ds, es, fs, gs, regs->ss);
    printk(level, "  cr0 " DC " cr2 " DC " cr3 " DC " cr4 " DC " rflags " DC "\n",
           cr0, cr2, cr3, cr4, regs->rflags);
    /* decode the interesting flag bits symbolically */
    print_bits(level, "  cr0", cr0, cr0, cr0_bits);
    print_bits(level, "  cr4", cr4, cr4, cr4_bits);
    print_bits(level, "  rflags", regs->rflags, regs->rflags, rflags_bits);

}
+
+void print_stack(int level, ureg_t rsp)
+{
+ ureg_t max;
+
+ max = ((rsp + PAGE_SIZE) & PAGE_MASK) - sizeof(ureg_t);
+ printk(level, "printing stack " DR " - " DR "\n", rsp, max);
+ while (rsp <= max) {
+ printk(level, " " DR ": " DR "\n", rsp, *((ureg_t*)rsp));
+ rsp += sizeof(ureg_t);
+ }
+}
+
/*
 * Convenience wrapper: dump registers and the stack at log level 0
 * (always visible) -- used from panic paths.
 */
void print_state(struct regs *regs)
{
    print_registers(0, regs);
    print_stack(0, regs->rsp);
}
+
+#undef DR
+
+/* --------------------------------------------------------------------- */
+
/*
 * Build an x86 legacy (8-byte) segment descriptor from the flat
 * kvm_segment representation.
 *
 * Resulting layout (Intel SDM Vol. 3, "Segment Descriptors"):
 *   desc.a: limit[15:0] | base[15:0]  << 16
 *   desc.b: base[23:16] | type | S | DPL | P | limit[19:16]
 *           | AVL | L | D/B | G | base[31:24]
 */
static struct descriptor_32 mkdesc(const struct kvm_segment *seg)
{
    struct descriptor_32 desc;
    int shift = 0;

    /* with the granularity bit set the stored limit is in 4k units */
    shift = seg->g ? 12 : 0;
    desc.a = (seg->limit >> shift) & 0xffff;
    desc.b = (seg->limit >> shift) & 0x000f0000;

    desc.a |= (seg->base & 0xffff) << 16;
    desc.b |= seg->base & 0xff000000;
    desc.b |= (seg->base & 0xff0000) >> 16;
    desc.b |= (seg->type & 0x0f) << 8;
    desc.b |= (seg->dpl & 0x03) << 13;

    if (seg->s) desc.b |= (1 << 12);        /* S: code/data (not system) */
    if (seg->present) desc.b |= (1 << 15);  /* P: segment present */
    if (seg->avl) desc.b |= (1 << 20);      /* AVL: available to software */
    if (seg->l) desc.b |= (1 << 21);        /* L: 64-bit code segment */
    if (seg->db) desc.b |= (1 << 22);       /* D/B: default operand size */
    if (seg->g) desc.b |= (1 << 23);        /* G: 4k granularity */

    return desc;
}
+
/* Install a descriptor into the gdt slot addressed by the selector's
 * index field (selector >> 3 strips the RPL/TI bits). */
static inline void gdt_set(struct descriptor_32 *gdt, const struct kvm_segment *seg)
{
    gdt[ seg->selector >> 3 ] = mkdesc(seg);
}
+
/*
 * Put this cpu's control registers into the baseline state: protected
 * mode with paging and write-protect enforced, native FPU error
 * reporting, caching enabled (CD/NW clear), and SSE context handling
 * (OSFXSR/OSXMMEXCPT) switched on.
 */
static void cr_init(struct xen_cpu *cpu)
{
    ureg_t cr0, cr4;

    printk(2, "%s: cpu %d\n", __FUNCTION__, cpu->id);

    cr0 = read_cr0();
    cr0 |= X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | X86_CR0_NE | \
        X86_CR0_WP | X86_CR0_AM | X86_CR0_PG;
    /* clear task-switched and the cache-disable bits */
    cr0 &= ~(X86_CR0_TS|X86_CR0_CD|X86_CR0_NW);
    print_bits(2, "cr0", read_cr0(), cr0, cr0_bits);
    write_cr0(cr0);

    cr4 = read_cr4();
    cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
    print_bits(2, "cr4", read_cr4(), cr4, cr4_bits);
    write_cr4(cr4);
}
+
+static void stack_init(struct xen_cpu *cpu)
+{
+ uintptr_t *ptr;
+ int pages;
+
+ if (cpu->stack_low) {
+ return;
+ }
+
+ /* allocate stack */
+ pages = (boot_stack_high - boot_stack_low + PAGE_SIZE -1) / PAGE_SIZE;
+ cpu->stack_low = get_pages(pages, "stack");
+ cpu->stack_high = cpu->stack_low + pages * PAGE_SIZE;
+
+ /* set per-cpu data pointer */
+ ptr = STACK_PTR(cpu, cpu_ptr);
+ *ptr = (uintptr_t)cpu;
+
+ /* set per-cpu data pointer for boot stack */
+ if (!cpu->id) {
+ ptr = (void*)(&cpu_ptr);
+ *ptr = (uintptr_t)cpu;
+ }
+
+#ifdef CONFIG_64BIT
+ /* copy and setup syscall trampoline from boot stack */
+ memcpy(STACK_PTR(cpu, trampoline_start),
+ trampoline_start, trampoline_stop - trampoline_start);
+ ptr = STACK_PTR(cpu, trampoline_patch);
+ *ptr = (uintptr_t)trampoline_syscall;
+
+ /* allocate irq stack */
+ cpu->irqstack_low = get_pages(pages, "irqstack");
+ cpu->irqstack_high = cpu->irqstack_low + PAGE_SIZE;
+
+ /* set per-cpu data pointer */
+ ptr = IRQSTACK_PTR(cpu, cpu_ptr);
+ *ptr = (uintptr_t)cpu;
+#endif
+}
+
/*
 * Load this cpu's private GDT with lgdt.  The limit covers 16 pages;
 * NOTE(review): this must match the size allocated in gdt_init() --
 * confirm they stay in sync.
 */
void gdt_load(struct xen_cpu *cpu)
{
    /* pseudo-descriptor for lgdt: 16-bit limit + linear base */
    struct {
        uint16_t   len;
        uintptr_t  ptr;
    } __attribute__((packed)) gdtp = {
        .len = (16 * PAGE_SIZE)-1,
        .ptr = (uintptr_t)cpu->gdt,
    };

    asm volatile("lgdt %0" : : "m" (gdtp) : "memory");
}
+
/*
 * Load the (shared, statically allocated) IDT with lidt.
 */
void idt_load(void)
{
    /* pseudo-descriptor for lidt: 16-bit limit + linear base */
    struct {
        uint16_t   len;
        uintptr_t  ptr;
    } __attribute__((packed)) idtp = {
        .len = sizeof(xen_idt)-1,
        .ptr = (uintptr_t)xen_idt,
    };

    asm volatile("lidt %0" : : "m" (idtp) : "memory");
}
+
/*
 * Apply the guest-supplied vcpu context: install the guest GDT frames
 * (if any), rewrite all selectors through fix_sel() so they fit
 * xenner's descriptor layout, and remember the guest kernel stack for
 * later ring transitions.
 */
void guest_cpu_init(struct xen_cpu *cpu)
{
    struct vcpu_guest_context *ctxt = cpu->init_ctxt;
    ureg_t mfns[16];
    int i;

    if (ctxt->gdt_ents) {
        /* copy the (up to 16) gdt frame numbers into ureg_t-sized
         * slots before handing them to guest_gdt_init */
        for (i = 0; i < 16; i++) {
            mfns[i] = ctxt->gdt_frames[i];
        }
        guest_gdt_init(cpu, ctxt->gdt_ents, mfns);
    }

    /* translate every selector the guest context carries */
    ctxt->kernel_ss = fix_sel(ctxt->kernel_ss);
    ctxt->user_regs.cs = fix_sel(ctxt->user_regs.cs);
    ctxt->user_regs.ds = fix_sel(ctxt->user_regs.ds);
    ctxt->user_regs.es = fix_sel(ctxt->user_regs.es);
    ctxt->user_regs.fs = fix_sel(ctxt->user_regs.fs);
    ctxt->user_regs.gs = fix_sel(ctxt->user_regs.gs);
    ctxt->user_regs.ss = fix_sel(ctxt->user_regs.ss);

    /* stack used when bouncing events/traps into the guest kernel */
    cpu->kernel_ss = ctxt->kernel_ss;
    cpu->kernel_sp = ctxt->kernel_sp;
}
+
+static uint64_t maddr_to_paddr(uint64_t _maddr)
+{
+ unsigned long virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0);
+ uint64_t maddr = _maddr;
+ uint64_t mfn = addr_to_frame(maddr);
+
+ /* M2P */
+ if ((mfn >= vmconf.mfn_m2p) && (mfn < (vmconf.mfn_m2p + vmconf.pg_m2p))) {
+ return XEN_M2P + maddr - frame_to_addr(vmconf.mfn_m2p);
+ }
+
+ /* xenner */
+ if (maddr < frame_to_addr(vmconf.mfn_guest)) {
+ return (uintptr_t)_vstart + maddr;
+ }
+
+ /* guest */
+ maddr -= frame_to_addr(vmconf.mfn_guest);
+ maddr += virt_base;
+
+ return maddr;
+}
+
+static void *pfn_to_ptr(xen_pfn_t pfn)
+{
+ unsigned long addr = frame_to_addr(pfn);
+
+ addr += frame_to_addr(vmconf.mfn_guest);
+ return map_page(addr);
+}
+
+static void guest_start_info(struct xen_cpu *cpu, struct regs *regs,
+ unsigned long init_pt_len, unsigned long boot_cr3)
+{
+ struct start_info *start_info;
+ uint64_t i;
+ uint64_t virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0);
+ uint64_t initrd_len;
+ uint64_t cmdline_pfn = emudev_get(EMUDEV_CONF_PFN_CMDLINE, 0);
+ unsigned long *mfn_list;
+ uint64_t mfn_list_pfn = emudev_get(EMUDEV_CONF_PFN_MFN_LIST, 0);
+ char cap_ver[] = CAP_VERSION_STRING;
+ char *cmdline = NULL;
+
+ start_info = pfn_to_ptr(emudev_get(EMUDEV_CONF_PFN_START_INFO, 0));
+
+ printk(1, "%s: called\n", __FUNCTION__);
+
+ memset(start_info, 0, sizeof(*start_info));
+ memcpy(start_info->magic, cap_ver, sizeof(cap_ver));
+ start_info->magic[sizeof(start_info->magic) - 1] = '\0';
+
+ start_info->shared_info = EMU_PA(&shared_info);
+ start_info->pt_base = maddr_to_paddr(boot_cr3);
+ start_info->nr_pt_frames = addr_to_frame(init_pt_len + (PAGE_SIZE - 1));
+ start_info->shared_info = (unsigned long)EMU_PA(&shared_info);
+ start_info->nr_pages = emudev_get(EMUDEV_CONF_GUEST_PAGE_COUNT, 0);
+ start_info->store_mfn = emudev_get(EMUDEV_CONF_MFN_XENSTORE, 0);
+ start_info->store_evtchn = emudev_get(EMUDEV_CONF_EVTCH_XENSTORE, 0);
+ start_info->console.domU.mfn = emudev_get(EMUDEV_CONF_MFN_CONSOLE, 0);
+ start_info->console.domU.evtchn = emudev_get(EMUDEV_CONF_EVTCH_CONSOLE, 0);
+
+ initrd_len = emudev_get(EMUDEV_CONF_INITRD_LEN, 0);
+ if (initrd_len) {
+ start_info->mod_start = virt_base +
+ frame_to_addr(emudev_get(EMUDEV_CONF_PFN_INITRD, 0));
+ start_info->mod_len = initrd_len;
+ }
+
+ if (cmdline_pfn) {
+ cmdline = pfn_to_ptr(cmdline_pfn);
+
+ memcpy(start_info->cmd_line, pfn_to_ptr(cmdline_pfn),
+ MAX_GUEST_CMDLINE);
+ printk(1, "guest cmdline: %s\n", start_info->cmd_line);
+ }
+
+ /* set up m2p page table */
+ for (i = 0; i < vmconf.pg_total; i++) {
+ m2p[i + vmconf.mfn_guest] = i;
+ }
+
+ /* fill mfn list */
+ start_info->mfn_list = virt_base + frame_to_addr(mfn_list_pfn);
+ mfn_list = (void*)start_info->mfn_list;
+
+ for (i = 0; i < start_info->nr_pages; i++) {
+ mfn_list[i] = i + vmconf.mfn_guest;
+ }
+
+ regs->rsi = (unsigned long)start_info;
+
+ free_page(start_info);
+ if (cmdline) {
+ free_page(cmdline);
+ }
+}
+
/*
 * Record the page-table root (as an mfn) in the per-cpu state.
 * 64-bit keeps separate kernel/user roots, so also reset the vcpu to
 * kernel mode (user_mode = 0).
 */
static void cpu_set_cr3(struct xen_cpu *cpu, unsigned long boot_cr3)
{
#ifdef CONFIG_64BIT
    cpu->user_mode = 0;
    cpu->kernel_cr3_mfn = addr_to_frame(boot_cr3);
#else
    cpu->cr3_mfn = addr_to_frame(boot_cr3);
#endif
}
+
/*
 * Estimate how many frames the initial mapping needs on top of the
 * guest's own pages, rounded up to a 4mb boundary.
 */
static uint64_t count_pgtables(uint64_t max_pfn)
{
    /* frames per 4mb chunk */
    const uint64_t fourmb = addr_to_frame(4 * 1024 * 1024);
    uint64_t frames;

    /* XXX this should become a real calculation, for now assume we need max
     * 200 page table pages */
    frames = max_pfn + 200;

    /* pad to 4mb */
    frames = (frames + fourmb - 1) & ~(fourmb - 1);

    return frames;
}
+
+/*
+ * Maps the guest into its own virtual address space in its own page table and
+ * returns the length and maddr of that new page table
+ */
/*
 * Maps the guest into its own virtual address space in its own page table and
 * returns the length and maddr of that new page table.
 *
 * The page table pages must be visible to the guest, so they are
 * allocated from the high heap; the heap is switched back before
 * returning.  *boot_cr3 receives the machine address of the new root.
 */
static unsigned long map_guest(unsigned long *boot_cr3)
{
    uint64_t virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0);
    struct xen_cpu tmp_cpu;
    uint64_t max_pfn = emudev_get(EMUDEV_CONF_PFN_INIT_PT, 0);
    unsigned long init_pt_len;

    /* reserve room for the page-table pages themselves */
    max_pfn += count_pgtables(max_pfn);

    /* create initial page table that maps the guest virt_base linearly
       to host physical memory. This has to happen in guest visible mem */
    switch_heap(HEAP_HIGH);

    *boot_cr3 = (unsigned long)EMU_PA(get_pages(1, "pt root"));
    /* tmp_cpu only carries the cr3 for map_region -- it is otherwise
     * uninitialized and must not be used for anything else */
    cpu_set_cr3(&tmp_cpu, *boot_cr3);
    printk(3, "init guest pt map mfn %lx len %lx\n", (unsigned long)vmconf.mfn_guest,
           (unsigned long)max_pfn);

    map_region(&tmp_cpu, virt_base, EMU_PGFLAGS, vmconf.mfn_guest, max_pfn);

    /* save the pt len for start_info */
    init_pt_len = heap_size();

    switch_heap(HEAP_EMU);

    return init_pt_len;
}
+
+
+/* --------------------------------------------------------------------- */
+
/*
 * Allocate and minimally initialize the per-cpu state for vcpu 'id':
 * vcpu_info wiring, interrupts masked, current cr3 recorded, and the
 * gdt / stacks / tss set up.  The cpu is linked into the global list
 * so cpu_find() will return it from now on.
 */
static struct xen_cpu *cpu_alloc(int id)
{
    struct xen_cpu *cpu;
    ureg_t cr3;

    printk(1, "%s: cpu %d\n", __FUNCTION__, id);

    cpu = get_memory(sizeof(*cpu), "per-cpu data");
    cpu->id = id;
    cpu->periodic = XEN_DEFAULT_PERIOD;
    cpu->v.vcpu_info = (void*)&shared_info.vcpu_info[id];
    cpu->v.vcpu_info_pa = EMU_PA(cpu->v.vcpu_info);
    /* start with events masked, like a real vcpu after reset */
    guest_cli(cpu);
    list_add_tail(&cpu->next, &cpus);

    /* seed the per-cpu cr3 tracking with the live page-table root */
    asm volatile("mov %%cr3,%0" : "=r" (cr3));
    pv_write_cr3(cpu, addr_to_frame(cr3));

    gdt_init(cpu);
    stack_init(cpu);
    tss_init(cpu);
    return cpu;
}
+
+struct xen_cpu *cpu_find(int id)
+{
+ struct list_head *item;
+ struct xen_cpu *cpu;
+
+ list_for_each(item, &cpus) {
+ cpu = list_entry(item, struct xen_cpu, next);
+ if (cpu->id == id) {
+ return cpu;
+ }
+ }
+ return cpu_alloc(id);
+}
+
/*
 * Bring a vcpu online on the current physical context: load its gdt,
 * task register and the idt, set up control registers, MSRs and PV
 * state, then mark it online/running in vminfo.
 * NOTE(review): the (1 << id) masks limit this to 32 vcpus -- confirm
 * against the configured maximum.
 */
static void cpu_init(struct xen_cpu *cpu)
{
    printk(1, "%s: cpu %d\n", __FUNCTION__, cpu->id);

    gdt_load(cpu);
    /* tss() returns the gdt index; ltr wants a selector (index << 3) */
    ltr(tss(cpu) << 3);
    idt_load();
    cr_init(cpu);
    msrs_init(cpu);
    pv_init(cpu);

    vminfo.vcpus_online |= (1 << cpu->id);
    vminfo.vcpus_running |= (1 << cpu->id);
    vminfo.vcpus++;
    cpu->online = 1;
}
+
/*
 * Exchange configuration with the userspace side via the emulated
 * device: read the memory layout and vcpu count into vmconf, publish
 * the pfns of xenner's own shared structures, then signal that
 * configuration is complete.
 */
static void userspace_config(void)
{
    uint32_t pfn;
    int i;

    /* read config */
    vmconf.debug_level = emudev_get(EMUDEV_CONF_DEBUG_LEVEL, 0);
    vmconf.mfn_emu = emudev_get(EMUDEV_CONF_EMU_START_PFN, 0);
    vmconf.pg_emu = emudev_get(EMUDEV_CONF_EMU_PAGE_COUNT, 0);
    vmconf.mfn_m2p = emudev_get(EMUDEV_CONF_M2P_START_PFN, 0);
    vmconf.pg_m2p = emudev_get(EMUDEV_CONF_M2P_PAGE_COUNT, 0);
    vmconf.mfn_guest = emudev_get(EMUDEV_CONF_GUEST_START_PFN, 0);
    vmconf.pg_guest = emudev_get(EMUDEV_CONF_GUEST_PAGE_COUNT, 0);
    vmconf.pg_total = emudev_get(EMUDEV_CONF_TOTAL_PAGE_COUNT, 0);
    vmconf.nr_cpus = emudev_get(EMUDEV_CONF_NR_VCPUS, 0);

    /* write config */
    pfn = addr_to_frame(EMU_PA(&boot_ctxt));
    emudev_set(EMUDEV_CONF_BOOT_CTXT_PFN, 0, pfn);
    pfn = addr_to_frame(EMU_PA(&vminfo));
    emudev_set(EMUDEV_CONF_VMINFO_PFN, 0, pfn);
    /* grant table frames are physically contiguous */
    pfn = addr_to_frame(EMU_PA(&grant_table));
    for (i = 0; i < GRANT_FRAMES_MAX; i++)
        emudev_set(EMUDEV_CONF_GRANT_TABLE_PFNS, i, pfn+i);

    /* commands */
    emudev_cmd(EMUDEV_CMD_CONFIGURATION_DONE, 0);
}
+
+/* --------------------------------------------------------------------- */
+/* called from assembler */
+
/*
 * Boot entry for cpu #0, called from assembler.  Order matters here:
 * userspace config first, then paging for the temporary boot cpu,
 * then the guest page tables, then full per-cpu setup and finally the
 * guest entry registers.  On return the assembler stub jumps into the
 * guest kernel with the state left in *regs.
 */
asmlinkage void do_boot(struct regs *regs)
{
    struct xen_cpu *cpu;
    struct xen_cpu boot_cpu;
    unsigned long init_pt_len, boot_cr3;

    printk(0, "this is %s (qemu-xenner %s), boot cpu #0\n", EMUNAME,
           QEMU_VERSION QEMU_PKGVERSION);

    userspace_config();
    printk(1, "%s: configuration done\n", EMUNAME);

    /* bootstrap paging on a throw-away cpu struct using the emulator's
     * own page directory */
    cpu_set_cr3(&boot_cpu, EMU_PA(emu_pgd));
    paging_init(&boot_cpu);
    init_pt_len = map_guest(&boot_cr3);

    set_up_context(&boot_ctxt, boot_cr3, init_pt_len);

    cpu = cpu_alloc(0);
    cpu->init_ctxt = &boot_ctxt;
    idt_init();
    cpu_init(cpu);
    printk(1, "%s: boot cpu setup done\n", EMUNAME);

#ifdef CONFIG_64BIT
    paging_init(cpu);
#endif
    paging_start(cpu);
    printk(1, "%s: paging setup done\n", EMUNAME);

    irq_init(cpu);
    printk(1, "%s: irq setup done\n", EMUNAME);

    guest_cpu_init(cpu);
    guest_regs_init(cpu, regs);
    guest_start_info(cpu, regs, init_pt_len, boot_cr3);
    guest_hypercall_page(cpu);
    printk(1, "%s: booting guest kernel (entry %" PRIxREG ":%" PRIxREG ") ...\n",
           EMUNAME, regs->cs, regs->rip);
}
+
/*
 * Boot entry for secondary vcpus, called from assembler.  The per-cpu
 * state was already allocated by the primary (via cpu_find), so only
 * the hardware-facing init and the guest context remain.  On return
 * the assembler stub jumps into the guest with the state in *regs.
 */
asmlinkage void do_boot_secondary(ureg_t id, struct regs *regs)
{
    struct xen_cpu *cpu;

    printk(0, "this is cpu #%d\n", (int)id);
    cpu = cpu_find(id);
    cpu_init(cpu);
    paging_start(cpu);
    irq_init(cpu);
#if 0
    if (cpu->virq_to_vector[VIRQ_TIMER])
        lapic_timer(cpu);
#endif

    guest_cpu_init(cpu);
    guest_regs_init(cpu, regs);

    print_registers(2, regs);
    printk(1, "%s: secondary entry: %" PRIxREG ":%" PRIxREG ", jumping ...\n",
           EMUNAME, regs->cs, regs->rip);
}
+
/*
 * #UD handler.  Emulator-context faults are fatal; user-mode faults
 * are bounced straight to the guest; guest-kernel faults are first
 * offered to the instruction emulator (privileged instructions the
 * PV guest is not allowed to run natively) and only bounced if the
 * emulator declines.
 */
asmlinkage void do_illegal_instruction(struct regs *regs)
{
    struct xen_cpu *cpu = get_cpu();
    int skip;

    vminfo.faults[XEN_FAULT_ILLEGAL_INSTRUCTION]++;
    if (context_is_emu(regs)) {
        panic("ring0 (emu) illegal instruction", regs);
    }
    if (context_is_user(cpu, regs)) {
        uint8_t *i = (void*)regs->rip;
        printk(1, "user ill: at %p"
               " 0x%02x, 0x%02x, 0x%02x, 0x%02x,"
               " 0x%02x, 0x%02x, 0x%02x, 0x%02x\n",
               i, i[0], i[1], i[2], i[3], i[4], i[5], i[6], i[7]);
        /* trap 6 = #UD, no error code */
        bounce_trap(cpu, regs, 6, -1);
        return;
    }

    skip = emulate(cpu, regs);
    switch (skip) {
    case -1: /* error */
        panic("instruction emulation failed (ill)\n", regs);
        break;
    case 0: /* bounce to guest */
        bounce_trap(cpu, regs, 6, -1);
        break;
    default: /* handled -- skip the emulated instruction */
        regs->rip += skip;
        break;
    }
}
+
/*
 * Check whether the faulting instruction at regs->rip is an I/O (or
 * interrupt-flag) instruction the guest is privileged to execute,
 * either by iopl or by an installed I/O permission bitmap.
 * Returns 1 if allowed, 0 otherwise.
 */
static int is_allowed_io(struct xen_cpu *cpu, struct regs *regs)
{
    uint8_t *code = (void*)regs->rip;
    int pl;

#ifdef CONFIG_64BIT
    /* PV guest kernel runs in ring 3; derive the effective pl from
     * the tracked guest mode instead of the selector */
    pl = context_is_user(cpu, regs) ? 3 : 1;
#else
    pl = regs->cs & 0x03;
#endif

    /* opcode ranges below use the GCC case-range extension */
    switch (*code) {
    case 0xe4 ... 0xe7:    /* in/out imm8 */
    case 0xec ... 0xef:    /* in/out dx */
        /* I/O instructions */
        if (pl <= cpu->iopl)
            return 1;     /* yes: by iopl */
        if (cpu->nr_ports)
            return 1;     /* yes: by bitmap (FIXME: check port) */
        break;
    case 0xfa:
    case 0xfb:
        /* cli, sti */
        if (pl <= cpu->iopl)
            return 1;     /* yes: by iopl */
    }
    return 0;             /* no */
}
+
/*
 * #GP handler.  Emulator-context faults are only survivable via the
 * exception fixup table.  Permitted I/O instructions jump straight to
 * the emulator; user-mode faults bounce to the guest; guest-kernel
 * faults with a non-zero error code are fatal, the rest are offered
 * to the instruction emulator.
 */
asmlinkage void do_general_protection(struct regs *regs)
{
    struct xen_cpu *cpu = get_cpu();
    int skip;

    vminfo.faults[XEN_FAULT_GENERAL_PROTECTION]++;
    if (context_is_emu(regs)) {
        if (fixup_extable(regs)) {
            return;
        }
        print_gpf_info(0, cpu, regs);
        panic("ring0 (emu) general protection fault", regs);
    }
    if (is_allowed_io(cpu, regs)) {
        goto emulate;
    }
    if (context_is_user(cpu, regs)) {
        vminfo.faults[XEN_FAULT_GENERAL_PROTECTION_GUEST]++;
        print_gpf_info(1, cpu, regs);
        /* trap 13 = #GP */
        bounce_trap(cpu, regs, 13, -1);
        return;
    }

    /* a non-zero error code references a selector -- not something
     * the instruction emulator can fix up */
    if (regs->error) {
        print_gpf_info(0, cpu, regs);
        panic("unhandled kernel gpf", regs);
    }

emulate:
    skip = emulate(cpu, regs);
    switch (skip) {
    case -1: /* error */
        print_gpf_info(0, cpu, regs);
        panic("instruction emulation failed (gpf)", regs);
        break;
    case 0: /* bounce to guest */
        vminfo.faults[XEN_FAULT_GENERAL_PROTECTION_GUEST]++;
        bounce_trap(cpu, regs, 13, -1);
        break;
    default: /* handled -- skip instruction, maybe deliver events */
        vminfo.faults[XEN_FAULT_GENERAL_PROTECTION_EMUINS]++;
        regs->rip += skip;
        evtchn_try_forward(cpu, regs); /* sti */
        break;
    }
}
+
/* A double fault inside the emulator is unrecoverable -- shut down. */
asmlinkage void do_double_fault(struct regs *regs)
{
    panic("double fault", regs);
}
+
/*
 * Generic trap handler for exceptions the emulator never handles
 * itself: log the trap (at the per-trap verbosity level from the
 * trapinfo table, if known) and bounce it into the guest kernel.
 */
asmlinkage void do_guest_forward(struct regs *regs)
{
    struct xen_cpu *cpu = get_cpu();
    const struct trapinfo *trap = NULL;

    if (regs->trapno < sizeof(trapinfo)/sizeof(trapinfo[0])) {
        trap = trapinfo + regs->trapno;
    }
    printk(trap ? trap->lvl : 0,
           "%s: trap %d [%s], error 0x%" PRIxREG ","
           " cs:rip %" PRIxREG ":%" PRIxREG ","
           " forwarding to guest\n",
           __FUNCTION__, (int)regs->trapno,
           trap && trap->name ? trap->name : "-",
           /* only report the error code for traps that push one */
           trap && trap->ec ? regs->error : 0,
           regs->cs, regs->rip);
    bounce_trap(cpu, regs, regs->trapno, -1);
}
+
/*
 * #NM (device-not-available) handler for lazy FPU switching: clear
 * CR0.TS so FPU instructions work again, then let the guest's own
 * #NM handler do its lazy FPU state restore.
 */
asmlinkage void do_lazy_fpu(struct regs *regs)
{
    struct xen_cpu *cpu = get_cpu();

    vminfo.faults[XEN_FAULT_LAZY_FPU]++;
    clts();
    bounce_trap(cpu, regs, regs->trapno, -1);
}
+
+asmlinkage void do_int1(struct regs *regs)
+{
+ if (context_is_emu(regs)) {
+ printk(0, "%s: emu context\n", __FUNCTION__);
+ print_registers(0, regs);
+ return;
+ }
+ do_guest_forward(regs);
+}
+
+asmlinkage void do_int3(struct regs *regs)
+{
+ if (context_is_emu(regs)) {
+ printk(0, "%s: emu context\n", __FUNCTION__);
+ print_registers(0, regs);
+ return;
+ }
+ do_guest_forward(regs);
+}
+
+/* --------------------------------------------------------------------- */
+
+static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED;
+static atomic_t flush_cnt;
+static ureg_t flush_addr;
+
/*
 * IPI handler for remote TLB flush requests: flush either a single
 * address or the whole TLB, then decrement the sender's completion
 * counter.  flush_addr is stable here because the sender holds
 * flush_lock until all receivers have decremented flush_cnt.
 */
asmlinkage void do_smp_flush_tlb(struct regs *regs)
{
    struct xen_cpu *cpu = get_cpu();

    lapic_eoi(cpu);
    if (flush_addr) {
        flush_tlb_addr(flush_addr);
    } else {
        flush_tlb();
    }
    atomic_dec(&flush_cnt);
}
+
+void flush_tlb_remote(struct xen_cpu *cpu, ureg_t mask, ureg_t addr)
+{
+ int cpus;
+
+ mask &= ~(1 << cpu->id);
+ if (!mask) {
+ vminfo.faults[XEN_FAULT_OTHER_FLUSH_TLB_NONE]++;
+ return;
+ }
+
+ /*
+ * we must be able to process ipi while waiting for the lock,
+ * otherwise we deadlock in case another cpu busy-waits for us
+ * doing the tlb flush.
+ */
+ sti();
+ spin_lock(&flush_lock);
+
+ cpus = vminfo.vcpus-1; /* FIXME: not using mask, sending to all */
+ flush_addr = addr;
+ if (flush_addr) {
+ vminfo.faults[XEN_FAULT_OTHER_FLUSH_TLB_PAGE]++;
+ } else {
+ vminfo.faults[XEN_FAULT_OTHER_FLUSH_TLB_ALL]++;
+ }
+
+ atomic_add(cpus, &flush_cnt);
+ lapic_ipi_flush_tlb(cpu);
+ while (atomic_read(&flush_cnt)) {
+ pause();
+ }
+
+ spin_unlock(&flush_lock);
+ cli();
+}
This patch adds the platform agnostic piece of xenner's main loop. Signed-off-by: Alexander Graf <agraf@suse.de> --- pc-bios/xenner/xenner-main.c | 875 ++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 875 insertions(+), 0 deletions(-) create mode 100644 pc-bios/xenner/xenner-main.c