Patchwork [17/40] xenner: kernel: Main (x86_64)

login
register
mail settings
Submitter Alexander Graf
Date Nov. 1, 2010, 3:01 p.m.
Message ID <1288623713-28062-18-git-send-email-agraf@suse.de>
Download mbox | patch
Permalink /patch/69821/
State New
Headers show

Comments

Alexander Graf - Nov. 1, 2010, 3:01 p.m.
This patch adds the x86_64-specific piece of xenner's main loop.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 pc-bios/xenner/xenner-main64.c |  412 ++++++++++++++++++++++++++++++++++++++++
 1 files changed, 412 insertions(+), 0 deletions(-)
 create mode 100644 pc-bios/xenner/xenner-main64.c

Patch

diff --git a/pc-bios/xenner/xenner-main64.c b/pc-bios/xenner/xenner-main64.c
new file mode 100644
index 0000000..52f1dd3
--- /dev/null
+++ b/pc-bios/xenner/xenner-main64.c
@@ -0,0 +1,412 @@ 
+/*
+ *  Copyright (C) Red Hat 2007
+ *  Copyright (C) Novell Inc. 2010
+ *
+ *  Author(s): Gerd Hoffmann <kraxel@redhat.com>
+ *             Alexander Graf <agraf@suse.de>
+ *
+ *  Xenner main functions for 64 bit
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msr-index.h"
+#include "xenner.h"
+#include "xenner-main.c"
+
+/* --------------------------------------------------------------------- */
+/* helper functions                                                      */
+
+/*
+ * Build an exception/interrupt frame on the guest kernel stack and
+ * rewrite *regs so that the subsequent iret enters the guest's
+ * registered handler instead of returning to the interrupted context.
+ *
+ * trapno >= 0 selects a trap bounce through the xentr trap table;
+ * cbno  >= 0 selects a callback through the xencb table.  Exactly one
+ * of the two is expected to resolve to a handler (rip stays 0 and we
+ * panic otherwise).  Always returns 0.
+ */
+int bounce_trap(struct xen_cpu *cpu, struct regs_64 *regs, int trapno, int cbno)
+{
+    uint64_t *stack, rip = 0, rsp, stack_cs, stack_rflags;
+    int error_code = 0;
+    int interrupt = 0;
+    int failsafe = 0;
+    int k = 0;
+
+    vminfo.faults[XEN_FAULT_BOUNCE_TRAP]++;
+
+    if (trapno >= 0) {
+        /* trap bounce: look up the guest handler address */
+        rip  = xentr[trapno].address;
+        if (TI_GET_IF(&xentr[trapno])) {
+            /* trap-info flag set -> this entry disables events on entry */
+            interrupt = 1;
+        }
+        if (trapno < sizeof(trapinfo)/sizeof(trapinfo[0])) {
+            /* does the hardware push an error code for this vector? */
+            error_code = trapinfo[trapno].ec;
+        }
+        if (trapno == 14) {
+            /* page fault: make the faulting address visible to the guest */
+            cpu->v.vcpu_info->arch.cr2 = read_cr2();
+        }
+    }
+    if (cbno >= 0) {
+        /* callback bounce */
+        rip  = xencb[cbno];
+        switch (cbno) {
+        case CALLBACKTYPE_event:
+            /* event upcalls run with events masked */
+            interrupt = 1;
+            break;
+        }
+    }
+
+    if (!rip) {
+        printk(0, "%s: cbno %d, trapno %d\n", __FUNCTION__, cbno, trapno);
+        panic("no guest trap handler", regs);
+    }
+
+    /* set interrupt flag depending on event channel mask */
+    stack_rflags = regs->rflags & ~X86_EFLAGS_IF;
+    if (guest_irq_flag(cpu)) {
+        stack_rflags |= X86_EFLAGS_IF;
+    }
+
+    /* old evtchn_upcall_mask is saved in cs slot on the stack */
+    stack_cs = regs->cs | ((uint64_t)cpu->v.vcpu_info->evtchn_upcall_mask << 32);
+    if (interrupt) {
+        /* mask events before entering the guest handler */
+        guest_cli(cpu);
+    }
+
+    if (!is_kernel(cpu)) {
+        /* user mode: enter the guest kernel on its registered stack */
+        switch_mode(cpu);
+        rsp = cpu->kernel_sp;
+    } else {
+        /* kernel mode: stay on the current stack */
+        stack_cs &= ~3;         /* signal kernel mode */
+        rsp = regs->rsp & ~0xf; /* align stack */
+    }
+    stack = (void*)(rsp);
+
+    /* build a hardware-iret-style frame, top down (k counts slots) */
+    stack[-(++k)] = regs->ss;        // push ss
+    stack[-(++k)] = regs->rsp;       // push rsp
+    stack[-(++k)] = stack_rflags;    // push rflags
+    stack[-(++k)] = stack_cs;        // push cs
+    stack[-(++k)] = regs->rip;       // push rip
+    if (error_code) {
+        stack[-(++k)] = regs->error; // push error code
+    }
+
+    /* NOTE(review): failsafe is never set above; this branch is
+     * currently dead (failsafe callback bounce not implemented). */
+    if (failsafe) {
+        /* push segment registers */;
+    }
+
+    /* 64-bit bounce frames additionally carry rcx/r11 — presumably
+     * because the syscall/sysret path clobbers them; TODO confirm
+     * against the trampoline code. */
+    stack[-(++k)] = regs->r11;       // push r11
+    stack[-(++k)] = regs->rcx;       // push rcx
+
+    /* prepare emu stack, so iret jumps to the kernels handler. */
+    regs->rip     = rip;
+    regs->cs      = FLAT_KERNEL_CS;
+    regs->rflags &= EFLAGS_TRAPMASK;
+    regs->rsp     = rsp - 8*k;
+    regs->ss      = FLAT_KERNEL_SS;
+
+    return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Flat segment descriptors installed into the GDT by gdt_init, keyed
+ * by their hard-coded selectors (0xe008..0xe038).
+ * type 0xb = execute/read code segment, type 0x3 = read/write data
+ * segment; .l = 64-bit code, .db = 32-bit default operand size.
+ */
+
+/* kernel (dpl 0) 64-bit code */
+static const struct kvm_segment xen64_cs0_64 = {
+    .base     = 0,
+    .limit    = 0xffffffff,
+    .selector = 0xe008,
+    .dpl      = 0,
+    .type     = 0xb,
+    .present  = 1,  .l = 1,  .s = 1,  .g = 1,
+};
+/* kernel (dpl 0) data */
+static const struct kvm_segment xen64_ds0_32 = {
+    .base     = 0,
+    .limit    = 0xffffffff,
+    .selector = 0xe010,
+    .dpl      = 0,
+    .type     = 0x3,
+    .present  = 1,  .db = 1,  .s = 1,  .g = 1,
+};
+/* user (dpl 3) 32-bit code */
+static const struct kvm_segment xen64_cs3_32 = {
+    .base     = 0,
+    .limit    = 0xffffffff,
+    .selector = 0xe023,
+    .dpl      = 3,
+    .type     = 0xb,
+    .present  = 1,  .db = 1,  .s = 1,  .g = 1,
+};
+/* user (dpl 3) data */
+static const struct kvm_segment xen64_ds3_32 = {
+    .base     = 0,
+    .limit    = 0xffffffff,
+    .selector = 0xe02b,
+    .dpl      = 3,
+    .type     = 0x3,
+    .present  = 1,  .db = 1,  .s = 1,  .g = 1,
+};
+/* user (dpl 3) 64-bit code */
+static const struct kvm_segment xen64_cs3_64 = {
+    .base     = 0,
+    .limit    = 0xffffffff,
+    .selector = 0xe033,
+    .dpl      = 3,
+    .type     = 0xb,
+    .present  = 1,  .l = 1,  .s = 1,  .g = 1,
+};
+/* kernel (dpl 0) 32-bit code */
+static const struct kvm_segment xen64_cs0_32 = {
+    .base     = 0,
+    .limit    = 0xffffffff,
+    .selector = 0xe038,
+    .dpl      = 0,
+    .type     = 0xb,
+    .present  = 1,  .db = 1,  .s = 1,  .g = 1,
+};
+
+/*
+ * Allocate (on first call) and populate this cpu's GDT with the six
+ * flat xen64 segment descriptors defined above.
+ */
+void gdt_init(struct xen_cpu *cpu)
+{
+    printk(2, "%s: cpu %d\n", __FUNCTION__, cpu->id);
+
+    if (!cpu->gdt) {
+        /* 16 pages — presumably sized for the full selector range;
+         * TODO confirm against gdt_set/get_pages */
+        cpu->gdt = get_pages(16, "gdt");
+    }
+
+    gdt_set(cpu->gdt, &xen64_cs0_64);
+    gdt_set(cpu->gdt, &xen64_ds0_32);
+    gdt_set(cpu->gdt, &xen64_cs3_32);
+    gdt_set(cpu->gdt, &xen64_ds3_32);
+    gdt_set(cpu->gdt, &xen64_cs3_64);
+    gdt_set(cpu->gdt, &xen64_cs0_32);
+}
+
+/*
+ * Set up this cpu's TSS and install its descriptor into the GDT.
+ * A 64-bit TSS descriptor spans two consecutive 8-byte GDT slots:
+ * the first holds limit/base[31:0]/attributes, the second base[63:32].
+ */
+void tss_init(struct xen_cpu *cpu)
+{
+    struct descriptor_32 *gdt = cpu->gdt;
+    int size, idx = tss(cpu);
+    uint64_t base;
+
+    printk(2, "%s: cpu %d\n", __FUNCTION__, cpu->id);
+
+    /* rsp0: stack used on ring transitions; ist[0]: irq stack */
+    cpu->tss.rsp0   = (uintptr_t)cpu->stack_high;
+    cpu->tss.ist[0] = (uintptr_t)cpu->irqstack_high;
+
+    base = (uintptr_t)(&cpu->tss);
+    size = sizeof(cpu->tss)-1;
+    /* 0x89 = present, available 64-bit TSS */
+    gdt[ idx +0 ] = mkdesc32(base & 0xffffffff, size, 0x89, 0);
+    gdt[ idx +1 ].a = base >> 32;
+    gdt[ idx +1 ].b = 0;
+}
+
+/*
+ * Program the syscall/sysret MSRs for this cpu.
+ */
+void msrs_init(struct xen_cpu *cpu)
+{
+    printk(2, "%s: cpu %d\n", __FUNCTION__, cpu->id);
+
+    /* syscall setup */
+    /* STAR[63:48] = sysret selector base (0xe023, user),
+     * STAR[47:32] = syscall selector base (0xe008, kernel) */
+    wrmsrl(MSR_STAR, (((uint64_t)0xe023 << 48) |
+                      ((uint64_t)0xe008 << 32)));
+    /* syscall entry point: per-cpu trampoline */
+    wrmsrl(MSR_LSTAR, (uintptr_t)STACK_PTR(cpu, trampoline_start));
+    /* rflags bits cleared by the cpu on syscall entry */
+    wrmsrl(MSR_SYSCALL_MASK, X86_EFLAGS_VM | X86_EFLAGS_RF |
+           X86_EFLAGS_NT | X86_EFLAGS_DF | X86_EFLAGS_IF | X86_EFLAGS_TF);
+}
+
+/*
+ * Populate the xen IDT: a default gate for each of the 256 vectors
+ * pointing at the per-vector irq stub, then dedicated gates for the
+ * architectural exceptions, the SMP tlb-flush IPI and int 0x80.
+ *
+ * Gate attributes: 0x8e = present DPL-0 64-bit interrupt gate,
+ * 0xee = same but DPL 3 so guest user mode may trigger it
+ * (int3, into/overflow, int 0x80).  The irq stubs and the tlb-flush
+ * IPI run on IST 1 (the irq stack set up in tss_init).
+ */
+void idt_init(void)
+{
+    uintptr_t entry;
+    int i, len;
+
+    printk(2, "%s\n", __FUNCTION__);
+
+    /* irq_entries holds 256 equally sized stubs; len is the stub size.
+     * (Previously 'entry' was computed but never used — the gate was
+     * built from a duplicate expression.) */
+    len = (irq_common - irq_entries) / 256;
+    for (i = 0; i < 256; i++) {
+        entry = (uintptr_t)irq_entries + i*len;
+        xen_idt[i] = mkgate64(0xe008, entry, 0x8e, 1);
+    }
+
+    /* exception vectors (15 is reserved and keeps the default stub) */
+    xen_idt[    0 ] = mkgate64(0xe008, (uintptr_t)division_by_zero,    0x8e, 0);
+    xen_idt[    1 ] = mkgate64(0xe008, (uintptr_t)debug_int1,          0x8e, 0);
+    xen_idt[    2 ] = mkgate64(0xe008, (uintptr_t)nmi,                 0x8e, 0);
+    xen_idt[    3 ] = mkgate64(0xe008, (uintptr_t)debug_int3,          0xee, 0);
+    xen_idt[    4 ] = mkgate64(0xe008, (uintptr_t)overflow,            0xee, 0);
+    xen_idt[    5 ] = mkgate64(0xe008, (uintptr_t)bound_check,         0x8e, 0);
+    xen_idt[    6 ] = mkgate64(0xe008, (uintptr_t)illegal_instruction, 0x8e, 0);
+    xen_idt[    7 ] = mkgate64(0xe008, (uintptr_t)no_device,           0x8e, 0);
+    xen_idt[    8 ] = mkgate64(0xe008, (uintptr_t)double_fault,        0x8e, 0);
+    xen_idt[    9 ] = mkgate64(0xe008, (uintptr_t)coprocessor,         0x8e, 0);
+    xen_idt[   10 ] = mkgate64(0xe008, (uintptr_t)invalid_tss,         0x8e, 0);
+    xen_idt[   11 ] = mkgate64(0xe008, (uintptr_t)segment_not_present, 0x8e, 0);
+    xen_idt[   12 ] = mkgate64(0xe008, (uintptr_t)stack_fault,         0x8e, 0);
+    xen_idt[   13 ] = mkgate64(0xe008, (uintptr_t)general_protection,  0x8e, 0);
+    xen_idt[   14 ] = mkgate64(0xe008, (uintptr_t)page_fault,          0x8e, 0);
+    xen_idt[   16 ] = mkgate64(0xe008, (uintptr_t)floating_point,      0x8e, 0);
+    xen_idt[   17 ] = mkgate64(0xe008, (uintptr_t)alignment,           0x8e, 0);
+    xen_idt[   18 ] = mkgate64(0xe008, (uintptr_t)machine_check,       0x8e, 0);
+    xen_idt[   19 ] = mkgate64(0xe008, (uintptr_t)simd_floating_point, 0x8e, 0);
+
+    xen_idt[ VECTOR_FLUSH_TLB  ] =
+        mkgate64(0xe008, (uintptr_t)smp_flush_tlb, 0x8e, 1);
+
+    xen_idt[ 0x80 ] = mkgate64(0xe008, (uintptr_t)int_80,              0xee, 0);
+}
+
+/*
+ * Load the initial guest register state from the vcpu_guest_context
+ * into the emu register frame, cache the guest kernel stack (ss/sp)
+ * for later ring transitions, and load the guest's data segment
+ * selectors directly into the hardware segment registers (they are
+ * not part of the iret frame that restores the rest).
+ */
+void guest_regs_init(struct xen_cpu *cpu, struct regs_64 *regs)
+{
+    struct vcpu_guest_context *ctxt = cpu->init_ctxt;
+
+    cpu->kernel_ss = ctxt->kernel_ss;
+    cpu->kernel_sp = ctxt->kernel_sp;
+
+    regs->rax    = ctxt->user_regs.rax;
+    regs->rbx    = ctxt->user_regs.rbx;
+    regs->rcx    = ctxt->user_regs.rcx;
+    regs->rdx    = ctxt->user_regs.rdx;
+    regs->rsi    = ctxt->user_regs.rsi;
+    regs->rdi    = ctxt->user_regs.rdi;
+    regs->rbp    = ctxt->user_regs.rbp;
+    regs->rip    = ctxt->user_regs.rip;
+    regs->cs     = ctxt->user_regs.cs;
+    regs->rflags = ctxt->user_regs.rflags;
+    regs->rsp    = ctxt->user_regs.rsp;
+    regs->ss     = ctxt->user_regs.ss;
+
+    asm volatile("mov %0, %%ds;\n" :: "r" (ctxt->user_regs.ds) : "memory");
+    asm volatile("mov %0, %%es;\n" :: "r" (ctxt->user_regs.es) : "memory");
+    asm volatile("mov %0, %%fs;\n" :: "r" (ctxt->user_regs.fs) : "memory");
+    asm volatile("mov %0, %%gs;\n" :: "r" (ctxt->user_regs.gs) : "memory");
+}
+
+/* Reset the shared info page: everything zeroed, and event upcalls
+ * masked on every (legacy) vcpu until the guest unmasks them. */
+static void set_up_shared_info(void)
+{
+    int vcpu;
+
+    memset(&shared_info, 0, sizeof(shared_info));
+    for (vcpu = 0; vcpu < XEN_LEGACY_MAX_VCPUS; vcpu++) {
+        shared_info.vcpu_info[vcpu].evtchn_upcall_mask = 1;
+    }
+}
+
+/*
+ * Build the initial vcpu_guest_context for booting the guest kernel.
+ *
+ * boot_cr3:    value loaded into the guest's cr3
+ * init_pt_len: size of the initial page tables; the boot stack page
+ *              follows them, and rsp points just past that page.
+ * rsi carries the start_info address, per the Xen PV boot protocol.
+ */
+static void set_up_context(void *_ctxt, unsigned long boot_cr3,
+                           unsigned long init_pt_len)
+{
+    vcpu_guest_context_t *ctxt = _ctxt;
+    uint64_t virt_base = emudev_get(EMUDEV_CONF_PV_VIRT_BASE, 0);
+    uint64_t virt_entry = emudev_get(EMUDEV_CONF_PV_VIRT_ENTRY, 0);
+    uint64_t boot_stack_pfn = emudev_get(EMUDEV_CONF_PFN_INIT_PT, 0) +
+                              addr_to_frame(init_pt_len + PAGE_SIZE - 1);
+    uint64_t start_info_pfn = emudev_get(EMUDEV_CONF_PFN_START_INFO, 0);
+
+    set_up_shared_info();
+
+    /* clear everything */
+    memset(ctxt, 0, sizeof(*ctxt));
+
+    ctxt->user_regs.ds = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.es = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.fs = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.gs = FLAT_KERNEL_DS_X86_64;
+    ctxt->user_regs.ss = FLAT_KERNEL_SS_X86_64;
+    ctxt->user_regs.cs = FLAT_KERNEL_CS_X86_64;
+    ctxt->user_regs.rip = virt_entry;
+    ctxt->user_regs.rsp = virt_base | ((boot_stack_pfn + 1) << PAGE_SHIFT);
+    ctxt->user_regs.rsi = virt_base | (start_info_pfn << PAGE_SHIFT);
+    ctxt->user_regs.rflags = 1 << 9; /* Interrupt Enable */
+
+    ctxt->kernel_ss = ctxt->user_regs.ss;
+    /* BUGFIX: read the full 64-bit rsp, not the 32-bit esp alias —
+     * esp would truncate the kernel stack address on x86_64 */
+    ctxt->kernel_sp = ctxt->user_regs.rsp;
+
+    ctxt->flags = VGCF_in_kernel_X86_64 | VGCF_online_X86_64;
+    ctxt->ctrlreg[3] = boot_cr3;
+}
+
+/*
+ * Fill the guest-visible hypercall page: one 32-byte stub per
+ * hypercall number.  Each stub loads the number into %eax and issues
+ * syscall, saving/restoring rcx and r11 around it (syscall/sysret
+ * clobber them).  HYPERVISOR_iret gets a special non-returning stub.
+ */
+static void guest_hypercall_page(struct xen_cpu *cpu)
+{
+    uint64_t _hypercall_page = emudev_get(EMUDEV_CONF_HYPERCALL_PAGE, 0);
+    char *hypercall_page = (char*)_hypercall_page;
+
+    char *p;
+    int i;
+
+    /* Fill in all the transfer points with template machine code. */
+    for ( i = 0; i < (PAGE_SIZE / 32); i++ ) {
+        p = (char *)(hypercall_page + (i * 32));
+        *(uint8_t  *)(p+ 0) = 0x51;    /* push %rcx */
+        *(uint16_t *)(p+ 1) = 0x5341;  /* push %r11 */
+        *(uint8_t  *)(p+ 3) = 0xb8;    /* mov  $<i>,%eax */
+        *(uint32_t *)(p+ 4) = i;
+        *(uint16_t *)(p+ 8) = 0x050f;  /* syscall */
+        *(uint16_t *)(p+10) = 0x5b41;  /* pop  %r11 */
+        *(uint8_t  *)(p+12) = 0x59;    /* pop  %rcx */
+        *(uint8_t  *)(p+13) = 0xc3;    /* ret */
+    }
+
+    /*
+     * HYPERVISOR_iret is special because it doesn't return and expects a
+     * special stack frame. Guests jump at this transfer point instead of
+     * calling it.
+     */
+    p = (char *)(hypercall_page + (__HYPERVISOR_iret * 32));
+    *(uint8_t  *)(p+ 0) = 0x51;    /* push %rcx */
+    *(uint16_t *)(p+ 1) = 0x5341;  /* push %r11 */
+    *(uint8_t  *)(p+ 3) = 0x50;    /* push %rax */
+    *(uint8_t  *)(p+ 4) = 0xb8;    /* mov  $__HYPERVISOR_iret,%eax */
+    *(uint32_t *)(p+ 5) = __HYPERVISOR_iret;
+    *(uint16_t *)(p+ 9) = 0x050f;  /* syscall */
+
+}
+
+/* --------------------------------------------------------------------- */
+/* called from assembler                                                 */
+
+/*
+ * Page fault handler.  Faults in emu context are either fixed up via
+ * the exception table or fatal.  Guest faults go through a cascade of
+ * transparent fixups (r/o page-table pages, missing _PAGE_USER) and
+ * are bounced into the guest's trap-14 handler only if none applies.
+ */
+asmlinkage void do_page_fault(struct regs_64 *regs)
+{
+    struct xen_cpu *cpu = get_cpu();
+    uint64_t cr2 = read_cr2();
+
+    vminfo.faults[XEN_FAULT_PAGE_FAULT]++;
+
+    if (context_is_emu(regs)) {
+        /* fault in xenner's own code: extable fixup or die */
+        if (fixup_extable(regs)) {
+            return;
+        }
+        print_page_fault_info(0, cpu, regs, cr2);
+        pgtable_walk(0, cr2, read_cr3_mfn(cpu));
+        panic("ring0 (emu) page fault", regs);
+    }
+
+    /* fixup error code for kernel faults */
+    if (context_is_kernel(cpu, regs)) {
+        /* clear the U/S bit: the guest kernel really runs in ring 3 */
+        regs->error &= ~0x04;
+    }
+
+    if (wrpt && regs->error == 3) {
+        /* kernel write to r/o page */
+        if (!cpu->user_cr3_mfn || !pgtable_is_present(cr2, cpu->user_cr3_mfn)) {
+            /* is kernel page -> rw fixup for page tables */
+            if (pgtable_fixup_flag(cpu, cr2, _PAGE_RW) > 0) {
+                vminfo.faults[XEN_FAULT_PAGE_FAULT_FIX_RO]++;
+                return;
+            }
+        }
+    }
+
+    if (regs->error & 0x01) {
+        /* present: try setting _PAGE_USER transparently */
+        if (pgtable_fixup_flag(cpu, cr2, _PAGE_USER) > 0) {
+            vminfo.faults[XEN_FAULT_PAGE_FAULT_FIX_USER]++;
+            return;
+        }
+    }
+
+    /* no fixup applied -> let the guest kernel handle it */
+    vminfo.faults[XEN_FAULT_PAGE_FAULT_GUEST]++;
+    bounce_trap(cpu, regs, 14, -1);
+}
+
+/* Guest executed "int $0x80": account the event and reflect it into
+ * the guest kernel's 0x80 trap handler. */
+asmlinkage void do_int_80(struct regs_64 *regs)
+{
+    struct xen_cpu *vcpu = get_cpu();
+
+    vminfo.faults[XEN_FAULT_INT_80]++;
+    bounce_trap(vcpu, regs, 0x80, -1);
+}