Patchwork [11/40] xenner: kernel: Hypercall handler (x86_64)

login
register
mail settings
Submitter Alexander Graf
Date Nov. 1, 2010, 3:01 p.m.
Message ID <1288623713-28062-12-git-send-email-agraf@suse.de>
Download mbox | patch
Permalink /patch/69785/
State New
Headers show

Comments

Alexander Graf - Nov. 1, 2010, 3:01 p.m.
Xenner handles guest hypercalls itself. This patch adds all the handling
code that is x86_64 specific.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 pc-bios/xenner/xenner-hcall64.c |  323 +++++++++++++++++++++++++++++++++++++++
 1 files changed, 323 insertions(+), 0 deletions(-)
 create mode 100644 pc-bios/xenner/xenner-hcall64.c

Patch

diff --git a/pc-bios/xenner/xenner-hcall64.c b/pc-bios/xenner/xenner-hcall64.c
new file mode 100644
index 0000000..93dfb99
--- /dev/null
+++ b/pc-bios/xenner/xenner-hcall64.c
@@ -0,0 +1,323 @@ 
+/*
+ *  Copyright (C) Red Hat 2007
+ *  Copyright (C) Novell Inc. 2010
+ *
+ *  Author(s): Gerd Hoffmann <kraxel@redhat.com>
+ *             Alexander Graf <agraf@suse.de>
+ *
+ *  Xenner 64 bit hypercall handlers
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <inttypes.h>
+#include <errno.h>
+#include <xen/xen.h>
+
+#include "msr-index.h"
+
+#include "xenner.h"
+
+/* --------------------------------------------------------------------- */
+
+typedef int64_t (*xen_hcall)(struct xen_cpu *cpu, uint64_t *args);
+static int64_t multicall(struct xen_cpu *cpu, uint64_t *args);
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Toggle the vcpu between guest kernel and guest user mode.
+ *
+ * Accounts the transition, loads the page tables belonging to the mode
+ * being entered, and executes swapgs so the gs base matches the new
+ * privilege level.
+ */
+void switch_mode(struct xen_cpu *cpu)
+{
+    vminfo.faults[XEN_FAULT_OTHER_SWITCH_MODE]++;
+    cpu->user_mode = !cpu->user_mode;
+    pv_write_cr3(cpu, cpu->user_mode ? cpu->user_cr3_mfn
+                                     : cpu->kernel_cr3_mfn);
+    __asm__("swapgs" ::: "memory");
+}
+
+/* Return non-zero when the vcpu currently runs in guest kernel mode. */
+int is_kernel(struct xen_cpu *cpu)
+{
+    return cpu->user_mode ? 0 : 1;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * HYPERVISOR_update_va_mapping: rewrite the pte mapping one virtual
+ * address and perform the requested TLB maintenance.
+ *
+ * args[0] = virtual address, args[1] = new pte value,
+ * args[2] = UVMF_* flush flags.
+ *
+ * Returns 0 on success, -1 when the paging structures for va are not
+ * populated (probe via memcpy_pf faulted).
+ */
+static int64_t update_va_mapping(struct xen_cpu *cpu, uint64_t *args)
+{
+    uint64_t va    = args[0];
+    uint64_t val   = args[1];
+    uint64_t flags = args[2];
+    uint64_t *pte;
+    uint64_t pte_val;
+
+    pte = find_pte_64(va);
+    if (addr_is_kernel(va)) {
+        /* Guest kernel mappings must be reachable from ring 3, since
+         * the guest kernel itself runs unprivileged under xenner:
+         * force _PAGE_USER onto present kernel ptes. */
+        if (test_pgflag_64(val, _PAGE_PRESENT) &&
+            !test_pgflag_64(val, _PAGE_USER)) {
+            vminfo.faults[XEN_FAULT_UPDATE_VA_FIX_USER]++;
+            val |= _PAGE_USER;
+        }
+    }
+
+    /* Read through memcpy_pf so a missing page-table level faults
+     * gracefully instead of crashing us. */
+    if (memcpy_pf(&pte_val, pte, sizeof(uint64_t)) < 0) {
+        /* pte is missing levels below - get out quick */
+        return -1;
+    }
+
+    /* Only touch the pte when the value actually changes. */
+    if (pte_val != val) {
+        *pte = val;
+    }
+
+    /* Honour the flush request; unknown flush types fall through
+     * silently (no default case). */
+    switch (flags & UVMF_FLUSHTYPE_MASK) {
+    case UVMF_NONE:
+        break;
+    case UVMF_TLB_FLUSH:
+        flush_tlb();
+        break;
+    case UVMF_INVLPG:
+        flush_tlb_addr(va);
+        break;
+    }
+    return 0;
+}
+
+/*
+ * HYPERVISOR_mmu_update: process a batch of page table update requests.
+ *
+ * args[0] = request array (ptr/val pairs), args[1] = request count,
+ * args[2] = optional out-pointer receiving the number of requests
+ * completed, args[3] = target domain (only DOMID_SELF is supported).
+ *
+ * Returns 0 on success, -ENOSYS for foreign domains or unknown request
+ * types.  On the -ENOSYS path *done is not updated (unchanged from the
+ * original behaviour).
+ */
+static int64_t mmu_update(struct xen_cpu *cpu, uint64_t *args)
+{
+    uint64_t *reqs = (void*)args[0];
+    uint64_t count = args[1];
+    uint64_t *done = (void*)args[2];
+    uint64_t dom   = args[3];
+    uint64_t *pte;
+    uint64_t i;   /* uint64_t: count is 64 bit, avoid signed/unsigned
+                   * mismatch and wrap of a plain int counter */
+
+    if (dom != DOMID_SELF) {
+        printk(1, "%s: foreigndom not supported\n", __FUNCTION__);
+        return -ENOSYS;
+    }
+
+    for (i = 0; i < count; i++) {
+        switch (reqs[0] & 3) {
+        case MMU_NORMAL_PT_UPDATE:
+            /* reqs[0] holds the machine address of the pte,
+             * reqs[1] the new pte value */
+            pte = map_page(reqs[0]);
+            *pte = reqs[1];
+            break;
+        case MMU_MACHPHYS_UPDATE:
+        {
+            xen_pfn_t gmfn = reqs[0] >> PAGE_SHIFT;
+            xen_pfn_t gpfn = reqs[1];
+            /* m2p entries below the guest's first mfn are never
+             * legitimate (typo "suspious" fixed) */
+            if (gmfn < vmconf.mfn_guest) {
+                panic("suspicious m2p update", NULL);
+            }
+            m2p[gmfn] = gpfn;
+            break;
+        }
+        default:
+            return -ENOSYS;
+        }
+        reqs += 2;
+    }
+    if (done) {
+        *done = i;
+    }
+
+    return 0;
+}
+
+/*
+ * HYPERVISOR_iret: return from an event/exception into the guest.
+ *
+ * Reads a struct iret_context the guest pushed on its stack (rsp of the
+ * saved register frame at the top of our stack) and rebuilds the real
+ * hardware iret frame from it.  Returns -EINTR so do_hypercall takes
+ * the HCALL_IRET exit path instead of the normal sysret path.
+ */
+static int64_t iret(struct xen_cpu *cpu, uint64_t *args)
+{
+    /* the saved guest register frame lives at the top of our stack */
+    struct regs_64 *regs = (void*)cpu->stack_high - sizeof(*regs);
+    struct iret_context stack;
+
+    stack = *((struct iret_context*)regs->rsp);
+
+    if ((stack.cs & 3) == 3) {
+        /* return to userspace */
+        switch_mode(cpu);
+    }
+
+    /* rebuild the frame; selectors are remapped into xenner's GDT */
+    regs->rip     = stack.rip;
+    regs->cs      = fix_sel64(stack.cs);
+    regs->rsp     = stack.rsp;
+    regs->ss      = fix_sel64(stack.ss);
+    regs->rflags  = stack.rflags;
+    /* the guest never gets IOPL or vm86; interrupts stay enabled for
+     * real -- the virtual interrupt state is tracked separately below */
+    regs->rflags &= ~(X86_EFLAGS_IOPL|X86_EFLAGS_VM);
+    regs->rflags |= X86_EFLAGS_IF;
+
+    /* propagate the guest's requested virtual interrupt state */
+    if (stack.rflags & X86_EFLAGS_IF) {
+        guest_sti(cpu);
+    } else {
+        guest_cli(cpu);
+    }
+
+    /* in the syscall case rcx/r11 are clobbered anyway, so only
+     * restore them for a genuine interrupt/exception return */
+    if (!(stack.flags & VGCF_in_syscall)) {
+        regs->r11 = stack.r11;
+        regs->rcx = stack.rcx;
+    }
+
+    regs->rax     = stack.rax;
+    return -EINTR;
+}
+
+/*
+ * HYPERVISOR_set_segment_base: set fs/gs segment bases for the guest.
+ *
+ * args[0] = SEGBASE_* selector of which base to set,
+ * args[1] = new base address (or selector for SEGBASE_GS_USER_SEL).
+ *
+ * Returns 0 on success, -ENOSYS for unknown SEGBASE_* values.
+ */
+static int64_t set_segment_base(struct xen_cpu *cpu, uint64_t *args)
+{
+    switch (args[0]) {
+    case SEGBASE_FS:
+        wrmsrl(MSR_FS_BASE, args[1]);
+        break;
+    case SEGBASE_GS_USER:
+        /* guest user gs: we currently run with kernel gs active, so
+         * the user base lives in MSR_KERNEL_GS_BASE */
+        wrmsrl(MSR_KERNEL_GS_BASE, args[1]);
+        break;
+    case SEGBASE_GS_KERNEL:
+        wrmsrl(MSR_GS_BASE, args[1]);
+        break;
+    case SEGBASE_GS_USER_SEL:
+        /* load a gs selector on behalf of guest userspace: swap to the
+         * user gs, load the selector there, then swap back so our own
+         * gs base stays intact */
+        __asm__("swapgs         \n"
+                "movl %k0, %%gs \n"
+                "mfence         \n"
+                "swapgs         \n"
+                :: "r" (args[1] & 0xffff));
+        return 0;
+    default:
+        printk(0, "%s: unknown %d\n", __FUNCTION__, (int)args[0]);
+        return -ENOSYS;
+    }
+    return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+/* Dispatch table: hypercall number -> handler.  Entries not listed are
+ * NULL and rejected by do_hypercall/multicall. */
+static xen_hcall hcalls[XEN_HCALL_MAX] = {
+    [ __HYPERVISOR_update_va_mapping ]       = update_va_mapping,
+    [ __HYPERVISOR_mmu_update ]              = mmu_update,
+    [ __HYPERVISOR_mmuext_op ]               = mmuext_op,
+    [ __HYPERVISOR_stack_switch ]            = stack_switch,
+    [ __HYPERVISOR_multicall ]               = multicall,
+    [ __HYPERVISOR_iret ]                    = iret,
+    [ __HYPERVISOR_update_descriptor ]       = update_descriptor,
+    [ __HYPERVISOR_set_segment_base ]        = set_segment_base,
+    [ __HYPERVISOR_fpu_taskswitch ]          = fpu_taskswitch,
+    [ __HYPERVISOR_grant_table_op ]          = grant_table_op,
+    [ __HYPERVISOR_xen_version ]             = xen_version,
+    [ __HYPERVISOR_vm_assist ]               = vm_assist,
+    [ __HYPERVISOR_sched_op ]                = sched_op,
+    [ __HYPERVISOR_sched_op_compat ]         = sched_op_compat,
+    [ __HYPERVISOR_memory_op ]               = memory_op,
+    [ __HYPERVISOR_set_trap_table ]          = set_trap_table,
+    [ __HYPERVISOR_set_callbacks ]           = set_callbacks,
+    [ __HYPERVISOR_callback_op ]             = callback_op,
+    [ __HYPERVISOR_set_gdt ]                 = set_gdt,
+    [ __HYPERVISOR_vcpu_op ]                 = vcpu_op,
+    [ __HYPERVISOR_event_channel_op ]        = event_channel_op,
+    [ __HYPERVISOR_event_channel_op_compat ] = event_channel_op_compat,
+    [ __HYPERVISOR_set_timer_op ]            = set_timer_op,
+    [ __HYPERVISOR_physdev_op ]              = physdev_op,
+    [ __HYPERVISOR_get_debugreg ]            = get_debugreg,
+    [ __HYPERVISOR_set_debugreg ]            = set_debugreg,
+    [ __HYPERVISOR_console_io ]              = console_io,
+
+    /* unsupported operations */
+    [ __HYPERVISOR_platform_op ]             = error_noperm,
+    [ __HYPERVISOR_physdev_op_compat ]       = error_noperm,
+    /* NOTE(review): a duplicate "[ __HYPERVISOR_set_debugreg ] =
+     * error_noop" designated initializer followed here; with C99
+     * designated initializers the later entry silently overrode the
+     * real set_debugreg handler above, so it was removed.  Confirm
+     * set_debugreg is actually meant to be wired up. */
+};
+
+/*
+ * HYPERVISOR_multicall: execute a batch of hypercalls.
+ *
+ * args[0] = array of struct multicall_entry, args[1] = entry count.
+ * Each entry's result is written back into the entry.  Always returns
+ * 0; panics on an unknown hypercall number in the list.
+ */
+static int64_t multicall(struct xen_cpu *cpu, uint64_t *args)
+{
+    struct multicall_entry *calls = (void*)args[0];
+    uint64_t i, count = args[1];
+    uint64_t margs[6];
+
+    for (i = 0; i < count; i++) {
+        /* bounds-check the guest-supplied op before indexing hcalls[]
+         * (do_hypercall performs the same check); without it an op
+         * >= XEN_HCALL_MAX reads past the end of the table */
+        if (calls[i].op >= XEN_HCALL_MAX || !hcalls[calls[i].op]) {
+            printk(0, "%s: unknown hypercall #%ld\n", __FUNCTION__, calls[i].op);
+            panic("unknown hypercall in multicall list", NULL);
+        }
+        vminfo.hcalls[calls[i].op]++;
+        margs[0] = calls[i].args[0];
+        margs[1] = calls[i].args[1];
+        margs[2] = calls[i].args[2];
+        margs[3] = calls[i].args[3];
+        margs[4] = calls[i].args[4];
+        margs[5] = calls[i].args[5];
+        calls[i].result = hcalls[calls[i].op](cpu, margs);
+    }
+    return 0;
+}
+
+/*
+ * Dispatch one hypercall from the saved guest register frame.
+ *
+ * Hypercall number is in rax, arguments in rdi/rsi/rdx/r10/r8/r9
+ * (the x86_64 Xen hypercall ABI).  The result goes back into rax and
+ * regs->error selects how the exit path returns to the guest
+ * (HCALL_HANDLED via sysret, HCALL_IRET via iretq).
+ */
+static void do_hypercall(struct xen_cpu *cpu, struct regs_64 *regs)
+{
+    /* zero-init: the error paths below jump to 'handled' before args
+     * is filled in, and the -ENOSYS printk there reads args[0] and
+     * args[1] -- reading them uninitialized is undefined behavior */
+    uint64_t args[6] = { 0 };
+    uint64_t retval = -ENOSYS;
+
+    if (regs->rax >= XEN_HCALL_MAX) {
+        /* invalid hypercall number */
+        printk(5, "hcall %ld >= XEN_HCALL_MAX\n", regs->rax);
+        goto handled;
+    }
+    if (!hcalls[regs->rax]) {
+        /* no hypercall handler */
+        printk(5, "hcall %ld no handler (%p)\n", regs->rax, hcalls[regs->rax]);
+        goto handled;
+    }
+
+    /* do call */
+    vminfo.hcalls[regs->rax]++;
+    args[0] = regs->rdi;
+    args[1] = regs->rsi;
+    args[2] = regs->rdx;
+    args[3] = regs->r10;
+    args[4] = regs->r8;
+    args[5] = regs->r9;
+
+    retval = hcalls[regs->rax](cpu, args);
+
+    /* handlers (iret) return -EINTR to request the iretq exit path */
+    if (-EINTR == retval)
+        goto iret;
+
+handled:
+    if (-ENOSYS == retval) {
+        printk(0, "hypercall %s (#%ld)  |  arg0 0x%lx  arg1 0x%lx  -> -ENOSYS\n",
+               __hypervisor_name(regs->rax), regs->rax, args[0], args[1]);
+    }
+
+    regs->rax = retval;
+    regs->error = HCALL_HANDLED;
+    evtchn_try_forward(cpu, regs);
+    return;
+
+iret:
+    /* iret handler already rebuilt the frame; don't clobber rax */
+    regs->error = HCALL_IRET;
+    evtchn_try_forward(cpu, regs);
+    return;
+}
+
+/*
+ * Entry point for the syscall instruction from the guest.
+ *
+ * A syscall issued by the guest kernel is a hypercall; one issued by
+ * guest userspace is reflected back into the guest via its registered
+ * syscall callback.
+ */
+asmlinkage void do_syscall(struct regs_64 *regs)
+{
+    struct xen_cpu *cpu = get_cpu();
+
+    if (!is_kernel(cpu)) {
+        vminfo.faults[XEN_FAULT_SYSCALL]++;
+        /* segment registers are not saved on the syscall path */
+        regs->cs = FLAT_USER_CS;
+        regs->ss = FLAT_USER_SS;
+        bounce_trap(cpu, regs, -1, CALLBACKTYPE_syscall);
+        /* leave via iretq */
+        regs->error = HCALL_IRET;
+        return;
+    }
+
+    /* segment registers are not saved on the syscall path */
+    regs->cs = FLAT_KERNEL_CS;
+    regs->ss = FLAT_KERNEL_SS;
+    do_hypercall(cpu, regs);
+}