@@ -1,7 +1,7 @@
# -*-Makefile-*-
SUBDIRS += core
-CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o
+CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o vm.o
CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o
CORE_OBJS += opal-msg.o pci.o pci-iov.o pci-virt.o pci-slot.o pcie-slot.o
CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o
@@ -407,12 +407,16 @@ static void cpu_idle_p9(enum cpu_wake_cause wake_on)
/* PSSCR SD=0 ESL=1 EC=1 PSSL=0 TR=3 MTL=0 RL=3 */
psscr = PPC_BIT(42) | PPC_BIT(43) |
PPC_BITMASK(54, 55) | PPC_BITMASK(62,63);
+ vm_exit();
enter_p9_pm_state(psscr);
+ vm_enter();
} else {
/* stop with EC=0 (resumes) which does not require sreset. */
/* PSSCR SD=0 ESL=0 EC=0 PSSL=0 TR=3 MTL=0 RL=3 */
psscr = PPC_BITMASK(54, 55) | PPC_BITMASK(62,63);
+ vm_exit();
enter_p9_pm_lite_state(psscr);
+ vm_enter();
}
skip_sleep:
@@ -403,6 +403,7 @@ static bool load_kernel(void)
"INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
kernel_size);
+ vm_map((unsigned long)kh, sizeof(*kh));
if (kh->ei_ident != ELF_IDENT) {
prerror("INIT: ELF header not found. Assuming raw binary.\n");
return true;
@@ -418,6 +419,7 @@ static bool load_kernel(void)
prerror("INIT: Neither ELF32 not ELF64 ?\n");
return false;
}
+ vm_unmap((unsigned long)kh, sizeof(*kh));
if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) {
secureboot_verify(RESOURCE_ID_KERNEL,
@@ -458,6 +460,27 @@ int64_t mem_dump_free(void);
void *fdt;
+/*
+ * CPU job callback: print the PIR of the CPU running it, then call
+ * vm_exit() on that CPU. The job argument is not used.
+ */
+static void cpu_stop_vm(void *arg __unused)
+{
+	struct cpu_thread *self = this_cpu();
+
+	printf("CPU PIR 0x%04x cpu_stop_vm\n", self->pir);
+	vm_exit();
+}
+
+/*
+ * Run cpu_stop_vm() on every CPU yielded by for_each_available_cpu().
+ *
+ * The calling CPU runs it directly when the walk reaches it; for every
+ * other CPU it is queued as a job and that job is waited on before the
+ * walk moves on to the next CPU.
+ */
+static void cpu_all_stop_vm(void)
+{
+	struct cpu_thread *target;
+
+	for_each_available_cpu(target) {
+		if (target != this_cpu()) {
+			cpu_wait_job(cpu_queue_job(target, "cpu_stop_vm",
+						   cpu_stop_vm, NULL), true);
+		} else {
+			cpu_stop_vm(NULL);
+		}
+	}
+}
+
+
void __noreturn load_and_boot_kernel(bool is_reboot)
{
const struct dt_property *memprop;
@@ -542,12 +565,6 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
cpu_give_self_os();
- mem_dump_free();
-
- /* Take processours out of nap */
- cpu_set_sreset_enable(false);
- cpu_set_ipi_enable(false);
-
/* Dump the selected console */
stdoutp = dt_prop_get_def(dt_chosen, "linux,stdout-path", NULL);
prlog(PR_DEBUG, "INIT: stdout-path: %s\n", stdoutp ? stdoutp : "");
@@ -559,6 +576,18 @@ void __noreturn load_and_boot_kernel(bool is_reboot)
debug_descriptor.state_flags |= OPAL_BOOT_COMPLETE;
fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);
+
+ /* Take processors out of nap */
+ cpu_set_sreset_enable(false);
+ cpu_set_ipi_enable(false);
+
+ mem_dump_free();
+
+ /* Go back to realmode and tear down our VM before booting kernel */
+ printf("VMM: TEARDOWN\n");
+ cpu_all_stop_vm();
+ vm_destroy();
+
if (kernel_32bit)
start_kernel32(kernel_entry, fdt, mem_top);
start_kernel(kernel_entry, fdt, mem_top);
@@ -720,17 +749,21 @@ void setup_reset_vector(void)
{
uint32_t *src, *dst;
+ vm_map(0x100, 0x100);
/* Copy the reset code over the entry point. */
src = &reset_patch_start;
dst = (uint32_t *)0x100;
while(src < &reset_patch_end)
*(dst++) = *(src++);
sync_icache();
+ vm_unmap(0x100, 0x100);
cpu_set_sreset_enable(true);
}
void copy_exception_vectors(void)
{
+ vm_map(0x0, 0x2000);
+
/* Backup previous vectors as this could contain a kernel
* image.
*/
@@ -743,6 +776,7 @@ void copy_exception_vectors(void)
BUILD_ASSERT((&reset_patch_end - &reset_patch_start) < 0x1f00);
memcpy((void *)0x100, (void *)(SKIBOOT_BASE + 0x100), 0x1f00);
sync_icache();
+ vm_unmap(0x0, 0x2000);
}
static void per_thread_sanity_checks(void)
@@ -930,6 +964,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
*/
mem_region_init();
+ vm_init();
+
/* Reserve HOMER and OCC area */
homer_init();
@@ -950,7 +986,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
cpu_set_ipi_enable(true);
/* Allocate our split trace buffers now. Depends add_opal_node() */
- init_trace_buffers();
+ // XXX: this blows up due to NUMA allocation
+ // init_trace_buffers();
/* On P7/P8, get the ICPs and make sure they are in a sane state */
init_interrupts();
@@ -1111,6 +1148,8 @@ void __noreturn __secondary_cpu_entry(void)
{
struct cpu_thread *cpu = this_cpu();
+ vm_init_secondary();
+
/* Secondary CPU called in */
cpu_callin(cpu);
@@ -26,7 +26,7 @@
#define STACK_BUF_ENTRIES 60
static struct bt_entry bt_buf[STACK_BUF_ENTRIES];
-extern uint32_t _stext, _etext;
+// extern uint32_t _stext, _extext;
/* Dumps backtrace to buffer */
void __nomcount __backtrace(struct bt_entry *entries, unsigned int *count)
new file mode 100644
@@ -0,0 +1,335 @@
+/* Copyright 2018 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <opal.h>
+#include <stack.h>
+#include <cpu.h>
+#include <trace.h>
+#include <ccan/str/str.h>
+#include <ccan/container_of/container_of.h>
+
+/*
+ * Set to true at the end of vm_init() and back to false in vm_destroy().
+ * vm_map(), vm_unmap(), vm_enter(), vm_exit() and vm_destroy() all do
+ * nothing while it is false.
+ */
+static bool vm_setup = false;
+
+/*
+ * Write one SLB entry with slbmte.
+ *
+ * ea:    effective address; (ea >> 28) is used as the 256MB ESID
+ * va:    virtual address; (va >> 28) is used as the 256MB VSID
+ * index: value OR-ed into the low bits of the RB operand (the SLB slot)
+ *
+ * The entry is written with V = 1 and Kp = 1.
+ */
+static void slb_install(unsigned long ea, unsigned long va, unsigned int index)
+{
+	unsigned long vsid = va >> 28;
+	unsigned long esid = ea >> 28;
+	/* RS: 256MB VSID plus Kp = 1 */
+	unsigned long rs = (vsid << (63-51)) | (1UL << (63-53));
+	/* RB: 256MB ESID, V = 1, and the slot index */
+	unsigned long rb = (esid << (63-35)) | (1UL << (63-36)) | index;
+
+	asm volatile("slbmte %0,%1" : : "r"(rs), "r"(rb) : "memory");
+}
+
+/*
+ * Issue slbie for the 256MB segment containing ea (ea with its low 28
+ * bits cleared is passed as the operand).
+ */
+static void slb_remove(unsigned long ea)
+{
+	unsigned long seg_base = (ea >> 28) << 28;
+
+	asm volatile("slbie %0" : : "r"(seg_base) : "memory");
+}
+
+/*
+ * Issue an slbmte with both operands zero, followed by slbia.
+ * NOTE(review): the leading slbmte presumably clears SLB entry 0, which
+ * slbia is understood to leave alone -- confirm against the ISA.
+ */
+static void slb_remove_all(void)
+{
+ asm volatile("slbmte %0,%0 ; slbia" : : "r"(0) : "memory");
+}
+
+/*
+ * One hash page table entry: two 64-bit doublewords. htab_install()
+ * stores them big-endian (cpu_to_be64) and treats bit 0x1 of dword[0]
+ * as the valid bit.
+ */
+struct hpte {
+ unsigned long dword[2];
+};
+
+/* A page table entry group: the 8 slots searched for a given hash. */
+struct hpteg {
+ struct hpte hpte[8];
+};
+
+/* The hash page table itself; allocated in vm_init(), freed in vm_destroy(). */
+struct hpteg *htab;
+/* Size of htab in bytes (set in vm_init()). */
+unsigned long htab_nr_bytes;
+/* Number of PTEGs in htab: htab_nr_bytes / sizeof(struct hpteg). */
+unsigned long htab_nr_ptegs;
+/* htab_nr_ptegs - 1; ANDed with the hash to pick a PTEG index. */
+unsigned long htab_pteg_mask;
+
+/*
+ * Insert a 4K translation va -> pa into the hash page table.
+ *
+ * va: virtual address of the page
+ * pa: physical address of the page
+ * rw: non-zero for a writable mapping; zero sets the read-only
+ *     protection bits in the second doubleword
+ * ex: non-zero for an executable mapping; zero sets the no-execute bit
+ *
+ * The PTEG is chosen by hashing va; the first slot whose valid bit is
+ * clear is used. Asserts if a valid slot in the PTEG already carries
+ * the same abbreviated VA, if an invalid slot is not fully zero, or if
+ * all 8 slots are in use.
+ */
+static void htab_install(unsigned long va, unsigned long pa, int rw, int ex)
+{
+	unsigned long hash;
+	struct hpteg *hpteg;
+	unsigned long ava = va >> 23;
+	unsigned long arpn = pa >> 12;
+	unsigned long dw0, dw1;
+	unsigned int i;
+
+	hash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL);
+	hpteg = &htab[hash & htab_pteg_mask];
+
+	/* The entry contents do not depend on the slot, so build them once. */
+	dw0 = (ava << (63-56)) | 0x1;
+
+	dw1 = (arpn << (63-43 - 8));
+	if (!rw)
+		dw1 |= (1UL << (63 - 0)) | (1UL << (63 - 63 + 1));
+	if (!ex)
+		dw1 |= (1UL << (63 - 61));
+	dw1 |= (1UL << (63 - 60 + 1)); /* WIMG = 0010 */
+
+	for (i = 0; i < 8; i++) {
+		struct hpte *hpte = &hpteg->hpte[i];
+
+		if (be64_to_cpu(hpte->dword[0]) & 1) {
+			/* Slot in use: it must not already map this page. */
+			assert(be64_to_cpu(hpte->dword[0]) >> 7 != ava);
+			continue;
+		}
+
+		assert(!hpte->dword[0]);
+		assert(!hpte->dword[1]);
+
+		/*
+		 * Write the second doubleword first and order it before
+		 * the store that sets the valid bit.
+		 */
+		hpte->dword[1] = cpu_to_be64(dw1);
+		eieio();
+		hpte->dword[0] = cpu_to_be64(dw0);
+		return;
+	}
+	/* PTEG full */
+	assert(0);
+}
+
+/*
+ * Remove the 4K translation for va from the hash page table and
+ * invalidate it with tlbie.
+ *
+ * The PTEG is located with the same hash as htab_install(). Invalid
+ * slots must be fully zero (asserted); valid slots are compared on the
+ * abbreviated VA. Asserts if no matching entry is found.
+ */
+static void htab_remove(unsigned long va)
+{
+	unsigned long hash;
+	struct hpteg *hpteg;
+	unsigned long ava = va >> 23;
+	unsigned int i;
+
+	hash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL);
+	hpteg = &htab[hash & htab_pteg_mask];
+
+	for (i = 0; i < 8; i++) {
+		struct hpte *hpte = &hpteg->hpte[i];
+
+		if (!(be64_to_cpu(hpte->dword[0]) & 1)) {
+			assert(!hpte->dword[0]);
+			assert(!hpte->dword[1]);
+			continue;
+		}
+
+		if (be64_to_cpu(hpte->dword[0]) >> 7 != ava)
+			continue;
+
+		/* Clear the valid doubleword first, then the rest. */
+		hpte->dword[0] = 0;
+		eieio();
+		hpte->dword[1] = 0;
+		eieio();
+		/*
+		 * tlbie takes the page's virtual address (low 12 bits
+		 * clear for a 4K page), not the abbreviated VA that is
+		 * stored in the HPTE.
+		 */
+		asm volatile("tlbie %0,%1" : : "r"(va & ~0xfffUL), "r"(0));
+		asm volatile("eieio ; tlbsync ; ptesync" ::: "memory");
+		return;
+	}
+	/* No entry for va */
+	assert(0);
+}
+
+#define PAGE_SIZE 4096
+
+/*
+ * Temporarily map [addr, addr + len) 1:1, read-write and no-execute.
+ *
+ * addr: start address; rounded down to a page boundary
+ * len:  length in bytes; the end is rounded up to a page boundary
+ *
+ * Does nothing until vm_init() has completed. The rounded range must
+ * lie within a single 256MB segment (asserted). If that segment is not
+ * skiboot's own, an SLB entry for it is installed at index 1. Pages
+ * inside [SKIBOOT_BASE, SKIBOOT_BASE + SKIBOOT_SIZE) are skipped, as
+ * vm_init() already maps that range.
+ *
+ * NOTE(review): SLB index 1 is used for every foreign segment, so two
+ * live mappings in different segments presumably cannot coexist --
+ * confirm with callers.
+ */
+void vm_map(unsigned long addr, unsigned long len)
+{
+	unsigned long va;
+	unsigned long vseg;
+	unsigned long end = addr + len;
+
+	end = (end + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	len = end - addr;
+
+	if (!vm_setup)
+		return;
+
+	printf("vm_map: %lx-%lx\n", addr, addr + len);
+	vseg = addr >> 28;
+	/*
+	 * end is exclusive: compare the segment of the last byte so a
+	 * range that stops exactly on a segment boundary is accepted.
+	 */
+	assert(addr == end || vseg == ((end - 1) >> 28)); /* same segment */
+
+	if (vseg != (SKIBOOT_BASE >> 28))
+		slb_install(addr, addr, 1);
+
+	for (va = addr; va < end; va += PAGE_SIZE) {
+		if (va >= SKIBOOT_BASE && va < SKIBOOT_BASE + SKIBOOT_SIZE)
+			continue;
+		htab_install(va, va, 1, 0);
+	}
+}
+
+/*
+ * Undo a vm_map() of [addr, addr + len).
+ *
+ * addr: start address; rounded down to a page boundary
+ * len:  length in bytes; the end is rounded up to a page boundary
+ *
+ * Does nothing until vm_init() has completed. The rounded range must
+ * lie within a single 256MB segment (asserted). If that segment is not
+ * skiboot's own, its SLB entry is removed. Pages inside
+ * [SKIBOOT_BASE, SKIBOOT_BASE + SKIBOOT_SIZE) are left mapped; every
+ * other page in the range is removed with htab_remove(), which asserts
+ * if the page was not mapped.
+ */
+void vm_unmap(unsigned long addr, unsigned long len)
+{
+	unsigned long va;
+	unsigned long vseg;
+	unsigned long end = addr + len;
+
+	end = (end + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	len = end - addr;
+
+	if (!vm_setup)
+		return;
+
+	vseg = addr >> 28;
+	/*
+	 * end is exclusive: compare the segment of the last byte so a
+	 * range that stops exactly on a segment boundary is accepted.
+	 */
+	assert(addr == end || vseg == ((end - 1) >> 28)); /* same segment */
+
+	printf("vm_unmap: %lx-%lx vseg:%lx\n", addr, addr + len, vseg);
+
+	if (vseg != (SKIBOOT_BASE >> 28))
+		slb_remove(addr);
+
+	for (va = addr; va < end; va += PAGE_SIZE) {
+		if (va >= SKIBOOT_BASE && va < SKIBOOT_BASE + SKIBOOT_SIZE)
+			continue;
+		htab_remove(va);
+	}
+}
+
+
+/* One partition table entry: two 64-bit doublewords. */
+struct prte {
+ unsigned long dword[2];
+};
+
+/*
+ * Partition table; allocated in vm_init(), where entry 0 is pointed at
+ * htab, and loaded into SPR_PTCR by vm_init_cpu().
+ */
+static struct prte *prtab;
+
+/*
+ * End (exclusive) of the stack area mapped so far by vm_map_stacks().
+ * Starts at the end of the skiboot image range, i.e. nothing mapped.
+ */
+static unsigned long stack_end = SKIBOOT_BASE + SKIBOOT_SIZE;
+
+/*
+ * Map the CPU stack area, which starts at SKIBOOT_BASE + SKIBOOT_SIZE
+ * and holds (cpu_max_pir + 1) stacks of STACK_SIZE bytes each, 1:1 and
+ * read-write / no-execute.
+ *
+ * Only the part not yet mapped (from stack_end onwards) is installed,
+ * so the function can be called again after cpu_max_pir has grown; it
+ * is a no-op when the area is already fully mapped.
+ */
+void vm_map_stacks(void)
+{
+	unsigned long start = stack_end;
+	unsigned long end = SKIBOOT_BASE + SKIBOOT_SIZE;
+	unsigned long va;
+
+	/*
+	 * Compute the absolute end of the stack area from its base, not
+	 * from what has already been mapped, so repeat calls only cover
+	 * the newly required stacks.
+	 */
+	end += (cpu_max_pir + 1)*STACK_SIZE;
+
+	if (start >= end)
+		return;
+
+	printf("VMM: map stacks for %u\n", cpu_max_pir);
+
+	for (va = start; va < end; va += PAGE_SIZE)
+		htab_install(va, va, 1, 0);
+
+	printf("Installed TLB:%lx-%lx\n", start, end);
+
+	stack_end = end;
+}
+
+/*
+ * Per-CPU MMU register setup, run on the calling CPU:
+ *  - clear the LPCR bits selected by PPC_BITMASK(0,3), PPC_BIT(41),
+ *    PPC_BIT(43) and PPC_BIT(54)
+ *  - zero LPID, PID and HRMOR
+ *  - point PTCR at the partition table
+ *  - install SLB entry 0 mapping skiboot's own segment 1:1
+ */
+static void vm_init_cpu(void)
+{
+	unsigned long lpcr = mfspr(SPR_LPCR);
+
+	lpcr &= ~(PPC_BITMASK(0,3) | PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(54));
+	mtspr(SPR_LPCR, lpcr);
+
+	mtspr(SPR_LPID, 0);
+	mtspr(SPR_PID, 0);
+	mtspr(SPR_HRMOR, 0);
+	mtspr(SPR_PTCR, (unsigned long)prtab);
+
+	slb_install(SKIBOOT_BASE, SKIBOOT_BASE, 0);
+}
+
+/*
+ * Per-CPU teardown on the calling CPU: run slb_remove_all() and then
+ * write zero to PTCR.
+ */
+static void vm_cleanup_cpu(void)
+{
+ slb_remove_all();
+ mtspr(SPR_PTCR, 0);
+}
+
+/*
+ * Bring the calling CPU into the VM environment: set up its MMU
+ * registers with vm_init_cpu(), then call vm_enter() (which only turns
+ * translation on if vm_init() has completed).
+ */
+void vm_init_secondary(void)
+{
+ vm_init_cpu();
+ vm_enter();
+}
+
+/*
+ * Leave the VM environment on the calling CPU: vm_exit() first (a no-op
+ * unless vm_setup is true), then vm_cleanup_cpu() to drop the SLB
+ * contents and clear PTCR.
+ */
+void vm_exit_cleanup(void)
+{
+ vm_exit();
+ vm_cleanup_cpu();
+}
+
+/*
+ * Set MSR[IR] and MSR[DR] on the calling CPU. Does nothing unless
+ * vm_setup is true.
+ */
+void vm_enter(void)
+{
+	if (!vm_setup)
+		return;
+
+	mtmsr(mfmsr() | (MSR_IR|MSR_DR));
+}
+
+/*
+ * Clear MSR[IR] and MSR[DR] on the calling CPU. Does nothing unless
+ * vm_setup is true.
+ */
+void vm_exit(void)
+{
+	if (!vm_setup)
+		return;
+
+	mtmsr(mfmsr() & ~(MSR_IR|MSR_DR));
+}
+
+/*
+ * Build the boot CPU's virtual memory environment and switch it on.
+ *
+ * Allocates and zeroes a 64KB partition table and a 256KB hash page
+ * table, points partition table entry 0 at the hash table, programs the
+ * calling CPU's MMU registers, and installs 1:1 4K mappings for
+ * [SKIBOOT_BASE, SKIBOOT_BASE + SKIBOOT_SIZE):
+ *   - pages in [_stext, _etext)               read-only, executable
+ *   - pages in [__rodata_start, __rodata_end) read-only, no-execute
+ *   - everything else                         read-write, no-execute
+ * It then maps the CPU stacks, sets vm_setup and calls vm_enter().
+ *
+ * Asserts if either allocation fails.
+ */
+void vm_init(void)
+{
+	unsigned long va;
+
+	prtab = memalign(64*1024, 64*1024);
+	assert(prtab);
+	memset(prtab, 0, 64*1024);
+
+	htab_nr_bytes = 1UL<<18;
+	htab_nr_ptegs = htab_nr_bytes / sizeof(struct hpteg);
+	htab_pteg_mask = htab_nr_ptegs - 1;
+	htab = memalign(1UL<<18, htab_nr_bytes);
+	assert(htab);
+	memset(htab, 0, htab_nr_bytes);
+
+	prtab[0].dword[0] = cpu_to_be64((unsigned long)htab);
+	prtab[0].dword[1] = 0;
+
+	/* Order the table setup before the registers are pointed at it. */
+	eieio();
+
+	vm_init_cpu();
+
+	printf("mapping skiboot base %x-%x\n", SKIBOOT_BASE, HEAP_BASE);
+	printf(" text %lx-%lx\n", (unsigned long)_stext, (unsigned long)_etext);
+	printf("Installed SLB:%x-%x\n", SKIBOOT_BASE, SKIBOOT_BASE + (256*1024*1024));
+	for (va = SKIBOOT_BASE; va < HEAP_BASE; va += PAGE_SIZE) {
+		/*
+		 * The section end symbols are exclusive. Using '<' keeps
+		 * the first page of the following section from inheriting
+		 * this section's protection when the end symbol happens to
+		 * be page aligned (e.g. .data becoming read-only).
+		 */
+		if (va >= (unsigned long)_stext && va < (unsigned long)_etext)
+			htab_install(va, va, 0, 1);
+		else if (va >= (unsigned long)__rodata_start &&
+			 va < (unsigned long)__rodata_end)
+			htab_install(va, va, 0, 0);
+		else
+			htab_install(va, va, 1, 0);
+	}
+	for (; va < SKIBOOT_BASE + SKIBOOT_SIZE; va += PAGE_SIZE)
+		htab_install(va, va, 1, 0);
+
+	printf("Installed TLB:%x-%lx\n", SKIBOOT_BASE, va);
+
+	vm_map_stacks();
+
+	eieio();
+
+	printf("PRTAB:%p\n", prtab);
+	printf("HTAB:%p\n", htab);
+
+	/* From here on vm_enter()/vm_exit()/vm_map()/vm_unmap() are live. */
+	vm_setup = true;
+
+	vm_enter();
+}
+
+/*
+ * Tear down the VM environment built by vm_init().
+ *
+ * Does nothing if vm_init() has not completed. Otherwise the calling
+ * CPU leaves translated mode and drops its SLB/PTCR state, vm_setup is
+ * cleared, every page mapped by vm_init() and vm_map_stacks() is
+ * removed from the hash table (with a tlbie each), and both tables are
+ * freed.
+ *
+ * vm_exit_cleanup() must run before vm_setup is cleared, because
+ * vm_exit() is a no-op once vm_setup is false.
+ */
+void vm_destroy(void)
+{
+	unsigned long va;
+
+	if (!vm_setup)
+		return;
+
+	vm_exit_cleanup();
+
+	vm_setup = false;
+
+	/*
+	 * Walk up to stack_end, i.e. exactly what has been mapped, rather
+	 * than recomputing the stack area from cpu_max_pir: htab_remove()
+	 * asserts on a page that was never installed.
+	 */
+	for (va = SKIBOOT_BASE; va < stack_end; va += PAGE_SIZE)
+		htab_remove(va);
+
+	free(htab);
+	htab = NULL;
+	free(prtab);
+	prtab = NULL;
+
+	/* Nothing is mapped any more; let a later vm_init() start afresh. */
+	stack_end = SKIBOOT_BASE + SKIBOOT_SIZE;
+}
@@ -22,6 +22,9 @@
static struct mem_region *nvram_region;
static struct lock fake_nvram_lock = LOCK_UNLOCKED;
+void vm_map(unsigned long addr, unsigned long len);
+void vm_unmap(unsigned long addr, unsigned long len);
+
int fake_nvram_info(uint32_t *total_size)
{
nvram_region = find_mem_region("ibm,fake-nvram");
@@ -39,11 +42,13 @@ int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len)
if (!nvram_region)
return -ENODEV;
+ vm_map(nvram_region->start + src, len);
lock(&fake_nvram_lock);
memcpy(dst, (void *) (nvram_region->start + src), len);
unlock(&fake_nvram_lock);
nvram_read_complete(true);
+ vm_unmap(nvram_region->start + src, len);
return 0;
}
@@ -53,6 +53,7 @@
#define SPR_SRR1 0x01b /* RW: Exception save/restore reg 1 */
#define SPR_CFAR 0x01c /* RW: Come From Address Register */
#define SPR_AMR 0x01d /* RW: Authority Mask Register */
+#define SPR_PID 0x030 /* RW: PID register */
#define SPR_IAMR 0x03d /* RW: Instruction Authority Mask Register */
#define SPR_RPR 0x0ba /* RW: Relative Priority Register */
#define SPR_TBRL 0x10c /* RO: Timebase low */
@@ -75,9 +76,11 @@
#define SPR_HSRR1 0x13b /* RW: HV Exception save/restore reg 1 */
#define SPR_TFMR 0x13d
#define SPR_LPCR 0x13e
+#define SPR_LPID 0x13f /* RW: LPID register */
#define SPR_HMER 0x150 /* Hypervisor Maintenance Exception */
#define SPR_HMEER 0x151 /* HMER interrupt enable mask */
#define SPR_AMOR 0x15d
+#define SPR_PTCR 0x1d0 /* RW: Partition table control register */
#define SPR_PSSCR 0x357 /* RW: Stop status and control (ISA 3) */
#define SPR_TSCR 0x399
#define SPR_HID0 0x3f0
@@ -49,6 +49,7 @@ struct mem_region;
extern struct mem_region *mem_region_next(struct mem_region *region);
#ifndef __TESTING__
+extern char _stext[], _etext[];
/* Readonly section start and end. */
extern char __rodata_start[], __rodata_end[];
@@ -357,4 +358,15 @@ extern int occ_sensor_group_clear(u32 group_hndl, int token);
extern void occ_add_sensor_groups(struct dt_node *sg, u32 *phandles,
int nr_phandles, int chipid);
+/* core/vm.c */
+void vm_map(unsigned long addr, unsigned long len);
+void vm_unmap(unsigned long addr, unsigned long len);
+void vm_init(void);
+void vm_destroy(void);
+void vm_init_secondary(void);
+void vm_enter(void);
+void vm_exit(void);
+void vm_exit_cleanup(void);
+void vm_map_stacks(void);
+
#endif /* __SKIBOOT_H */
@@ -17,16 +17,24 @@
#include <skiboot.h>
#include "container.h"
+void vm_map(unsigned long addr, unsigned long len);
+void vm_unmap(unsigned long addr, unsigned long len);
+
bool stb_is_container(const void *buf, size_t size)
{
ROM_container_raw *c;
+ bool ret = true;;
c = (ROM_container_raw*) buf;
if (!buf || size < SECURE_BOOT_HEADERS_SIZE)
return false;
- if (be32_to_cpu(c->magic_number) != ROM_MAGIC_NUMBER )
- return false;
- return true;
+
+ vm_map((unsigned long )&c->magic_number, 4);
+ if (be32_to_cpu(c->magic_number) != ROM_MAGIC_NUMBER)
+ ret = false;
+ vm_unmap((unsigned long )&c->magic_number, 4);
+
+ return ret;
}
uint32_t stb_payload_magic(const void *buf, size_t size)
@@ -51,19 +51,21 @@ SECTIONS
KEEP(*(.cpuctrl.data))
}
- . = ALIGN(0x10);
+ . = ALIGN(0x1000);
_stext = .;
.text : {
*(.text*)
*(.sfpr)
}
_etext = .;
+ . = ALIGN(0x1000);
.rodata : {
__rodata_start = .;
*(.rodata .rodata.*)
__rodata_end = .;
}
+ . = ALIGN(0x1000);
.data : {
/*