get:
Show a patch.

patch:
Partially update a patch.

put:
Update a patch.

GET /api/patches/1110283/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 1110283,
    "url": "http://patchwork.ozlabs.org/api/patches/1110283/?format=api",
    "web_url": "http://patchwork.ozlabs.org/project/skiboot/patch/20190605023616.26893-3-npiggin@gmail.com/",
    "project": {
        "id": 44,
        "url": "http://patchwork.ozlabs.org/api/projects/44/?format=api",
        "name": "skiboot firmware development",
        "link_name": "skiboot",
        "list_id": "skiboot.lists.ozlabs.org",
        "list_email": "skiboot@lists.ozlabs.org",
        "web_url": "http://github.com/open-power/skiboot",
        "scm_url": "http://github.com/open-power/skiboot",
        "webscm_url": "",
        "list_archive_url": "",
        "list_archive_url_format": "",
        "commit_url_format": ""
    },
    "msgid": "<20190605023616.26893-3-npiggin@gmail.com>",
    "list_archive_url": null,
    "date": "2019-06-05T02:36:15",
    "name": "[RFC,2/3] virtual memory for OPAL boot",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": false,
    "hash": "2cf81d4dd562b783f8175fb30fd6024c6c40d6c4",
    "submitter": {
        "id": 69518,
        "url": "http://patchwork.ozlabs.org/api/people/69518/?format=api",
        "name": "Nicholas Piggin",
        "email": "npiggin@gmail.com"
    },
    "delegate": null,
    "mbox": "http://patchwork.ozlabs.org/project/skiboot/patch/20190605023616.26893-3-npiggin@gmail.com/mbox/",
    "series": [
        {
            "id": 111858,
            "url": "http://patchwork.ozlabs.org/api/series/111858/?format=api",
            "web_url": "http://patchwork.ozlabs.org/project/skiboot/list/?series=111858",
            "date": "2019-06-05T02:36:13",
            "name": "WIP VMM for OPAL boot",
            "version": 1,
            "mbox": "http://patchwork.ozlabs.org/series/111858/mbox/"
        }
    ],
    "comments": "http://patchwork.ozlabs.org/api/patches/1110283/comments/",
    "check": "fail",
    "checks": "http://patchwork.ozlabs.org/api/patches/1110283/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org>",
        "X-Original-To": [
            "incoming@patchwork.ozlabs.org",
            "skiboot@lists.ozlabs.org"
        ],
        "Delivered-To": [
            "patchwork-incoming@bilbo.ozlabs.org",
            "skiboot@lists.ozlabs.org"
        ],
        "Received": [
            "from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\tkey-exchange X25519 server-signature RSA-PSS (4096 bits))\n\t(No client certificate requested)\n\tby ozlabs.org (Postfix) with ESMTPS id 45JXy74Fw7z9s6w\n\tfor <incoming@patchwork.ozlabs.org>;\n\tWed,  5 Jun 2019 12:37:35 +1000 (AEST)",
            "from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3])\n\tby lists.ozlabs.org (Postfix) with ESMTP id 45JXy72vRWzDqVQ\n\tfor <incoming@patchwork.ozlabs.org>;\n\tWed,  5 Jun 2019 12:37:35 +1000 (AEST)",
            "from mail-pl1-x62c.google.com (mail-pl1-x62c.google.com\n\t[IPv6:2607:f8b0:4864:20::62c])\n\t(using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n\tkey-exchange X25519 server-signature RSA-PSS (2048 bits)\n\tserver-digest SHA256) (No client certificate requested)\n\tby lists.ozlabs.org (Postfix) with ESMTPS id 45JXxZ3hzxzDqVQ\n\tfor <skiboot@lists.ozlabs.org>; Wed,  5 Jun 2019 12:37:06 +1000 (AEST)",
            "by mail-pl1-x62c.google.com with SMTP id d21so9123244plr.3\n\tfor <skiboot@lists.ozlabs.org>; Tue, 04 Jun 2019 19:37:06 -0700 (PDT)",
            "from bobo.local0.net ([61.68.71.55])\n\tby smtp.gmail.com with ESMTPSA id\n\tc9sm9077122pfn.3.2019.06.04.19.36.58\n\t(version=TLS1_3 cipher=AEAD-AES256-GCM-SHA384 bits=256/256);\n\tTue, 04 Jun 2019 19:37:01 -0700 (PDT)"
        ],
        "Authentication-Results": [
            "ozlabs.org;\n\tdmarc=fail (p=none dis=none) header.from=gmail.com",
            "ozlabs.org;\n\tdkim=fail reason=\"signature verification failed\" (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"Rgvdi2OH\"; dkim-atps=neutral",
            "lists.ozlabs.org;\n\tspf=pass (mailfrom) smtp.mailfrom=gmail.com\n\t(client-ip=2607:f8b0:4864:20::62c; helo=mail-pl1-x62c.google.com;\n\tenvelope-from=npiggin@gmail.com; receiver=<UNKNOWN>)",
            "lists.ozlabs.org;\n\tdmarc=pass (p=none dis=none) header.from=gmail.com",
            "lists.ozlabs.org; dkim=pass (2048-bit key;\n\tunprotected) header.d=gmail.com header.i=@gmail.com\n\theader.b=\"Rgvdi2OH\"; dkim-atps=neutral"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;\n\th=from:to:cc:subject:date:message-id:in-reply-to:references\n\t:mime-version:content-transfer-encoding;\n\tbh=JMfnK3aXjPYJzw4ltRMge4cUIHFNhO5nAH9Ji2bgDpE=;\n\tb=Rgvdi2OHnsbDt13PFVgmfaKCzjDEB3YVu/NNk2w76dZojV80L3XtcL57xm1wJm4QNF\n\tiCLRKcpqwYvVIDzAXajR3USPeotez952ie73QXpweOYeOUuPbByjamZKCbiLTKksRw57\n\tl093JeyXeVVbr6JajGnZYEDwwost+mu89BnlsxMytyQR7H3Gg4NNOeRwotQpdQ9wHoZs\n\ti91yMBIpzM7ZCRnBpqdJv8uVasIQj9iAP+MCJBwxjbykzOZg3t+QBiemw9QpRInJME9d\n\tlGw/IjTz8Ndso/xWkr65l+SOLi91UhCHqNRCU6g0zXdMg5d3O4VMzS9ZDdIgzwt+X1R/\n\twlSg==",
        "X-Google-DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed;\n\td=1e100.net; s=20161025;\n\th=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to\n\t:references:mime-version:content-transfer-encoding;\n\tbh=JMfnK3aXjPYJzw4ltRMge4cUIHFNhO5nAH9Ji2bgDpE=;\n\tb=HzxDbd4cGKXQ2waE+n/Kxy4pCYNHIn8afr7UOFyhX1tFwiWRvupb0yMsU36NcqkpCC\n\t+OroKE22buDrRJvhVoJwyBoO4i+itC3ApdNT9HKarwnCUHMlpM88G73Nfh23IKrympJf\n\tcOiWZRtYid5712fGZIfKu1MllnqK7vdz+stVsxk2mX22Zn1fkQHmO3ahBHjwdBuB7YAU\n\tKGRSYmEslwj8+fXikrgMHugFOhw0eNMywKOWy6eFH4uGMlgl7jkaX+2in9fA4KADyLH7\n\tPnnojwNqnwKS3PkDfVRStx8k7Z1usXktMYlSjI61dJMy4HS5V9N5NQvMA+xEk8PFRg8r\n\tyg4g==",
        "X-Gm-Message-State": "APjAAAVlB1vUKVXYBvHPvmRC+Mg1ey88XYKEjecWuzse0tQo7ievw5qp\n\t02P+AvxPLoUZnxJiAwmob8GSEp+G",
        "X-Google-Smtp-Source": "APXvYqz02veUjwNCYhlfTFCYIFyJXUmHli/A/g2yAy6sN6mdSBo+av7XiQ3WlUfAE+eqoWa3NZa7yg==",
        "X-Received": "by 2002:a17:902:2aab:: with SMTP id\n\tj40mr1492832plb.76.1559702222332; \n\tTue, 04 Jun 2019 19:37:02 -0700 (PDT)",
        "From": "Nicholas Piggin <npiggin@gmail.com>",
        "To": "skiboot@lists.ozlabs.org",
        "Date": "Wed,  5 Jun 2019 12:36:15 +1000",
        "Message-Id": "<20190605023616.26893-3-npiggin@gmail.com>",
        "X-Mailer": "git-send-email 2.20.1",
        "In-Reply-To": "<20190605023616.26893-1-npiggin@gmail.com>",
        "References": "<20190605023616.26893-1-npiggin@gmail.com>",
        "MIME-Version": "1.0",
        "Subject": "[Skiboot] [RFC PATCH 2/3] virtual memory for OPAL boot",
        "X-BeenThere": "skiboot@lists.ozlabs.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "Mailing list for skiboot development <skiboot.lists.ozlabs.org>",
        "List-Unsubscribe": "<https://lists.ozlabs.org/options/skiboot>,\n\t<mailto:skiboot-request@lists.ozlabs.org?subject=unsubscribe>",
        "List-Archive": "<http://lists.ozlabs.org/pipermail/skiboot/>",
        "List-Post": "<mailto:skiboot@lists.ozlabs.org>",
        "List-Help": "<mailto:skiboot-request@lists.ozlabs.org?subject=help>",
        "List-Subscribe": "<https://lists.ozlabs.org/listinfo/skiboot>,\n\t<mailto:skiboot-request@lists.ozlabs.org?subject=subscribe>",
        "Content-Type": "text/plain; charset=\"us-ascii\"",
        "Content-Transfer-Encoding": "7bit",
        "Errors-To": "skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org",
        "Sender": "\"Skiboot\"\n\t<skiboot-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org>"
    },
    "content": "vm_map / vm_unmap is a per-cpu mapping which can not nest. This returns\nan EA which is different than the PA of the memory when in vmm mode.\n\nvm_map_global / vm_unmap_global sets up globally visible 1:1 mappings.\n\nA list of global extents + a local extent per cpu is kept to describe\nactive mappings. Fault handlers look these up to install SLB/HPTE entries.\n\nThis should move toward having fewer global mappings for things kept\naround, and unmap them when finished, or better yet move to local\nmappings.\n---\n core/Makefile.inc    |   2 +-\n core/cpu.c           |  19 +-\n core/exceptions.c    |  40 ++-\n core/fast-reboot.c   |  30 +-\n core/flash.c         |   4 +-\n core/init.c          | 169 +++++++--\n core/mem_region.c    |  76 ++--\n core/opal.c          |  20 +-\n core/vm.c            | 812 +++++++++++++++++++++++++++++++++++++++++++\n hdata/spira.c        |  21 +-\n hw/fake-nvram.c      |  12 +-\n hw/homer.c           |   5 +\n hw/lpc-uart.c        |  31 +-\n hw/lpc.c             |   2 +\n hw/phb4.c            |   9 +-\n hw/psi.c             |   2 +\n hw/slw.c             |   4 +-\n hw/xive.c            |   5 +\n hw/xscom.c           |   4 +-\n include/cmpxchg.h    |   3 +\n include/cpu.h        |  22 ++\n include/elf-abi.h    |  20 +-\n include/io.h         |  57 ++-\n include/mem_region.h |   1 +\n include/processor.h  |  13 +-\n include/skiboot.h    |  27 ++\n libstb/container.c   |  12 +-\n skiboot.lds.S        |  56 +--\n 28 files changed, 1354 insertions(+), 124 deletions(-)\n create mode 100644 core/vm.c",
    "diff": "diff --git a/core/Makefile.inc b/core/Makefile.inc\nindex 21c12fb8d..cdc4adb8e 100644\n--- a/core/Makefile.inc\n+++ b/core/Makefile.inc\n@@ -1,7 +1,7 @@\n # -*-Makefile-*-\n \n SUBDIRS += core\n-CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o\n+CORE_OBJS = relocate.o console.o stack.o init.o chip.o mem_region.o vm.o\n CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o timebase.o\n CORE_OBJS += opal-msg.o pci.o pci-virt.o pci-slot.o pcie-slot.o\n CORE_OBJS += pci-opal.o fast-reboot.o device.o exceptions.o trace.o affinity.o\ndiff --git a/core/cpu.c b/core/cpu.c\nindex 54111a954..08bc78d7f 100644\n--- a/core/cpu.c\n+++ b/core/cpu.c\n@@ -389,6 +389,7 @@ static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)\n \tuint64_t lpcr = mfspr(SPR_LPCR) & ~SPR_LPCR_P8_PECE;\n \tstruct cpu_thread *cpu = this_cpu();\n \tunsigned int vec = 0;\n+\tbool vm_setup = cpu->vm_setup;\n \n \tif (!pm_enabled) {\n \t\tprlog_once(PR_DEBUG, \"cpu_idle_p8 called pm disabled\\n\");\n@@ -429,8 +430,13 @@ static unsigned int cpu_idle_p8(enum cpu_wake_cause wake_on)\n \t}\n \tisync();\n \n+\tif (vm_setup)\n+\t\tvm_exit();\n \t/* Enter nap */\n \tvec = enter_p8_pm_state(false);\n+\tmtmsrd(MSR_RI, 1);\n+\tif (vm_setup)\n+\t\tvm_enter();\n \n skip_sleep:\n \t/* Restore */\n@@ -485,15 +491,24 @@ static unsigned int cpu_idle_p9(enum cpu_wake_cause wake_on)\n \tisync();\n \n \tif (sreset_enabled) {\n+\t\tbool vm_setup = cpu->vm_setup;\n+\n \t\t/* stop with EC=1 (sreset) and ESL=1 (enable thread switch). */\n \t\t/* PSSCR SD=0 ESL=1 EC=1 PSSL=0 TR=3 MTL=0 RL=1 */\n \t\tpsscr = PPC_BIT(42) | PPC_BIT(43) |\n \t\t\tPPC_BITMASK(54, 55) | PPC_BIT(63);\n+\t\tif (vm_setup)\n+\t\t\tvm_exit();\n \t\tvec = enter_p9_pm_state(psscr);\n+\t\t/* XXX don't enable VM if 0x100 or 0x200 */\n+\t\tmtmsrd(MSR_RI, 1);\n+\t\tif (vm_setup)\n+\t\t\tvm_enter();\n \t} else {\n \t\t/* stop with EC=0 (resumes) which does not require sreset. 
*/\n \t\t/* PSSCR SD=0 ESL=0 EC=0 PSSL=0 TR=3 MTL=0 RL=1 */\n \t\tpsscr = PPC_BITMASK(54, 55) | PPC_BIT(63);\n+\t\t/* Can run with VM enabled */\n \t\tenter_p9_pm_lite_state(psscr);\n \t}\n \n@@ -536,12 +551,10 @@ static void cpu_idle_pm(enum cpu_wake_cause wake_on)\n \t\tdefault:\n \t\t\tbreak;\n \t\t}\n-\t\tmtmsrd(MSR_RI, 1);\n \n \t} else if (vec == 0x200) {\n \t\texception_entry_pm_mce();\n \t\tenable_machine_check();\n-\t\tmtmsrd(MSR_RI, 1);\n \t}\n }\n \n@@ -1374,7 +1387,7 @@ static int64_t opal_return_cpu(void)\n \t\tprintf(\"OPAL in_opal_call=%u\\n\", this_cpu()->in_opal_call);\n \t}\n \n-\t__secondary_cpu_entry();\n+\t__return_cpu_entry();\n \n \treturn OPAL_HARDWARE; /* Should not happen */\n }\ndiff --git a/core/exceptions.c b/core/exceptions.c\nindex 5e453264e..89b4451ab 100644\n--- a/core/exceptions.c\n+++ b/core/exceptions.c\n@@ -98,6 +98,41 @@ void exception_entry(struct stack_frame *stack)\n \t\t\t\"Fatal MCE at \"REG\"   \", nip);\n \t\tbreak;\n \n+\tcase 0x300:\n+\t\tif (vm_dsi(nip, stack->dar, !!(stack->dsisr & DSISR_ISSTORE)))\n+\t\t\tgoto out;\n+\t\tfatal = true;\n+\t\tl += snprintf(buf + l, EXCEPTION_MAX_STR - l,\n+\t\t\t\"Fatal %s address \"REG\" at \"REG\"   \",\n+\t\t\t(stack->dsisr & DSISR_ISSTORE) ? 
\"store\" : \"load\",\n+\t\t\tstack->dar, nip);\n+\t\tbreak;\n+\n+\tcase 0x380:\n+\t\tif (vm_dslb(nip, stack->dar))\n+\t\t\tgoto out;\n+\t\tfatal = true;\n+\t\tl += snprintf(buf + l, EXCEPTION_MAX_STR - l,\n+\t\t\t\"Fatal load/store address \"REG\" at \"REG\"   \",\n+\t\t\tstack->dar, nip);\n+\t\tbreak;\n+\n+\tcase 0x400:\n+\t\tif (vm_isi(nip))\n+\t\t\tgoto out;\n+\t\tfatal = true;\n+\t\tl += snprintf(buf + l, EXCEPTION_MAX_STR - l,\n+\t\t\t\"Fatal ifetch at \"REG\"   \", nip);\n+\t\tbreak;\n+\n+\tcase 0x480:\n+\t\tif (vm_islb(nip))\n+\t\t\tgoto out;\n+\t\tfatal = true;\n+\t\tl += snprintf(buf + l, EXCEPTION_MAX_STR - l,\n+\t\t\t\"Fatal ifetch at \"REG\"   \", nip);\n+\t\tbreak;\n+\n \tdefault:\n \t\tfatal = true;\n \t\tprerror(\"***********************************************\\n\");\n@@ -110,10 +145,11 @@ void exception_entry(struct stack_frame *stack)\n \tprerror(\"%s\\n\", buf);\n \tdump_regs(stack);\n \n+\tif (!fatal)\n+\t\tbacktrace();\n+out:\n \tif (fatal)\n \t\tabort();\n-\telse\n-\t\tbacktrace();\n \n \tif (hv) {\n \t\t/* Set up for SRR return */\ndiff --git a/core/fast-reboot.c b/core/fast-reboot.c\nindex 07f83a30f..c10b78d33 100644\n--- a/core/fast-reboot.c\n+++ b/core/fast-reboot.c\n@@ -355,6 +355,9 @@ void __noreturn fast_reboot_entry(void)\n \t * up and go processing jobs.\n \t */\n \tif (this_cpu() != boot_cpu) {\n+\t\tcleanup_cpu_state();\n+\n+\t\tsync();\n \t\tif (!fast_boot_release) {\n \t\t\tsmt_lowest();\n \t\t\twhile (!fast_boot_release)\n@@ -362,9 +365,6 @@ void __noreturn fast_reboot_entry(void)\n \t\t\tsmt_medium();\n \t\t}\n \t\tsync();\n-\t\tcleanup_cpu_state();\n-\t\tenable_machine_check();\n-\t\tmtmsrd(MSR_RI, 1);\n \n \t\t__secondary_cpu_entry();\n \t}\n@@ -379,15 +379,22 @@ void __noreturn fast_reboot_entry(void)\n \tif (proc_gen == proc_gen_p9)\n \t\txive_reset();\n \n+\t/* Cleanup ourselves */\n+\tcleanup_cpu_state();\n+\n+\t/* XXX: need this? 
*/\n+\tenable_machine_check();\n+\tmtmsrd(MSR_RI, 1);\n+\n+\t/* Enter virtual memory mode */\n+\tvm_init();\n+\n \tprlog(PR_INFO, \"RESET: Releasing secondaries...\\n\");\n \n \t/* Release everybody */\n \tsync();\n \tfast_boot_release = true;\n \n-\t/* Cleanup ourselves */\n-\tcleanup_cpu_state();\n-\n \t/* Set our state to active */\n \tsync();\n \tthis_cpu()->state = cpu_state_active;\n@@ -414,6 +421,7 @@ void __noreturn fast_reboot_entry(void)\n \tcpu_set_ipi_enable(true);\n \n \tif (!chip_quirk(QUIRK_MAMBO_CALLOUTS)) {\n+\t\tvoid *t;\n \t\t/*\n \t\t * mem_region_clear_unused avoids these preload regions\n \t\t * so it can run along side image preloading. Clear these\n@@ -423,8 +431,14 @@ void __noreturn fast_reboot_entry(void)\n \t\t * Mambo may have embedded payload here, so don't clear\n \t\t * it at all.\n \t\t */\n-\t\tmemset(KERNEL_LOAD_BASE, 0, KERNEL_LOAD_SIZE);\n-\t\tmemset(INITRAMFS_LOAD_BASE, 0, INITRAMFS_LOAD_SIZE);\n+\n+\t\tt = vm_map((unsigned long)KERNEL_LOAD_BASE, KERNEL_LOAD_SIZE, true);\n+\t\tmemset(t, 0, KERNEL_LOAD_SIZE);\n+\t\tvm_unmap((unsigned long)t, KERNEL_LOAD_SIZE);\n+\n+\t\tt = vm_map((unsigned long)INITRAMFS_LOAD_BASE, INITRAMFS_LOAD_SIZE, true);\n+\t\tmemset(t, 0, INITRAMFS_LOAD_SIZE);\n+\t\tvm_unmap((unsigned long)t, INITRAMFS_LOAD_SIZE);\n \t}\n \n \t/* Start preloading kernel and ramdisk */\ndiff --git a/core/flash.c b/core/flash.c\nindex 3da6d4a42..420ae3244 100644\n--- a/core/flash.c\n+++ b/core/flash.c\n@@ -762,9 +762,11 @@ done_reading:\n \t * Verify and measure the retrieved PNOR partition as part of the\n \t * secure boot and trusted boot requirements\n \t */\n+#if 0\n+// XXX: this chekstops\n \tsecureboot_verify(id, buf, *len);\n \ttrustedboot_measure(id, buf, *len);\n-\n+#endif\n \t/* Find subpartition */\n \tif (subid != RESOURCE_SUBID_NONE) {\n \t\tmemmove(buf, bufp, content_size);\ndiff --git a/core/init.c b/core/init.c\nindex 3db9df314..0fad02f67 100644\n--- a/core/init.c\n+++ b/core/init.c\n@@ -91,6 +91,7 @@ static 
bool try_load_elf64_le(struct elf_hdr *header)\n \tuint64_t load_base = (uint64_t)kh;\n \tstruct elf64_phdr *ph;\n \tunsigned int i;\n+\tbool ret = false;\n \n \tprintf(\"INIT: 64-bit LE kernel discovered\\n\");\n \n@@ -102,6 +103,9 @@ static bool try_load_elf64_le(struct elf_hdr *header)\n \t * but it will not work for any ELF binary.\n \t */\n \tph = (struct elf64_phdr *)(load_base + le64_to_cpu(kh->e_phoff));\n+\tvm_map_global(\"KERNEL ELF Program Headers\", (unsigned long)ph,\n+\t\t\tle16_to_cpu(kh->e_phnum)*sizeof(struct elf64_phdr),\n+\t\t\tfalse, false);\n \tfor (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {\n \t\tif (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)\n \t\t\tcontinue;\n@@ -118,7 +122,7 @@ static bool try_load_elf64_le(struct elf_hdr *header)\n \n \tif (!kernel_entry) {\n \t\tprerror(\"INIT: Failed to find kernel entry !\\n\");\n-\t\treturn false;\n+\t\tgoto out_unmap;\n \t}\n \tkernel_entry += load_base;\n \tkernel_32bit = false;\n@@ -130,7 +134,12 @@ static bool try_load_elf64_le(struct elf_hdr *header)\n \tprlog(PR_DEBUG, \"INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\\n\",\n \t      kernel_entry, kernel_size);\n \n-\treturn true;\n+\tret = true;\n+\n+out_unmap:\n+\tvm_unmap_global((unsigned long)ph, le16_to_cpu(kh->e_phnum)*sizeof(struct elf64_phdr));\n+\n+\treturn ret;\n }\n \n static bool try_load_elf64(struct elf_hdr *header)\n@@ -140,12 +149,17 @@ static bool try_load_elf64(struct elf_hdr *header)\n \tstruct elf64_phdr *ph;\n \tstruct elf64_shdr *sh;\n \tunsigned int i;\n+\tbool ret = false;\n+\n+\tvm_map_global(\"KERNEL ELF64 Header\", (unsigned long)header,\n+\t\t\tsizeof(struct elf64_hdr), false, false);\n \n \t/* Check it's a ppc64 LE ELF */\n \tif (kh->ei_ident == ELF_IDENT\t\t&&\n \t    kh->ei_data == ELF_DATA_LSB\t\t&&\n \t    kh->e_machine == le16_to_cpu(ELF_MACH_PPC64)) {\n-\t\treturn try_load_elf64_le(header);\n+\t\tret = try_load_elf64_le(header);\n+\t\tgoto out_unmap1;\n \t}\n \n \t/* Check it's a ppc64 ELF */\n@@ -153,7 
+167,7 @@ static bool try_load_elf64(struct elf_hdr *header)\n \t    kh->ei_data != ELF_DATA_MSB\t\t||\n \t    kh->e_machine != ELF_MACH_PPC64) {\n \t\tprerror(\"INIT: Kernel doesn't look like an ppc64 ELF\\n\");\n-\t\treturn false;\n+\t\tgoto out_unmap1;\n \t}\n \n \t/* Look for a loadable program header that has our entry in it\n@@ -164,6 +178,8 @@ static bool try_load_elf64(struct elf_hdr *header)\n \t * but it will not work for any ELF binary.\n \t */\n \tph = (struct elf64_phdr *)(load_base + kh->e_phoff);\n+\tvm_map_global(\"KERNEL ELF Program Headers\", (unsigned long)ph,\n+\t\t\tkh->e_phnum*sizeof(struct elf64_phdr), false, false);\n \tfor (i = 0; i < kh->e_phnum; i++, ph++) {\n \t\tif (ph->p_type != ELF_PTYPE_LOAD)\n \t\t\tcontinue;\n@@ -178,7 +194,7 @@ static bool try_load_elf64(struct elf_hdr *header)\n \n \tif (!kernel_entry) {\n \t\tprerror(\"INIT: Failed to find kernel entry !\\n\");\n-\t\treturn false;\n+\t\tgoto out_unmap2;\n \t}\n \n \t/* For the normal big-endian ELF ABI, the kernel entry points\n@@ -188,6 +204,8 @@ static bool try_load_elf64(struct elf_hdr *header)\n \t * to assuming it obeys the ABI.\n \t */\n \tsh = (struct elf64_shdr *)(load_base + kh->e_shoff);\n+\tvm_map_global(\"KERNEL ELF Section Headers\", (unsigned long)sh,\n+\t\t\tkh->e_shnum*sizeof(struct elf64_shdr), false, false);\n \tfor (i = 0; i < kh->e_shnum; i++, sh++) {\n \t\tif (sh->sh_addr <= kh->e_entry &&\n \t\t      (sh->sh_addr + sh->sh_size) > kh->e_entry)\n@@ -208,7 +226,15 @@ static bool try_load_elf64(struct elf_hdr *header)\n \tprintf(\"INIT: 64-bit kernel entry at 0x%llx, size 0x%lx\\n\",\n \t       kernel_entry, kernel_size);\n \n-\treturn true;\n+\tret = true;\n+\n+\tvm_unmap_global((unsigned long)sh, kh->e_shnum*sizeof(struct elf64_shdr));\n+out_unmap2:\n+\tvm_unmap_global((unsigned long)ph, kh->e_phnum*sizeof(struct elf64_phdr));\n+out_unmap1:\n+\tvm_unmap_global((unsigned long)header, sizeof(struct elf64_hdr));\n+\n+\treturn ret;\n }\n \n static bool 
try_load_elf32_le(struct elf_hdr *header)\n@@ -321,6 +347,7 @@ bool start_preload_kernel(void)\n \tint loaded;\n \n \t/* Try to load an external kernel payload through the platform hooks */\n+\tvm_map_global(\"KERNEL\", (unsigned long)KERNEL_LOAD_BASE, KERNEL_LOAD_SIZE, true, false);\n \tkernel_size = KERNEL_LOAD_SIZE;\n \tloaded = start_preload_resource(RESOURCE_ID_KERNEL,\n \t\t\t\t\tRESOURCE_SUBID_NONE,\n@@ -329,9 +356,11 @@ bool start_preload_kernel(void)\n \tif (loaded != OPAL_SUCCESS) {\n \t\tprintf(\"INIT: platform start load kernel failed\\n\");\n \t\tkernel_size = 0;\n+\t\tvm_unmap_global((unsigned long)KERNEL_LOAD_BASE, KERNEL_LOAD_SIZE);\n \t\treturn false;\n \t}\n \n+\tvm_map_global(\"INITRAMFS\", (unsigned long)INITRAMFS_LOAD_BASE, INITRAMFS_LOAD_SIZE, true, false);\n \tinitramfs_size = INITRAMFS_LOAD_SIZE;\n \tloaded = start_preload_resource(RESOURCE_ID_INITRAMFS,\n \t\t\t\t\tRESOURCE_SUBID_NONE,\n@@ -339,6 +368,7 @@ bool start_preload_kernel(void)\n \tif (loaded != OPAL_SUCCESS) {\n \t\tprintf(\"INIT: platform start load initramfs failed\\n\");\n \t\tinitramfs_size = 0;\n+\t\tvm_unmap_global((unsigned long)INITRAMFS_LOAD_BASE, INITRAMFS_LOAD_SIZE);\n \t\treturn false;\n \t}\n \n@@ -348,13 +378,16 @@ bool start_preload_kernel(void)\n static bool load_kernel(void)\n {\n \tvoid *stb_container = NULL;\n-\tstruct elf_hdr *kh;\n+\tstruct elf_hdr *kh, *t;\n+\tuint32_t ei_ident;\n+\tuint8_t ei_class;\n \tint loaded;\n \n \tprlog(PR_NOTICE, \"INIT: Waiting for kernel...\\n\");\n \n \tloaded = wait_for_resource_loaded(RESOURCE_ID_KERNEL,\n \t\t\t\t\t  RESOURCE_SUBID_NONE);\n+\tvm_unmap_global((unsigned long)KERNEL_LOAD_BASE, KERNEL_LOAD_SIZE);\n \n \tif (loaded != OPAL_SUCCESS) {\n \t\tprintf(\"INIT: platform wait for kernel load failed\\n\");\n@@ -370,8 +403,10 @@ static bool load_kernel(void)\n \t\t\t\t((uint64_t)__builtin_kernel_start) -\n \t\t\t\tSKIBOOT_BASE + boot_offset;\n \t\t\tprintf(\"Using built-in kernel\\n\");\n+\t\t\tvm_map_global(\"KERNEL\", 
(unsigned long)KERNEL_LOAD_BASE, kernel_size, true, false);\n \t\t\tmemmove(KERNEL_LOAD_BASE, (void*)builtin_base,\n \t\t\t\tkernel_size);\n+\t\t\tvm_unmap_global((unsigned long)KERNEL_LOAD_BASE, kernel_size);\n \t\t}\n \t}\n \n@@ -387,7 +422,7 @@ static bool load_kernel(void)\n \t\tif (kernel_entry < EXCEPTION_VECTORS_END) {\n \t\t\tcpu_set_sreset_enable(false);\n \t\t\tmemcpy_null(NULL, old_vectors, EXCEPTION_VECTORS_END);\n-\t\t\tsync_icache();\n+\t\t\tsync_icache(0);\n \t\t} else {\n \t\t\t/* Hack for STB in Mambo, assume at least 4kb in mem */\n \t\t\tif (!kernel_size)\n@@ -418,15 +453,20 @@ static bool load_kernel(void)\n \t      \"INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\\n\",\n \t      kernel_size);\n \n-\tif (kh->ei_ident != ELF_IDENT) {\n+\tt = vm_map((unsigned long)kh, sizeof(*kh), false);\n+\tei_ident = t->ei_ident;\n+\tei_class = t->ei_class;\n+\tvm_unmap((unsigned long)t, sizeof(*kh));\n+\n+\tif (ei_ident != ELF_IDENT) {\n \t\tprerror(\"INIT: ELF header not found. 
Assuming raw binary.\\n\");\n \t\treturn true;\n \t}\n \n-\tif (kh->ei_class == ELF_CLASS_64) {\n+\tif (ei_class == ELF_CLASS_64) {\n \t\tif (!try_load_elf64(kh))\n \t\t\treturn false;\n-\t} else if (kh->ei_class == ELF_CLASS_32) {\n+\t} else if (ei_class == ELF_CLASS_32) {\n \t\tif (!try_load_elf32(kh))\n \t\t\treturn false;\n \t} else {\n@@ -454,7 +494,7 @@ static void load_initramfs(void)\n \n \tloaded = wait_for_resource_loaded(RESOURCE_ID_INITRAMFS,\n \t\t\t\t\t  RESOURCE_SUBID_NONE);\n-\n+\tvm_unmap_global((unsigned long)INITRAMFS_LOAD_BASE, INITRAMFS_LOAD_SIZE);\n \tif (loaded != OPAL_SUCCESS || !initramfs_size)\n \t\treturn;\n \n@@ -526,6 +566,7 @@ void __noreturn load_and_boot_kernel(bool is_reboot)\n \tconst struct dt_property *memprop;\n \tconst char *cmdline, *stdoutp;\n \tuint64_t mem_top;\n+\tuint32_t *t;\n \n \tmemprop = dt_find_property(dt_root, DT_PRIVATE \"maxmem\");\n \tif (memprop)\n@@ -619,11 +660,13 @@ void __noreturn load_and_boot_kernel(bool is_reboot)\n \n \tfdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);\n \n+\tt = vm_map(kernel_entry, 4, false);\n \t/* Check there is something there before we branch to it */\n-\tif (*(uint32_t *)kernel_entry == 0) {\n+\tif (*t == 0) {\n \t\tprlog(PR_EMERG, \"FATAL: Kernel is zeros, can't execute!\\n\");\n \t\tassert(0);\n \t}\n+\tvm_unmap(kernel_entry, 4);\n \n \t/* Take processors out of nap */\n \tcpu_set_sreset_enable(false);\n@@ -632,6 +675,9 @@ void __noreturn load_and_boot_kernel(bool is_reboot)\n \tprintf(\"INIT: Starting kernel at 0x%llx, fdt at %p %u bytes\\n\",\n \t       kernel_entry, fdt, fdt_totalsize(fdt));\n \n+\t/* Go back to realmode and tear down our VM before booting kernel */\n+\tvm_destroy();\n+\n \t/* Disable machine checks on all */\n \tcpu_disable_ME_RI_all();\n \n@@ -798,34 +844,55 @@ static void setup_branch_null_catcher(void)\n \n void copy_sreset_vector(void)\n {\n+\tstatic char patch[0x100];\n \tuint32_t *src, *dst;\n+\tuint32_t *t;\n+\tuint32_t len = (void *)&reset_patch_end - 
(void *)&reset_patch_start;\n \n \t/* Copy the reset code over the entry point. */\n \tsrc = &reset_patch_start;\n+\tt = vm_map((unsigned long)src, len, false);\n+\tmemcpy(patch, t, len);\n+\tvm_unmap((unsigned long)src, len);\n+\n \tdst = (uint32_t *)0x100;\n-\twhile(src < &reset_patch_end)\n-\t\t*(dst++) = *(src++);\n-\tsync_icache();\n+\tt = vm_map((unsigned long)dst, len, true);\n+\tmemcpy(t, patch, len);\n+\tsync_icache((unsigned long)t);\n+\tvm_unmap((unsigned long)dst, len);\n }\n \n void copy_sreset_vector_fast_reboot(void)\n {\n+\tstatic char patch[0x100];\n \tuint32_t *src, *dst;\n+\tuint32_t *t;\n+\tuint32_t len = (void *)&reset_fast_reboot_patch_end -\n+\t\t\t(void *)&reset_fast_reboot_patch_start;\n \n \t/* Copy the reset code over the entry point. */\n \tsrc = &reset_fast_reboot_patch_start;\n+\tt = vm_map((unsigned long)src, len, false);\n+\tmemcpy(patch, t, len);\n+\tvm_unmap((unsigned long)src, len);\n+\n \tdst = (uint32_t *)0x100;\n-\twhile(src < &reset_fast_reboot_patch_end)\n-\t\t*(dst++) = *(src++);\n-\tsync_icache();\n+\tt = vm_map((unsigned long)dst, len, true);\n+\tmemcpy(t, patch, len);\n+\tsync_icache((unsigned long)t);\n+\tvm_unmap((unsigned long)dst, len);\n }\n \n void copy_exception_vectors(void)\n {\n+\tvoid *t;\n+\n+\tt = vm_map(0x0, 0x2000, true);\n+\n \t/* Backup previous vectors as this could contain a kernel\n \t * image.\n \t */\n-\tmemcpy_null(old_vectors, NULL, EXCEPTION_VECTORS_END);\n+\tmemcpy(old_vectors, t, EXCEPTION_VECTORS_END);\n \n \t/* Copy from 0x100 to EXCEPTION_VECTORS_END, avoid below 0x100 as\n \t * this is the boot flag used by CPUs still potentially entering\n@@ -833,9 +900,10 @@ void copy_exception_vectors(void)\n \t */\n \tBUILD_ASSERT((&reset_patch_end - &reset_patch_start) <\n \t\t\tEXCEPTION_VECTORS_END - 0x100);\n-\tmemcpy((void *)0x100, (void *)(SKIBOOT_BASE + 0x100),\n+\tmemcpy(t + 0x100, (void *)(SKIBOOT_BASE + 0x100),\n \t\t\tEXCEPTION_VECTORS_END - 0x100);\n-\tsync_icache();\n+\tsync_icache((unsigned 
long)t);\n+\tvm_unmap(0x0, 0x2000);\n }\n \n static void per_thread_sanity_checks(void)\n@@ -899,16 +967,25 @@ static uint32_t romem_csum;\n \n static void checksum_romem(void)\n {\n+\tvoid *t;\n+\tunsigned long size;\n \tuint32_t csum;\n \n \tromem_csum = 0;\n \tif (chip_quirk(QUIRK_SLOW_SIM))\n \t\treturn;\n \n-\tcsum = mem_csum(_start, _romem_end);\n+\tsize = (unsigned long)_romem_end - (unsigned long)_start;\n+\tt = vm_map((unsigned long)_start, size, false);\n+\tcsum = mem_csum(t, t + size);\n \tromem_csum ^= csum;\n-\tcsum = mem_csum(__builtin_kernel_start, __builtin_kernel_end);\n+\tvm_unmap((unsigned long)_start, size);\n+\n+\tsize = (unsigned long)__builtin_kernel_end - (unsigned long)__builtin_kernel_start;\n+\tt = vm_map((unsigned long)__builtin_kernel_start, size, false);\n+\tcsum = mem_csum(t, t + size);\n \tromem_csum ^= csum;\n+\tvm_unmap((unsigned long)__builtin_kernel_start, size);\n }\n \n bool verify_romem(void)\n@@ -984,7 +1061,7 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)\n \tprlog(PR_DEBUG, \"initial console log level: memory %d, driver %d\\n\",\n \t       (debug_descriptor.console_log_levels >> 4),\n \t       (debug_descriptor.console_log_levels & 0x0f));\n-\tprlog(PR_TRACE, \"OPAL is Powered By Linked-List Technology.\\n\");\n+\tprlog(PR_TRACE, \"OPAL is Powered By Linked-List Technology. 
Now with more indirection.\\n\");\n \n #ifdef SKIBOOT_GCOV\n \tskiboot_gcov_done();\n@@ -996,6 +1073,9 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)\n \t/* Now locks can be used */\n \tinit_locks();\n \n+\t/* Enter virtual memory mode */\n+\tvm_init();\n+\n \t/* Create the OPAL call table early on, entries can be overridden\n \t * later on (FSP console code for example)\n \t */\n@@ -1021,7 +1101,20 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)\n \t\tif (parse_hdat(false) < 0)\n \t\t\tabort();\n \t} else {\n+\t\tvoid *t;\n+\t\tuint32_t size;\n+\n+\t\tt = vm_map((unsigned long)fdt, sizeof(struct fdt_header), false);\n+\t\tsize = fdt_totalsize(t);\n+\t\tvm_unmap((unsigned long)fdt, sizeof(struct fdt_header));\n+\n+\t\t/*\n+\t\t * Would be nice to make this a local map, but it seems\n+\t\t * to need to be expanded in place.\n+\t\t */\n+\t\tvm_map_global(\"fdt\", (unsigned long)fdt, size, false, false);\n \t\tdt_expand(fdt);\n+\t\tvm_unmap_global((unsigned long)fdt, size);\n \t}\n \tdt_add_cpufeatures(dt_root);\n \n@@ -1072,6 +1165,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)\n \t */\n \tinit_cpu_max_pir();\n \n+\tvm_init_stacks();\n+\n \t/*\n \t * Now, we init our memory map from the device-tree, and immediately\n \t * reserve areas which we know might contain data coming from\n@@ -1308,6 +1403,30 @@ void __noreturn __secondary_cpu_entry(void)\n \tenable_machine_check();\n \tmtmsrd(MSR_RI, 1);\n \n+\tvm_init_secondary();\n+\n+\t/* Some XIVE setup */\n+\txive_cpu_callin(cpu);\n+\n+\t/* Wait for work to do */\n+\twhile(true) {\n+\t\tif (cpu_check_jobs(cpu))\n+\t\t\tcpu_process_jobs();\n+\t\telse\n+\t\t\tcpu_idle_job();\n+\t}\n+}\n+\n+void __noreturn __return_cpu_entry(void)\n+{\n+\tstruct cpu_thread *cpu = this_cpu();\n+\n+\t/* Secondary CPU called in */\n+\tcpu_callin(cpu);\n+\n+\tenable_machine_check();\n+\tmtmsrd(MSR_RI, 1);\n+\n \t/* Some XIVE setup */\n \txive_cpu_callin(cpu);\n \ndiff --git a/core/mem_region.c 
b/core/mem_region.c\nindex 74551922b..fe89cedb6 100644\n--- a/core/mem_region.c\n+++ b/core/mem_region.c\n@@ -66,24 +66,27 @@ static struct mem_region skiboot_os_reserve = {\n \t.type\t\t= REGION_OS,\n };\n \n-struct mem_region skiboot_heap = {\n-\t.name\t\t= \"ibm,firmware-heap\",\n-\t.start\t\t= HEAP_BASE,\n-\t.len\t\t= HEAP_SIZE,\n-\t.type\t\t= REGION_SKIBOOT_HEAP,\n-};\n-\n static struct mem_region skiboot_code_and_text = {\n \t.name\t\t= \"ibm,firmware-code\",\n \t.start\t\t= SKIBOOT_BASE,\n \t.len\t\t= HEAP_BASE - SKIBOOT_BASE,\n+\t.vm_mapped_len\t= HEAP_BASE - SKIBOOT_BASE,\n \t.type\t\t= REGION_SKIBOOT_FIRMWARE,\n };\n \n+struct mem_region skiboot_heap = {\n+\t.name\t\t= \"ibm,firmware-heap\",\n+\t.start\t\t= HEAP_BASE,\n+\t.len\t\t= HEAP_SIZE,\n+\t.vm_mapped_len\t= HEAP_SIZE,\n+\t.type\t\t= REGION_SKIBOOT_HEAP,\n+};\n+\n static struct mem_region skiboot_after_heap = {\n \t.name\t\t= \"ibm,firmware-data\",\n \t.start\t\t= HEAP_BASE + HEAP_SIZE,\n \t.len\t\t= SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),\n+\t.vm_mapped_len\t= SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),\n \t.type\t\t= REGION_SKIBOOT_FIRMWARE,\n };\n \n@@ -153,14 +156,6 @@ static struct alloc_hdr *next_hdr(const struct mem_region *region,\n #if POISON_MEM_REGION == 1\n static void mem_poison(struct free_hdr *f)\n {\n-\tsize_t poison_size = (void*)tailer(f) - (void*)(f+1);\n-\n-\t/* We only poison up to a limit, as otherwise boot is\n-\t * kinda slow */\n-\tif (poison_size > POISON_MEM_REGION_LIMIT)\n-\t\tpoison_size = POISON_MEM_REGION_LIMIT;\n-\n-\tmemset(f+1, POISON_MEM_REGION_WITH, poison_size);\n }\n #endif\n \n@@ -168,23 +163,42 @@ static void mem_poison(struct free_hdr *f)\n static void init_allocatable_region(struct mem_region *region)\n {\n \tstruct free_hdr *f = region_start(region);\n+\tunsigned long num_longs;\n+\tunsigned long *t;\n+\n \tassert(region->type == REGION_SKIBOOT_HEAP ||\n \t       region->type == REGION_MEMORY);\n-\tf->hdr.num_longs = region->len / 
sizeof(long);\n+\n+\tnum_longs = region->len / sizeof(long);\n+\n+\tif (!region->vm_mapped_len) {\n+\t\t/* SKIBOOT_BASE-SIZE regions already come mapped */\n+\t\tregion->vm_mapped_len = PAGE_SIZE;\n+\t\tvm_map_global(region->name, region->start, PAGE_SIZE, true, false);\n+\t}\n+\n+\tassert(PAGE_SIZE >= sizeof(*f));\n+\tassert(region->len >= PAGE_SIZE*2);\n+\n+\tf->hdr.num_longs = num_longs;\n \tf->hdr.free = true;\n \tf->hdr.prev_free = false;\n-\t*tailer(f) = f->hdr.num_longs;\n \tlist_head_init(&region->free_list);\n \tlist_add(&region->free_list, &f->list);\n-#if POISON_MEM_REGION == 1\n+#if 0 && POISON_MEM_REGION == 1\n \tmem_poison(f);\n #endif\n+\n+\tt = vm_map((unsigned long)tailer(f), sizeof(long), true);\n+\t*t = num_longs;\n+\tvm_unmap((unsigned long)tailer(f), sizeof(long));\n }\n \n static void make_free(struct mem_region *region, struct free_hdr *f,\n \t\t      const char *location, bool skip_poison)\n {\n \tstruct alloc_hdr *next;\n+\tunsigned long *t;\n \n #if POISON_MEM_REGION == 1\n \tif (!skip_poison)\n@@ -212,7 +226,9 @@ static void make_free(struct mem_region *region, struct free_hdr *f,\n \t}\n \n \t/* Fix up tailer. */\n-\t*tailer(f) = f->hdr.num_longs;\n+\tt = vm_map((unsigned long)tailer(f), sizeof(long), true);\n+\t*t = f->hdr.num_longs;\n+\tvm_unmap((unsigned long)tailer(f), sizeof(long));\n \n \t/* If next is free, coalesce it */\n \tnext = next_hdr(region, &f->hdr);\n@@ -401,6 +417,7 @@ static void *__mem_alloc(struct mem_region *region, size_t size, size_t align,\n \tsize_t alloc_longs, offset;\n \tstruct free_hdr *f;\n \tstruct alloc_hdr *next;\n+\tunsigned long newsz;\n \n \t/* Align must be power of 2. 
*/\n \tassert(!((align - 1) & align));\n@@ -456,6 +473,17 @@ found:\n \t\tnext->prev_free = false;\n \t}\n \n+\tnewsz = ((void *)((unsigned long *)f + alloc_longs + offset) - region_start(region) + sizeof(struct free_hdr));\n+\tif (newsz > region->vm_mapped_len) {\n+\t\t/* TODO: unmap on free */\n+\t\tnewsz += PAGE_SIZE-1;\n+\t\tnewsz &= ~(PAGE_SIZE-1);\n+\t\tvm_map_global(location,\n+\t\t\tregion->start + region->vm_mapped_len,\n+\t\t\tnewsz - region->vm_mapped_len, true, false);\n+\t\tregion->vm_mapped_len = newsz;\n+\t}\n+\n \tif (offset != 0) {\n \t\tstruct free_hdr *pre = f;\n \n@@ -700,6 +728,7 @@ static struct mem_region *new_region(const char *name,\n \tregion->name = name;\n \tregion->start = start;\n \tregion->len = len;\n+\tregion->vm_mapped_len = 0;\n \tregion->node = node;\n \tregion->type = type;\n \tregion->free_list.n.next = NULL;\n@@ -1232,6 +1261,7 @@ void mem_region_release_unused(void)\n static void mem_clear_range(uint64_t s, uint64_t e)\n {\n \tuint64_t res_start, res_end;\n+\tvoid *t;\n \n \t/* Skip exception vectors */\n \tif (s < EXCEPTION_VECTORS_END)\n@@ -1271,7 +1301,10 @@ static void mem_clear_range(uint64_t s, uint64_t e)\n \n \tprlog(PR_DEBUG, \"Clearing region %llx-%llx\\n\",\n \t      (long long)s, (long long)e);\n-\tmemset((void *)s, 0, e - s);\n+\n+\tt = vm_map(s, e - s, true);\n+\tmemset(t, 0, e - s);\n+\tvm_unmap(s, e - s);\n }\n \n struct mem_region_clear_job_args {\n@@ -1285,7 +1318,8 @@ static void mem_region_clear_job(void *data)\n \tmem_clear_range(arg->s, arg->e);\n }\n \n-#define MEM_REGION_CLEAR_JOB_SIZE (16ULL*(1<<30))\n+/* Limited by 256MB segment size (could fix) */\n+#define MEM_REGION_CLEAR_JOB_SIZE (128ULL*(1<<20))\n \n static struct cpu_job **mem_clear_jobs;\n static struct mem_region_clear_job_args *mem_clear_job_args;\ndiff --git a/core/opal.c b/core/opal.c\nindex 3a2fbb95b..df6e70a3c 100644\n--- a/core/opal.c\n+++ b/core/opal.c\n@@ -68,7 +68,16 @@ void opal_table_init(void)\n \tprlog(PR_DEBUG, \"OPAL table: %p 
.. %p, branch table: %p\\n\",\n \t      s, e, opal_branch_table);\n \twhile(s < e) {\n-\t\topal_branch_table[s->token] = function_entry_address(s->func);\n+\t\tuint64_t f;\n+\t\tuint64_t *t;\n+\n+\t\tf = function_entry_address(s->func);\n+\n+\t\tt = vm_map((unsigned long)&opal_branch_table[s->token], sizeof(*t), true);\n+\n+\t\t*t = f;\n+\t\tvm_unmap((unsigned long)&opal_branch_table[s->token], sizeof(*t));\n+\n \t\topal_num_args[s->token] = s->nargs;\n \t\ts++;\n \t}\n@@ -331,9 +340,16 @@ opal_call(OPAL_QUIESCE, opal_quiesce, 2);\n \n void __opal_register(uint64_t token, void *func, unsigned int nargs)\n {\n+\tuint64_t f;\n+\tuint64_t *t;\n+\n \tassert(token <= OPAL_LAST);\n \n-\topal_branch_table[token] = function_entry_address(func);\n+\tf = function_entry_address(func);\n+\n+\tt = vm_map((unsigned long)&opal_branch_table[token], sizeof(uint64_t), true);\n+\t*t = f;\n+\tvm_unmap((unsigned long)&opal_branch_table[token], sizeof(uint64_t));\n \topal_num_args[token] = nargs;\n }\n \ndiff --git a/core/vm.c b/core/vm.c\nnew file mode 100644\nindex 000000000..1bf5e4bd8\n--- /dev/null\n+++ b/core/vm.c\n@@ -0,0 +1,812 @@\n+/* Copyright 2018 IBM Corp.\n+ *\n+ * Licensed under the Apache License, Version 2.0 (the \"License\");\n+ * you may not use this file except in compliance with the License.\n+ * You may obtain a copy of the License at\n+ *\n+ * \thttp://www.apache.org/licenses/LICENSE-2.0\n+ *\n+ * Unless required by applicable law or agreed to in writing, software\n+ * distributed under the License is distributed on an \"AS IS\" BASIS,\n+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n+ * implied.\n+ * See the License for the specific language governing permissions and\n+ * limitations under the License.\n+ */\n+\n+#include <ccan/container_of/container_of.h>\n+#include <ccan/list/list.h>\n+#include <ccan/str/str.h>\n+#include <cmpxchg.h>\n+#include <cpu.h>\n+#include <opal.h>\n+#include <skiboot.h>\n+#include <stack.h>\n+#include 
<timebase.h>\n+#include <trace.h>\n+\n+static bool vm_setup = false;\n+static bool vm_globals_allocated = false;\n+\n+#define SLB_SZ\t\t(256UL*1024*1024)\n+#define SLB_NR\t\t32\n+#define LOCAL_SLB_NR\t2\n+#define GLOBAL_SLB_NR\t(SLB_NR - LOCAL_SLB_NR)\n+#define LOCAL_SLB_BASE\tGLOBAL_SLB_NR\n+\n+#define LOCAL_EA_BEGIN\t0x0800000000000000ULL\n+#define LOCAL_EA_END\t0x0900000000000000ULL\n+\n+static void __nomcount slb_install(unsigned long esid, unsigned long vsid, unsigned int index)\n+{\n+\tunsigned long rs;\n+\tunsigned long rb;\n+\n+\trs = vsid << (63-51);\t\t/* 256MB VSID */\n+\trs |= 1UL << (63-53);\t\t/* Kp = 1 */\n+\n+\trb = esid << (63-35);\t\t/* 256MB ESID */\n+\trb |= 1UL << (63-36);\t\t/* V = 1 */\n+\trb |= index;\n+\n+\tasm volatile(\"slbmte %0,%1\" : : \"r\"(rs), \"r\"(rb) : \"memory\");\n+}\n+\n+#if 0\n+static void slb_remove(unsigned long esid)\n+{\n+\tasm volatile(\"isync ; slbie %0 ; isync\" : : \"r\"(esid << 28) : \"memory\");\n+}\n+#endif\n+\n+static void slb_remove_all(void)\n+{\n+\tasm volatile(\"isync ; slbmte %0,%0 ; slbia ; isync\" : : \"r\"(0) : \"memory\");\n+}\n+\n+static void __nomcount slb_add(unsigned long ea)\n+{\n+\tstruct cpu_thread *cpu = this_cpu();\n+\tuint64_t esid = ea >> 28;\n+\tuint64_t vsid = ea >> 28;\n+\n+\tslb_install(esid, vsid, cpu->vm_slb_rr);\n+\n+\tcpu->vm_slb_rr++;\n+\tif (cpu->vm_slb_rr == GLOBAL_SLB_NR)\n+\t\tcpu->vm_slb_rr = 0;\n+}\n+\n+struct hpte {\n+\tuint64_t dword[2];\n+};\n+\n+struct hpteg {\n+\tstruct hpte hpte[8];\n+};\n+\n+static struct hpteg *htab;\n+static unsigned long htab_shift;\n+static unsigned long htab_pteg_mask;\n+\n+static struct lock htab_lock;\n+\n+static void __nomcount htab_install(unsigned long va, unsigned long pa, int rw, int ex, int ci, bool local)\n+{\n+\tunsigned long hash;\n+\tstruct hpteg *hpteg;\n+\tstruct hpte *hpte;\n+\tunsigned long ava = va >> 23;\n+\tunsigned long arpn = pa >> 12;\n+\tunsigned long dw0, dw1;\n+\tunsigned long _dw0;\n+\tunsigned long _ava;\n+\tunsigned int 
hstart, hend;\n+\tunsigned int i;\n+\n+\tdw0 = ava << (63-56); /* AVA = ava */\n+\tdw0 |= 0x1; /* V = 1 */\n+\tif (local)\n+\t\tdw0 |= 0x8; /* SW[0] = 1 */\n+\n+\tdw1 = (arpn << (63-43 - 8)); /* ARPN||LP = arpn */\n+\tif (!rw)\n+\t\tdw1 |= (1UL << (63 - 0)) | (1UL << (63 - 63 + 1)); /* pp = 110 */\n+\tif (!ex)\n+\t\tdw1 |= (1UL << (63 - 61)); /* N = 1 */\n+\tdw1 |= (1UL << (63 - 60 + 1)); /* WIMG = 0010 */\n+\tif (ci)\n+\t\tdw1 |= (1UL << (63 - 60)) | (1UL << (63 - 60 + 2)); /* WIMG = 0111 */\n+\tdw1 |= (1UL << (63 - 55)) | (1UL << (63 - 56)); /* R=C=1 */\n+\n+\thash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL);\n+\thpteg = &htab[hash & htab_pteg_mask];\n+\n+\tlock(&htab_lock);\n+\n+\thstart = 0;\n+\thend = 7;\n+\n+\tfor (i = hstart; i <= hend; i++) {\n+\t\thpte = &hpteg->hpte[i];\n+\n+\t\t_dw0 = be64_to_cpu(hpte->dword[0]);\n+\t\tif (_dw0 & 1) {\n+\t\t\t_ava = _dw0 >> (63 - 56);\n+\t\t\tif (_ava == ava) {\n+\t\t\t\t/* Replace insertion */\n+\t\t\t\tgoto install;\n+\t\t\t}\n+\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\tassert(!_dw0);\n+\t\tgoto install;\n+\t}\n+\n+\ti = mftb();\n+\ti = (i ^ (i >> 4)) & 0x7;\n+\thpte = &hpteg->hpte[i];\n+\n+install:\n+\thpte->dword[0] = 0;\n+\teieio();\n+\thpte->dword[1] = cpu_to_be64(dw1);\n+\teieio();\n+\thpte->dword[0] = cpu_to_be64(dw0);\n+\tasm volatile(\"ptesync\" ::: \"memory\");\n+\tunlock(&htab_lock);\n+}\n+\n+static void htab_remove(unsigned long va, int local)\n+{\n+\tunsigned long hash;\n+\tstruct hpteg *hpteg;\n+\tunsigned long ava = va >> 23;\n+\tunsigned long dw0;\n+\tunsigned int hstart, hend;\n+\tunsigned int i;\n+\n+\tdw0 = ava << (63-56);\n+\tdw0 |= 0x1;\n+\tif (local)\n+\t\tdw0 |= 0x8;\n+\n+\thash = ((va >> 12) & 0xffff) ^ ((va >> 28) & 0x7fffffffffUL);\n+\thpteg = &htab[hash & htab_pteg_mask];\n+\n+\tif (!local)\n+\t\tlock(&htab_lock);\n+again:\n+\thstart = 0;\n+\thend = 7;\n+\n+\tfor (i = hstart; i <= hend; i++) {\n+\t\tstruct hpte *hpte = &hpteg->hpte[i];\n+\t\tunsigned long _raw_dw0, _dw0;\n+\n+\t\t_raw_dw0 
= hpte->dword[0];\n+\t\t_dw0 = be64_to_cpu(_raw_dw0);\n+\n+\t\tif (!(_dw0 & 1)) {\n+\t\t\tassert(!_raw_dw0);\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\tif (_dw0 != dw0) {\n+\t\t\tassert(_dw0 >> 7 != ava);\n+\t\t\tcontinue;\n+\t\t}\n+\n+\t\tif (local) {\n+\t\t\tif (__cmpxchg64(&hpte->dword[0], _raw_dw0, 0) != _raw_dw0)\n+\t\t\t\tgoto again;\n+\t\t} else {\n+\t\t\thpte->dword[0] = 0;\n+\t\t}\n+\n+\t\tbreak;\n+\t}\n+\n+\tif (local) {\n+\t\tasm volatile(\"ptesync\" ::: \"memory\");\n+\t\tasm volatile(\"tlbiel %0\" : : \"r\"(va & ~0xfffULL));\n+\t\tasm volatile(\"ptesync\" ::: \"memory\");\n+\t} else {\n+\t\tunlock(&htab_lock);\n+\t\tasm volatile(\"ptesync\" ::: \"memory\");\n+\t\tasm volatile(\"tlbie %0,%1\" : : \"r\"(va & ~0xfffULL), \"r\"(0));\n+\t\tasm volatile(\"eieio ; tlbsync ; ptesync\" ::: \"memory\");\n+\t}\n+}\n+\n+/*\n+ * Try to fix problems in callers if !strict.\n+ */\n+static bool vm_strict = false;\n+\n+static struct list_head vm_maps = LIST_HEAD_INIT(vm_maps);\n+static struct lock vm_maps_lock;\n+static unsigned long nr_vm_maps;\n+\n+static void __vm_map(const char *name, unsigned long addr, unsigned long len, unsigned long pa, bool r, bool w, bool x, bool ci, bool local)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\tbool vm_setup = c->vm_setup;\n+\tstruct vm_map *new;\n+\tstruct vm_map *vmm;\n+\n+\tif (local) {\n+\t\tnew = &c->vm_local_map;\n+\t\tnew->name = name;\n+\t\tnew->address = addr;\n+\t\tnew->length = len;\n+\t\tnew->pa = pa;\n+\t\tnew->readable = r;\n+\t\tnew->writeable = w;\n+\t\tnew->executable = x;\n+\t\tnew->ci = ci;\n+\n+\t\treturn;\n+\t}\n+\n+\tnew = zalloc(sizeof(*new));\n+\tassert(new);\n+\n+\tnew->name = name;\n+\tnew->address = addr;\n+\tnew->length = len;\n+\tnew->pa = pa;\n+\tnew->readable = r;\n+\tnew->writeable = w;\n+\tnew->executable = x;\n+\tnew->ci = ci;\n+\n+\t/* Can not take a d-side fault while holding this lock */\n+\tif (vm_setup)\n+\t\tvm_exit();\n+\tlock(&vm_maps_lock);\n+\n+\tlist_for_each(&vm_maps, vmm, list) {\n+\t\tif 
(addr >= vmm->address + vmm->length)\n+\t\t\tcontinue;\n+\t\tif (addr + len <= vmm->address) {\n+\t\t\tlist_add_before(&vm_maps, &new->list, &vmm->list);\n+\t\t\tgoto found;\n+\t\t}\n+\n+\t\tif (!vm_strict) {\n+\t\t\tprintf(\"vm_map_global %s %lx-%lx collided with vmm:%s %llx-%llx\\n\", name, addr, addr + len, vmm->name, vmm->address, vmm->address + vmm->length);\n+\t\t\tlist_add_before(&vm_maps, &new->list, &vmm->list);\n+\t\t\tgoto found;\n+\t\t}\n+\t\tassert(0);\n+\t}\n+\tlist_add_tail(&vm_maps, &new->list);\n+found:\n+\tnr_vm_maps++;\n+\tunlock(&vm_maps_lock);\n+\tif (vm_setup)\n+\t\tvm_enter();\n+}\n+\n+static void __vm_unmap(unsigned long addr, unsigned long len, bool local)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\tbool vm_setup = c->vm_setup;\n+\tunsigned long end = addr + len;\n+\tstruct vm_map *vmm;\n+\n+\tif (local) {\n+\t\tvmm = &c->vm_local_map;\n+\t\tassert(addr == vmm->address);\n+\t\tassert(len == vmm->length);\n+\t\tmemset(vmm, 0, sizeof(struct vm_map));\n+\n+\t\tif (vm_setup) {\n+\t\t\twhile (addr < end) {\n+\t\t\t\thtab_remove(addr, local);\n+\t\t\t\taddr += PAGE_SIZE;\n+\t\t\t}\n+\t\t}\n+\n+\t\treturn;\n+\t}\n+\n+\t/* Can not take a d-side fault while holding this lock */\n+\tif (vm_setup)\n+\t\tvm_exit();\n+\tlock(&vm_maps_lock);\n+\tlist_for_each(&vm_maps, vmm, list) {\n+\t\tif (addr != vmm->address)\n+\t\t\tcontinue;\n+\t\tif (len != vmm->length)\n+\t\t\tcontinue;\n+\t\tgoto found;\n+\t}\n+\tvmm = NULL;\n+\tunlock(&vm_maps_lock);\n+\tif (!vm_strict) {\n+\t\tprintf(\"unmap didn't find anything\\n\");\n+\t\tbacktrace();\n+\t\tgoto out;\n+\t}\n+\tassert(0);\n+\n+found:\n+\tlist_del(&vmm->list);\n+\n+\tif (vm_setup) {\n+\t\twhile (addr < end) {\n+\t\t\thtab_remove(addr, local);\n+\t\t\taddr += PAGE_SIZE;\n+\t\t}\n+\t}\n+\n+\tnr_vm_maps--;\n+\tunlock(&vm_maps_lock);\n+out:\n+\tif (vm_setup)\n+\t\tvm_enter();\n+\n+\tif (vmm)\n+\t\tfree(vmm);\n+}\n+\n+\n+void vm_map_global(const char *name, unsigned long addr, unsigned long len, bool rw, bool 
ci)\n+{\n+\t__vm_map(name, addr, len, addr, true, rw, false, ci, false);\n+}\n+\n+static void vm_map_global_text(const char *name, unsigned long addr, unsigned long len)\n+{\n+\t__vm_map(name, addr, len, addr, true, false, true, false, false);\n+}\n+\n+void vm_unmap_global(unsigned long addr, unsigned long len)\n+{\n+\t__vm_unmap(addr, len, false);\n+}\n+\n+\n+void *vm_map(unsigned long addr, unsigned long len, bool rw)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\tunsigned long newaddr = (LOCAL_EA_BEGIN + ((unsigned long)c->pir << 30));\n+\tunsigned long end = addr + len;\n+\tunsigned long offset = addr & (PAGE_SIZE - 1);\n+\n+\t/* Can't do nested mappings */\n+\tassert(!c->vm_local_map_inuse);\n+\tc->vm_local_map_inuse = true;\n+\n+\tif (!c->vm_setup)\n+\t\treturn (void *)addr;\n+\n+\tend = (end + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);\n+\taddr &= ~(PAGE_SIZE - 1);\n+\tlen = end - addr;\n+\n+\tassert(len < (1 << 28)); /* same segment */\n+\n+\t__vm_map(\"local\", newaddr, len, addr, true, rw, false, false, true);\n+\n+\treturn (void *)newaddr + offset;\n+}\n+\n+void vm_unmap(unsigned long addr, unsigned long len)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\tunsigned long newaddr = (LOCAL_EA_BEGIN + ((unsigned long)c->pir << 30));\n+\tunsigned long end = addr + len;\n+\n+\tassert(c->vm_local_map_inuse);\n+\tc->vm_local_map_inuse = false;\n+\n+\tif (!c->vm_setup)\n+\t\treturn;\n+\n+\tend = (end + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);\n+\taddr &= ~(PAGE_SIZE - 1);\n+\tlen = end - addr;\n+\n+\tassert(len < (1 << 28)); /* same segment */\n+\n+\t__vm_unmap(newaddr, len, true);\n+}\n+\n+struct prte {\n+\tunsigned long dword[2];\n+};\n+\n+static struct prte *prtab;\n+\n+static void vm_init_cpu(void)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\tunsigned long esid = (LOCAL_EA_BEGIN + ((unsigned long)c->pir << 30)) >> 28;\n+\tunsigned long vsid = (LOCAL_EA_BEGIN + ((unsigned long)c->pir << 30)) >> 28;\n+\n+\tmtspr(SPR_LPCR, mfspr(SPR_LPCR) &\n+\t\t~(PPC_BITMASK(0,3) | 
PPC_BIT(41) | PPC_BIT(43) | PPC_BIT(54)));\n+\tmtspr(SPR_LPID, 0);\n+\tmtspr(SPR_PID, 0);\n+\tmtspr(SPR_HRMOR, 0);\n+\tmtspr(SPR_PTCR, (unsigned long)prtab);\n+\tmtspr(SPR_AMR, 0);\n+\tmtspr(SPR_IAMR, 0);\n+\tmtspr(SPR_AMOR, 0);\n+\tmtspr(SPR_UAMOR, 0);\n+\n+\tslb_remove_all();\n+\tslb_install(esid, vsid, LOCAL_SLB_BASE);\n+}\n+\n+void vm_init_secondary(void)\n+{\n+\tvm_init_cpu();\n+\tvm_enter();\n+}\n+\n+bool vm_realmode(void)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\n+\treturn !vm_setup || !c->vm_setup;\n+}\n+\n+void vm_enter(void)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\n+\tassert(vm_setup);\n+\tif (c->vm_setup) {\n+\t\tmtmsr(mfmsr() | (MSR_IR|MSR_DR));\n+\t\tprintf(\"CPU:%d vm_enter already entered\\n\", c->pir);\n+\t\tbacktrace();\n+\t\treturn;\n+\t}\n+\tc->vm_setup = true;\n+\tmtmsr(mfmsr() | (MSR_IR|MSR_DR));\n+}\n+\n+void vm_exit(void)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\n+\tassert(vm_setup);\n+\tif (!c->vm_setup) {\n+\t\tmtmsr(mfmsr() & ~(MSR_IR|MSR_DR));\n+\t\tprintf(\"CPU:%d vm_exit already exited\\n\", c->pir);\n+\t\tbacktrace();\n+\t\treturn;\n+\t}\n+\tc->vm_setup = false;\n+\tmtmsr(mfmsr() & ~(MSR_IR|MSR_DR));\n+}\n+\n+bool __nomcount vm_dslb(uint64_t nia, uint64_t dar)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\tbool vm_setup = c->vm_setup;\n+\n+\tassert(vm_setup);\n+\tc->vm_setup = false;\n+\n+\t/*\n+\t * Per-cpu map ranges are bolted to per-cpu SLBs.\n+\t */\n+\tassert((dar < LOCAL_EA_BEGIN) ||\n+\t\t(dar >= LOCAL_EA_END));\n+\n+\t(void)nia;\n+\tslb_add(dar);\n+\n+\tc->vm_setup = true;\n+\n+\treturn true;\n+}\n+\n+bool __nomcount vm_islb(uint64_t nia)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\tbool vm_setup = c->vm_setup;\n+\n+\tassert(vm_setup);\n+\tc->vm_setup = false;\n+\n+\tslb_add(nia);\n+\n+\tc->vm_setup = true;\n+\n+\treturn true;\n+}\n+\n+bool __nomcount vm_dsi(uint64_t nia, uint64_t dar, bool store)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\tbool vm_setup = c->vm_setup;\n+\tstruct vm_map *vmm;\n+\tuint64_t 
pa;\n+\tbool ret = true;\n+\tbool local;\n+\n+\t(void)nia;\n+\n+\tassert(vm_setup);\n+\tc->vm_setup = false;\n+\n+\tif ((dar >= LOCAL_EA_BEGIN) && (dar < LOCAL_EA_END)) {\n+\t\tlocal = true;\n+\t\tvmm = &c->vm_local_map;\n+\t\tif (dar >= vmm->address && dar < vmm->address + vmm->length)\n+\t\t\tgoto found;\n+\t\tgoto not_found;\n+\t}\n+\n+\tlocal = false;\n+\n+\tlock(&vm_maps_lock);\n+\tlist_for_each(&vm_maps, vmm, list) {\n+\t\tassert(vmm->pa == vmm->address);\n+\t\tif (dar >= vmm->address && dar < vmm->address + vmm->length)\n+\t\t\tgoto found;\n+\t}\n+\tif (!vm_strict) {\n+\t\tif (dar >= 0x0006000000000000 && dar < 0x0007000000000000)\n+\t\t\t/* MMIO */\n+\t\t\thtab_install(dar, dar, 1, 0, 1, false);\n+\t\telse if (dar < LOCAL_EA_BEGIN)\n+\t\t\thtab_install(dar, dar, 1, 0, 0, false);\n+\t\telse\n+\t\t\tret = false;\n+\t\tunlock(&vm_maps_lock);\n+\t\tprintf(\"Page fault with no VMM at NIA:0x%016llx DAR:0x%016llx, store:%d\\n\", nia, dar, store);\n+\t\tbacktrace();\n+\t\tgoto out;\n+\t}\n+\tunlock(&vm_maps_lock);\n+not_found:\n+\tprintf(\"  vmm not found\\n\");\n+\tret = false;\n+\tassert(0);\n+\tgoto out;\n+\n+found:\n+\tpa = vmm->pa + (dar & ~(PAGE_SIZE - 1)) - vmm->address;\n+\tif (!vmm->readable) {\n+\t\tunlock(&vm_maps_lock);\n+\t\tprintf(\"  vmm not readable\\n\");\n+\t\tret = false;\n+\t\tassert(0);\n+\t\tgoto out;\n+\t}\n+\tif (store && !vmm->writeable) {\n+\t\tif (!vm_strict) {\n+\t\t\thtab_install(dar, pa, store, 0, vmm->ci, local);\n+\t\t\tunlock(&vm_maps_lock);\n+\t\t\tprintf(\"Page fault store to RO VMM:%s at NIA:0x%016llx DAR:0x%016llx\\n\", vmm->name, nia, dar);\n+\t\t\tbacktrace();\n+\t\t\tgoto out;\n+\t\t}\n+\t\tunlock(&vm_maps_lock);\n+\t\tprintf(\"  vmm not writeable\\n\");\n+\t\tret = false;\n+\t\tassert(0);\n+\t\tgoto out;\n+\t}\n+\n+\thtab_install(dar, pa, vmm->writeable, vmm->executable, vmm->ci, local);\n+\tif (!local)\n+\t\tunlock(&vm_maps_lock);\n+\n+out:\n+\tc->vm_setup = true;\n+\treturn ret;\n+}\n+\n+bool __nomcount vm_isi(uint64_t 
nia)\n+{\n+\tstruct cpu_thread *c = this_cpu();\n+\tbool vm_setup = c->vm_setup;\n+\n+\tassert(vm_setup);\n+\n+\tif (nia < (unsigned long)_stext)\n+\t\treturn false;\n+\tif (nia >= (unsigned long)_etext)\n+\t\treturn false;\n+\n+\tc->vm_setup = false;\n+\thtab_install(nia, nia, 0, 1, 0, false);\n+\tc->vm_setup = true;\n+\n+\treturn true;\n+}\n+\n+static void cpu_stop_vm(void *arg __unused)\n+{\n+\tvm_exit();\n+}\n+\n+static void cpu_cleanup_vm(void *arg __unused)\n+{\n+\tslb_remove_all();\n+\tmtspr(SPR_PTCR, 0);\n+}\n+\n+static void cpu_all_destroy_vm(void)\n+{\n+\tstruct cpu_thread *cpu;\n+\tstruct cpu_job **jobs;\n+\n+\tjobs = zalloc(sizeof(struct cpu_job *) * cpu_max_pir + 1);\n+\tassert(jobs);\n+\n+\t/* Stop all CPUs */\n+\tfor_each_available_cpu(cpu) {\n+\t\tif (cpu == this_cpu())\n+\t\t\tcontinue;\n+\t\tjobs[cpu->pir] = cpu_queue_job(cpu, \"cpu_stop_vm\",\n+\t\t\t\t\t\tcpu_stop_vm, NULL);\n+\t}\n+\n+\t/* this cpu */\n+\tcpu_stop_vm(NULL);\n+\n+\t/* Cleaup after all stop */\n+\tfor_each_available_cpu(cpu) {\n+\t\tif (jobs[cpu->pir])\n+\t\t\tcpu_wait_job(jobs[cpu->pir], true);\n+\t}\n+\n+\tfor_each_available_cpu(cpu) {\n+\t\tif (cpu == this_cpu())\n+\t\t\tcontinue;\n+\t\tjobs[cpu->pir] = cpu_queue_job(cpu, \"cpu_cleanup_vm\",\n+\t\t\t\t\t\tcpu_cleanup_vm, NULL);\n+\t}\n+\n+\t/* this cpu */\n+\tcpu_cleanup_vm(NULL);\n+\n+\tfor_each_available_cpu(cpu) {\n+\t\tif (jobs[cpu->pir])\n+\t\t\tcpu_wait_job(jobs[cpu->pir], true);\n+\t}\n+\n+\tfree(jobs);\n+}\n+\n+void vm_init(void)\n+{\n+\tunsigned long stack_start = SKIBOOT_BASE + SKIBOOT_SIZE;\n+\tunsigned long stack_end = stack_start + (cpu_max_pir + 1)*STACK_SIZE;\n+\tunsigned long htab_nr_bytes;\n+\tunsigned long htab_nr_ptegs;\n+\n+\tprtab = memalign(64*1024, 64*1024);\n+\tassert(prtab);\n+\tmemset(prtab, 0, 64*1024);\n+\n+\thtab_shift = 18;\n+\thtab_nr_bytes = 1UL << htab_shift;\n+\thtab_nr_ptegs = htab_nr_bytes / sizeof(struct hpteg);\n+\thtab_pteg_mask = htab_nr_ptegs - 1;\n+\thtab = memalign(1UL << htab_shift, 
htab_nr_bytes);\n+\tassert(htab);\n+\tmemset(htab, 0, htab_nr_bytes);\n+\n+\tprtab[0].dword[0] = cpu_to_be64((unsigned long)htab | (htab_shift - 18));\n+\tprtab[0].dword[1] = 0;\n+\n+\teieio();\n+\n+\tvm_init_cpu();\n+\n+\tcleanup_global_tlb();\n+\n+\tif (vm_globals_allocated)\n+\t\tgoto done;\n+\n+\tvm_map_global_text(\"OPAL text\", (unsigned long)_stext,\n+\t\t(unsigned long)_etext - (unsigned long)_stext);\n+\tvm_map_global(\"OPAL rodata\", (unsigned long)__rodata_start,\n+\t\t(unsigned long)__rodata_end - (unsigned long)__rodata_start,\n+\t\tfalse, false);\n+\tvm_map_global(\"OPAL data\", (unsigned long)_sdata,\n+\t\t(unsigned long)_edata - (unsigned long)_sdata,\n+\t\ttrue, false);\n+\tvm_map_global(\"OPAL bss\", (unsigned long)_sbss,\n+\t\t(unsigned long)_ebss - (unsigned long)_sbss,\n+\t\ttrue, false);\n+\tvm_map_global(\"OPAL sym map\", (unsigned long)__sym_map_start,\n+\t\t(unsigned long)__sym_map_end - (unsigned long)__sym_map_start,\n+\t\tfalse, false);\n+\tvm_map_global(\"OPAL heap\", HEAP_BASE, HEAP_SIZE, true, false);\n+\tvm_map_global(\"Memory console\", INMEM_CON_START, INMEM_CON_LEN, true, false);\n+\tvm_map_global(\"Hostboot console\", HBRT_CON_START, HBRT_CON_LEN, false, false);\n+\tvm_map_global(\"SPIRA heap\", SPIRA_HEAP_BASE, SPIRA_HEAP_SIZE, false, false);\n+\tvm_map_global(\"PSI TCE table\", PSI_TCE_TABLE_BASE, PSI_TCE_TABLE_SIZE_P8, false, false);\n+\tvm_map_global(\"OPAL boot stacks\", stack_start, stack_end - stack_start, true, false);\n+\tvm_globals_allocated = true;\n+\n+done:\n+\tif (1) {\n+\t\tstruct vm_map *vmm;\n+\t\tprintf(\"VMM: SETUP\\n\");\n+\t\tprintf(\" PRTAB:%p\\n\", prtab);\n+\t\tprintf(\" HTAB: %p\\n\", htab);\n+\t\tprintf(\" Global mappings\\n\");\n+\t\tlist_for_each(&vm_maps, vmm, list)\n+\t\t\tprintf(\"%28s 0x%08llx-0x%08llx\\n\", vmm->name,\n+\t\t\t\tvmm->address, vmm->address + vmm->length);\n+\t}\n+\n+\tvm_setup = true;\n+\n+\tvm_enter();\n+}\n+\n+void vm_init_stacks(void)\n+{\n+\tunsigned long stack_start = 
SKIBOOT_BASE + SKIBOOT_SIZE;\n+\tunsigned long stack_end = stack_start + (cpu_max_pir + 1)*STACK_SIZE;\n+\tstruct cpu_thread *c = this_cpu();\n+\tstruct vm_map *vmm;\n+\n+\t/* Can not take a d-side fault while holdig this lock */\n+\tif (c->vm_setup)\n+\t\tmtmsr(mfmsr() & ~MSR_DR);\n+\tlock(&vm_maps_lock);\n+\tlist_for_each(&vm_maps, vmm, list) {\n+\t\tif (vmm->address >= stack_end)\n+\t\t\tcontinue;\n+\t\tif (vmm->address + vmm->length <= stack_start)\n+\t\t\tcontinue;\n+\t\tgoto found;\n+\t}\n+\tunlock(&vm_maps_lock);\n+\tassert(0);\n+\n+found:\n+\tvmm->name = \"OPAL stacks\";\n+\tvmm->address = stack_start;\n+\tvmm->length = stack_end - stack_start;\n+\tunlock(&vm_maps_lock);\n+\tif (c->vm_setup)\n+\t\tmtmsr(mfmsr() | MSR_DR);\n+}\n+\n+void vm_destroy(void)\n+{\n+\tassert(vm_setup);\n+\n+\tif (1) {\n+\t\tstruct vm_map *vmm;\n+\t\tprintf(\"VMM: TEARDOWN\\n\");\n+\t\tprintf(\" Global mappings\\n\");\n+\t\tlist_for_each(&vm_maps, vmm, list)\n+\t\t\tprintf(\"%28s 0x%08llx-0x%08llx\\n\", vmm->name,\n+\t\t\t\tvmm->address, vmm->address + vmm->length);\n+\t}\n+\n+\tcpu_all_destroy_vm();\n+\n+\tvm_setup = false;\n+\n+\tif (0) { /* XXX: leave for VMM enabled fast-reboot */\n+\t\twhile (!list_empty(&vm_maps)) {\n+\t\t\tstruct vm_map *vmm;\n+\t\t\tvmm = list_pop(&vm_maps, struct vm_map, list);\n+\t\t\tfree(vmm);\n+\t\t}\n+\t}\n+\n+\tfree(htab);\n+\thtab = NULL;\n+\tfree(prtab);\n+\tprtab = NULL;\n+}\ndiff --git a/hdata/spira.c b/hdata/spira.c\nindex 6891a9c71..743aecfd6 100644\n--- a/hdata/spira.c\n+++ b/hdata/spira.c\n@@ -1578,11 +1578,18 @@ static void fixup_spira(void)\n \n int parse_hdat(bool is_opal)\n {\n+\tint ret = 0;\n+\n \tcpu_type = PVR_TYPE(mfspr(SPR_PVR));\n \n \tprlog(PR_DEBUG, \"Parsing HDAT...\\n\");\n \n+\tvm_map_global(\"SPIRA\", SKIBOOT_BASE + SPIRA_OFF, sizeof(spira), true, false);\n \tfixup_spira();\n+\tvm_unmap_global(SKIBOOT_BASE + SPIRA_OFF, sizeof(spira));\n+\n+\tvm_map_global(\"SPIRA\", SKIBOOT_BASE + SPIRA_OFF, sizeof(spira), false, 
false);\n+\tvm_map_global(\"SPIRA-H\", SKIBOOT_BASE + SPIRAH_OFF, sizeof(spirah), false, false);\n \n \t/*\n \t * Basic DT root stuff\n@@ -1603,9 +1610,12 @@ int parse_hdat(bool is_opal)\n \tdt_init_led_node();\n \n \t/* Parse SPPACA and/or PCIA */\n-\tif (!pcia_parse())\n-\t\tif (paca_parse() < 0)\n-\t\t\treturn -1;\n+\tif (!pcia_parse()) {\n+\t\tif (paca_parse() < 0) {\n+\t\t\tret = -1;\n+\t\t\tgoto out;\n+\t\t}\n+\t}\n \n \t/* IPL params */\n \tadd_iplparams();\n@@ -1652,6 +1662,9 @@ int parse_hdat(bool is_opal)\n \t\tnode_stb_parse();\n \n \tprlog(PR_DEBUG, \"Parsing HDAT...done\\n\");\n+out:\n+\tvm_unmap_global(SKIBOOT_BASE + SPIRA_OFF, sizeof(spira));\n+\tvm_unmap_global(SKIBOOT_BASE + SPIRAH_OFF, sizeof(spirah));\n \n-\treturn 0;\n+\treturn ret;\n }\ndiff --git a/hw/fake-nvram.c b/hw/fake-nvram.c\nindex 236ad5b91..97f3f31ec 100644\n--- a/hw/fake-nvram.c\n+++ b/hw/fake-nvram.c\n@@ -36,12 +36,16 @@ int fake_nvram_info(uint32_t *total_size)\n \n int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len)\n {\n+\tvoid *t;\n+\n \tif (!nvram_region)\n \t\treturn -ENODEV;\n \n+\tt = vm_map(nvram_region->start + src, len, false);\n \tlock(&fake_nvram_lock);\n-\tmemcpy(dst, (void *) (nvram_region->start + src), len);\n+\tmemcpy(dst, t, len);\n \tunlock(&fake_nvram_lock);\n+\tvm_unmap(nvram_region->start + src, len);\n \n \tnvram_read_complete(true);\n \n@@ -50,12 +54,16 @@ int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len)\n \n int fake_nvram_write(uint32_t offset, void *src, uint32_t size)\n {\n+\tvoid *t;\n+\n \tif (!nvram_region)\n \t\treturn OPAL_HARDWARE;\n \n+\tt = vm_map(nvram_region->start + offset, size, true);\n \tlock(&fake_nvram_lock);\n-\tmemcpy((void *) (nvram_region->start + offset), src, size);\n+\tmemcpy(t, src, size);\n \tunlock(&fake_nvram_lock);\n+\tvm_unmap(nvram_region->start + offset, size);\n \n \treturn 0;\n }\ndiff --git a/hw/homer.c b/hw/homer.c\nindex 34ee3370d..6b51da59c 100644\n--- a/hw/homer.c\n+++ b/hw/homer.c\n@@ 
-121,6 +121,9 @@ static void homer_init_chip(struct proc_chip *chip)\n \n \t\tchip->homer_base = hbase;\n \t\tchip->homer_size = hsize;\n+\t\t/* slw late init and xive late init want to write to HOMER */\n+\t\t/* XXX: make it read only until then? */\n+\t\tvm_map_global(\"HOMER Image\", hbase, hsize, true, false);\n \t}\n \n \t/*\n@@ -147,6 +150,7 @@ static void homer_init_chip(struct proc_chip *chip)\n \t\tchip->slw_base = sbase;\n \t\tchip->slw_bar_size = ssize;\n \t\tchip->slw_image_size = ssize; /* will be adjusted later */\n+\t\t/* XXX */\n \t}\n \n \tif (read_pba_bar(chip, bar_occ_common, &obase, &osize)) {\n@@ -154,6 +158,7 @@ static void homer_init_chip(struct proc_chip *chip)\n \t\t      obase, osize / 0x100000);\n \t\tchip->occ_common_base = obase;\n \t\tchip->occ_common_size = osize;\n+\t\tvm_map_global(\"OCC Common Area\", obase, osize, false, false);\n \t}\n }\n \ndiff --git a/hw/lpc-uart.c b/hw/lpc-uart.c\nindex bca10e0e9..9e89050fb 100644\n--- a/hw/lpc-uart.c\n+++ b/hw/lpc-uart.c\n@@ -600,6 +600,8 @@ void early_uart_init(void)\n \tif (!mmio_uart_base)\n \t\treturn;\n \n+\tvm_map_global(\"UART MMIO\", (unsigned long)mmio_uart_base, 8, true, true);\n+\n \tclk = dt_prop_get_u32(uart_node, \"clock-frequency\");\n \tbaud = dt_prop_get_u32(uart_node, \"current-speed\");\n \n@@ -608,6 +610,7 @@ void early_uart_init(void)\n \t\tprlog(PR_DEBUG, \"UART: Using UART at %p\\n\", mmio_uart_base);\n \t} else {\n \t\tprerror(\"UART: Early init failed!\");\n+\t\tvm_unmap_global((unsigned long)mmio_uart_base, 8);\n \t\tmmio_uart_base = NULL;\n \t}\n }\n@@ -619,9 +622,6 @@ void uart_init(void)\n \tchar *path __unused;\n \tconst uint32_t *irqp;\n \n-\t/* Clean up after early_uart_init() */\n-\tmmio_uart_base = NULL;\n-\n \t/* UART lock is in the console path and thus must block\n \t * printf re-entrancy\n \t */\n@@ -639,13 +639,28 @@ void uart_init(void)\n \t * directly mapped UARTs in simulation environments\n \t */\n \tif (n->parent == dt_root) {\n+\t\tvoid 
*base;\n+\n \t\tprintf(\"UART: Found at root !\\n\");\n-\t\tmmio_uart_base = (void *)dt_translate_address(n, 0, NULL);\n-\t\tif (!mmio_uart_base) {\n+\n+\t\tbase = (void *)dt_translate_address(n, 0, NULL);\n+\t\tif (!base) {\n \t\t\tprintf(\"UART: Failed to translate address !\\n\");\n \t\t\treturn;\n \t\t}\n \n+\t\tif (mmio_uart_base != base) {\n+\t\t\tvoid *old;\n+\n+\t\t\tvm_map_global(\"UART MMIO\", (unsigned long)base, 8, true, true);\n+\t\t\told = mmio_uart_base;\n+\t\t\tmmio_uart_base = base;\n+\n+\t\t\t/* Clean up after early_uart_init() */\n+\t\t\tif (old)\n+\t\t\t\tvm_unmap_global((unsigned long)old, 8);\n+\t\t}\n+\n \t\t/* If it has an interrupt properly, we consider this to be\n \t\t * a direct XICS/XIVE interrupt\n \t\t */\n@@ -674,6 +689,12 @@ void uart_init(void)\n \t\t\tlpc_irq = be32_to_cpu(*irqp);\n \t\t\tprlog(PR_DEBUG, \"UART: Using LPC IRQ %d\\n\", lpc_irq);\n \t\t}\n+\n+\t\t/* Clean up after early_uart_init() */\n+\t\tif (mmio_uart_base) {\n+\t\t\tvm_unmap_global((unsigned long)mmio_uart_base, 8);\n+\t\t\tmmio_uart_base = NULL;\n+\t\t}\n \t}\n \n \ndiff --git a/hw/lpc.c b/hw/lpc.c\nindex 3f5109d73..d040e4136 100644\n--- a/hw/lpc.c\n+++ b/hw/lpc.c\n@@ -1259,6 +1259,8 @@ static void lpc_init_chip_p9(struct dt_node *opb_node)\n \tif (!lpc_node)\n \t\treturn;\n \n+\tvm_map_global(\"LPC MMIO\", addr, 0x100000000UL, true, true);\n+\n \tlpc = zalloc(sizeof(struct lpcm));\n \tassert(lpc);\n \tlpc->chip_id = gcid;\ndiff --git a/hw/phb4.c b/hw/phb4.c\nindex 9a38dc752..79037d767 100644\n--- a/hw/phb4.c\n+++ b/hw/phb4.c\n@@ -5773,6 +5773,7 @@ static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,\n \tuint64_t val, phb_bar = 0, irq_bar = 0, bar_en;\n \tuint64_t mmio0_bar = 0, mmio0_bmask, mmio0_sz;\n \tuint64_t mmio1_bar = 0, mmio1_bmask, mmio1_sz;\n+\tuint64_t bar_sz;\n \tuint64_t reg[4];\n \tvoid *foo;\n \tuint64_t mmio_win[4];\n@@ -5802,7 +5803,8 @@ static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,\n \tbar_en 
= 0;\n \n \t/* Initialize PHB register BAR */\n-\tphys_map_get(gcid, PHB4_REG_SPC, phb_num, &phb_bar, NULL);\n+\tphys_map_get(gcid, PHB4_REG_SPC, phb_num, &phb_bar, &bar_sz);\n+\tvm_map_global(\"PHB REGS\", phb_bar, bar_sz, true, true);\n \trc = xscom_write(gcid, nest_stack + XPEC_NEST_STK_PHB_REG_BAR,\n \t\t\t phb_bar << 8);\n \n@@ -5816,18 +5818,21 @@ static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,\n \tbar_en |= XPEC_NEST_STK_BAR_EN_PHB;\n \n \t/* Same with INT BAR (ESB) */\n-\tphys_map_get(gcid, PHB4_XIVE_ESB, phb_num, &irq_bar, NULL);\n+\tphys_map_get(gcid, PHB4_XIVE_ESB, phb_num, &irq_bar, &bar_sz);\n+\tvm_map_global(\"PHB IRQ\", irq_bar, bar_sz, true, true);\n \txscom_write(gcid, nest_stack + XPEC_NEST_STK_IRQ_BAR, irq_bar << 8);\n \tbar_en |= XPEC_NEST_STK_BAR_EN_INT;\n \n \n \t/* Same with MMIO windows */\n \tphys_map_get(gcid, PHB4_64BIT_MMIO, phb_num, &mmio0_bar, &mmio0_sz);\n+\tvm_map_global(\"PHB MMIO0\", mmio0_bar, mmio0_sz, true, true);\n \tmmio0_bmask =  (~(mmio0_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;\n \txscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0, mmio0_bar << 8);\n \txscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0_MASK, mmio0_bmask << 8);\n \n \tphys_map_get(gcid, PHB4_32BIT_MMIO, phb_num, &mmio1_bar, &mmio1_sz);\n+\tvm_map_global(\"PHB MMIO1\", mmio1_bar, mmio1_sz, true, true);\n \tmmio1_bmask =  (~(mmio1_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;\n \txscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1, mmio1_bar << 8);\n \txscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1_MASK, mmio1_bmask << 8);\ndiff --git a/hw/psi.c b/hw/psi.c\nindex 5435c4655..74f497801 100644\n--- a/hw/psi.c\n+++ b/hw/psi.c\n@@ -964,6 +964,8 @@ static bool psi_init_psihb(struct dt_node *psihb)\n \n \tlist_add(&psis, &psi->list);\n \n+\tvm_map_global(\"PSI\", (unsigned long)psi->regs, 0x100, true, true);\n+\n \tval = in_be64(psi->regs + PSIHB_CR);\n \tif (val & PSIHB_CR_FSP_LINK_ACTIVE) {\n \t\tlock(&psi_lock);\ndiff --git a/hw/slw.c 
b/hw/slw.c\nindex c872b630b..9ddb5393e 100644\n--- a/hw/slw.c\n+++ b/hw/slw.c\n@@ -166,7 +166,7 @@ static void slw_patch_reset(void)\n \t\t*(sav++) = *(dst);\n \t\t*(dst++) = *(src++);\n \t}\n-\tsync_icache();\n+\tsync_icache(0);\n }\n \n static void slw_unpatch_reset(void)\n@@ -182,7 +182,7 @@ static void slw_unpatch_reset(void)\n \t\t*(dst++) = *(sav++);\n \t\tsrc++;\n \t}\n-\tsync_icache();\n+\tsync_icache(0);\n }\n \n static bool slw_general_init(struct proc_chip *chip, struct cpu_thread *c)\ndiff --git a/hw/xive.c b/hw/xive.c\nindex a9f1e7707..ec0d1f5b3 100644\n--- a/hw/xive.c\n+++ b/hw/xive.c\n@@ -1621,6 +1621,7 @@ static bool xive_configure_bars(struct xive *x)\n \n \t/* IC BAR */\n \tphys_map_get(chip_id, XIVE_IC, 0, (uint64_t *)&x->ic_base, &x->ic_size);\n+\tvm_map_global(\"XIVE IC\", (unsigned long)x->ic_base, x->ic_size, true, true);\n \tval = (uint64_t)x->ic_base | CQ_IC_BAR_VALID;\n \tif (IC_PAGE_SIZE == 0x10000) {\n \t\tval |= CQ_IC_BAR_64K;\n@@ -1636,6 +1637,8 @@ static bool xive_configure_bars(struct xive *x)\n \t * all phys_map_get(XIVE_TM) calls.\n \t */\n \tphys_map_get(0, XIVE_TM, 0, (uint64_t *)&x->tm_base, &x->tm_size);\n+\tif (chip_id == 0)\n+\t\tvm_map_global(\"XIVE TM\", (unsigned long)x->tm_base, x->tm_size, true, true);\n \tval = (uint64_t)x->tm_base | CQ_TM_BAR_VALID;\n \tif (TM_PAGE_SIZE == 0x10000) {\n \t\tx->tm_shift = 16;\n@@ -1651,6 +1654,7 @@ static bool xive_configure_bars(struct xive *x)\n \n \t/* PC BAR. Clear first, write mask, then write value */\n \tphys_map_get(chip_id, XIVE_PC, 0, (uint64_t *)&x->pc_base, &x->pc_size);\n+\tvm_map_global(\"XIVE PC\", (unsigned long)x->pc_base, x->pc_size, true, true);\n \txive_regwx(x, CQ_PC_BAR, 0);\n \tif (x->last_reg_error)\n \t\treturn false;\n@@ -1665,6 +1669,7 @@ static bool xive_configure_bars(struct xive *x)\n \n \t/* VC BAR. 
Clear first, write mask, then write value */\n \tphys_map_get(chip_id, XIVE_VC, 0, (uint64_t *)&x->vc_base, &x->vc_size);\n+\tvm_map_global(\"XIVE VC\", (unsigned long)x->vc_base, x->vc_size, true, true);\n \txive_regwx(x, CQ_VC_BAR, 0);\n \tif (x->last_reg_error)\n \t\treturn false;\ndiff --git a/hw/xscom.c b/hw/xscom.c\nindex bfe51c22e..40cad2136 100644\n--- a/hw/xscom.c\n+++ b/hw/xscom.c\n@@ -869,6 +869,8 @@ void xscom_init(void)\n \t\tassert(reg);\n \n \t\tchip->xscom_base = dt_translate_address(xn, 0, NULL);\n+\t\t/* XXX: how large is this window? */\n+\t\tvm_map_global(\"XSCOM MMIO\", chip->xscom_base, 0x200000000UL, true, true);\n \n \t\t/* Grab processor type and EC level */\n \t\txscom_init_chip_info(chip);\n@@ -882,7 +884,7 @@ void xscom_init(void)\n \t\tprlog(PR_NOTICE, \"CHIP: Chip ID %04x type: %s DD%x.%x%d\\n\",\n \t\t      gcid, chip_name, chip->ec_level >> 4,\n \t\t      chip->ec_level & 0xf, chip->ec_rev);\n-\t\tprlog(PR_DEBUG, \"XSCOM: Base address: 0x%llx\\n\", chip->xscom_base);\n+\t\tprlog(PR_NOTICE, \"XSCOM: Base address: 0x%llx\\n\", chip->xscom_base);\n \t}\n \n \t/* Collect details to trigger xstop via XSCOM write */\ndiff --git a/include/cmpxchg.h b/include/cmpxchg.h\nindex 28911c08c..a46c9765b 100644\n--- a/include/cmpxchg.h\n+++ b/include/cmpxchg.h\n@@ -18,6 +18,9 @@\n #define __CMPXCHG_H\n \n #ifndef __TEST__\n+#include <stdint.h>\n+#include <processor.h>\n+\n /*\n  * Bare cmpxchg, no barriers.\n  */\ndiff --git a/include/cpu.h b/include/cpu.h\nindex 011b12bb9..7d1d35bc7 100644\n--- a/include/cpu.h\n+++ b/include/cpu.h\n@@ -25,6 +25,19 @@\n #include <stack.h>\n #include <timer.h>\n \n+struct vm_map {\n+\tstruct list_node list;\n+\n+\tconst char *name;\n+\tuint64_t address;\n+\tuint64_t pa;\n+\tuint64_t length;\n+\tbool readable;\n+\tbool writeable;\n+\tbool executable;\n+\tbool ci;\n+};\n+\n /*\n  * cpu_thread is our internal structure representing each\n  * thread in the system\n@@ -83,10 +96,19 @@ struct cpu_thread {\n \tstruct 
bt_entry\t\t\tstack_bot_bt[CPU_BACKTRACE_SIZE];\n \tstruct bt_metadata\t\tstack_bot_bt_metadata;\n #endif\n+\t/*\n+\t * Per-thread VM parameters\n+\t */\n+\tstruct vm_map\t\t\tvm_local_map; /* per-cpu map */\n+\tbool\t\t\t\tvm_local_map_inuse;\n+\tuint8_t\t\t\t\tvm_slb_rr; /* RR allocator */\n+\tbool\t\t\t\tvm_setup; /* virtual memory is up */\n+\n \tstruct lock\t\t\tjob_lock;\n \tstruct list_head\t\tjob_queue;\n \tuint32_t\t\t\tjob_count;\n \tbool\t\t\t\tjob_has_no_return;\n+\n \t/*\n \t * Per-core mask tracking for threads in HMI handler and\n \t * a cleanup done bit.\ndiff --git a/include/elf-abi.h b/include/elf-abi.h\nindex e8397f70a..18b5c3f07 100644\n--- a/include/elf-abi.h\n+++ b/include/elf-abi.h\n@@ -34,7 +34,15 @@\n static inline uint64_t function_entry_address(void *func)\n {\n #ifdef ELF_ABI_v2\n-\tu32 *insn = func;\n+\tu32 *i;\n+\tu32 insn;\n+\tu32 insn2;\n+\n+\ti = vm_map((unsigned long)func, sizeof(insn*2), false);\n+\tinsn = *i;\n+\tinsn2 = *(i+1);\n+\tvm_unmap((unsigned long)func, sizeof(insn*2));\n+\n \t/*\n \t * A PPC64 ABIv2 function may have a local and a global entry\n \t * point. 
We use the local entry point for branch tables called\n@@ -51,12 +59,12 @@ static inline uint64_t function_entry_address(void *func)\n \t * lis   r2,XXXX\n \t * addi  r2,r2,XXXX\n \t */\n-\tif ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||\n-\t     ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&\n-\t    ((*(insn+1) & OP_RT_RA_MASK) == ADDI_R2_R2))\n-\t\treturn (uint64_t)(insn + 2);\n+\tif ((((insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||\n+\t     ((insn & OP_RT_RA_MASK) == LIS_R2)) &&\n+\t    ((insn2 & OP_RT_RA_MASK) == ADDI_R2_R2))\n+\t\treturn (uint64_t)(i + 2);\n \telse\n-\t\treturn (uint64_t)func;\n+\t\treturn (uint64_t)i;\n #else\n \treturn *(uint64_t *)func;\n #endif\ndiff --git a/include/io.h b/include/io.h\nindex c056c37e4..cc8964049 100644\n--- a/include/io.h\n+++ b/include/io.h\n@@ -20,6 +20,7 @@\n #ifndef __ASSEMBLY__\n \n #include <compiler.h>\n+#include <skiboot.h>\n #include <stdint.h>\n #include <processor.h>\n #include <ccan/endian/endian.h>\n@@ -35,8 +36,14 @@\n static inline uint8_t __in_8(const volatile uint8_t *addr)\n {\n \tuint8_t val;\n-\tasm volatile(\"lbzcix %0,0,%1\" :\n+\n+\tif (vm_realmode())\n+\t\tasm volatile(\"lbzcix %0,0,%1\" :\n+\t\t     \"=r\"(val) : \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\telse\n+\t\tasm volatile(\"lbzx %0,0,%1\" :\n \t\t     \"=r\"(val) : \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\n \treturn val;\n }\n \n@@ -49,8 +56,14 @@ static inline uint8_t in_8(const volatile uint8_t *addr)\n static inline uint16_t __in_be16(const volatile uint16_t *addr)\n {\n \tuint16_t val;\n-\tasm volatile(\"lhzcix %0,0,%1\" :\n+\n+\tif (vm_realmode())\n+\t\tasm volatile(\"lhzcix %0,0,%1\" :\n \t\t     \"=r\"(val) : \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\telse\n+\t\tasm volatile(\"lhzx %0,0,%1\" :\n+\t\t     \"=r\"(val) : \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\n \treturn val;\n }\n \n@@ -68,8 +81,14 @@ static inline uint16_t in_le16(const volatile uint16_t *addr)\n static inline uint32_t __in_be32(const volatile uint32_t *addr)\n {\n 
\tuint32_t val;\n-\tasm volatile(\"lwzcix %0,0,%1\" :\n+\n+\tif (vm_realmode())\n+\t\tasm volatile(\"lwzcix %0,0,%1\" :\n+\t\t     \"=r\"(val) : \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\telse\n+\t\tasm volatile(\"lwzx %0,0,%1\" :\n \t\t     \"=r\"(val) : \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\n \treturn val;\n }\n \n@@ -87,8 +106,14 @@ static inline uint32_t in_le32(const volatile uint32_t *addr)\n static inline uint64_t __in_be64(const volatile uint64_t *addr)\n {\n \tuint64_t val;\n-\tasm volatile(\"ldcix %0,0,%1\" :\n+\n+\tif (vm_realmode())\n+\t\tasm volatile(\"ldcix %0,0,%1\" :\n \t\t     \"=r\"(val) : \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\telse\n+\t\tasm volatile(\"ldx %0,0,%1\" :\n+\t\t     \"=r\"(val) : \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\n \treturn val;\n }\n \n@@ -105,7 +130,11 @@ static inline uint64_t in_le64(const volatile uint64_t *addr)\n \n static inline void __out_8(volatile uint8_t *addr, uint8_t val)\n {\n-\tasm volatile(\"stbcix %0,0,%1\"\n+\tif (vm_realmode())\n+\t\tasm volatile(\"stbcix %0,0,%1\"\n+\t\t     : : \"r\"(val), \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\telse\n+\t\tasm volatile(\"stbx %0,0,%1\"\n \t\t     : : \"r\"(val), \"r\"(addr), \"m\"(*addr) : \"memory\");\n }\n \n@@ -117,7 +146,11 @@ static inline void out_8(volatile uint8_t *addr, uint8_t val)\n \n static inline void __out_be16(volatile uint16_t *addr, uint16_t val)\n {\n-\tasm volatile(\"sthcix %0,0,%1\"\n+\tif (vm_realmode())\n+\t\tasm volatile(\"sthcix %0,0,%1\"\n+\t\t     : : \"r\"(val), \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\telse\n+\t\tasm volatile(\"sthx %0,0,%1\"\n \t\t     : : \"r\"(val), \"r\"(addr), \"m\"(*addr) : \"memory\");\n }\n \n@@ -134,7 +167,11 @@ static inline void out_le16(volatile uint16_t *addr, uint16_t val)\n \n static inline void __out_be32(volatile uint32_t *addr, uint32_t val)\n {\n-\tasm volatile(\"stwcix %0,0,%1\"\n+\tif (vm_realmode())\n+\t\tasm volatile(\"stwcix %0,0,%1\"\n+\t\t     : : \"r\"(val), \"r\"(addr), 
\"m\"(*addr) : \"memory\");\n+\telse\n+\t\tasm volatile(\"stwx %0,0,%1\"\n \t\t     : : \"r\"(val), \"r\"(addr), \"m\"(*addr) : \"memory\");\n }\n \n@@ -151,7 +188,11 @@ static inline void out_le32(volatile uint32_t *addr, uint32_t val)\n \n static inline void __out_be64(volatile uint64_t *addr, uint64_t val)\n {\n-\tasm volatile(\"stdcix %0,0,%1\"\n+\tif (vm_realmode())\n+\t\tasm volatile(\"stdcix %0,0,%1\"\n+\t\t     : : \"r\"(val), \"r\"(addr), \"m\"(*addr) : \"memory\");\n+\telse\n+\t\tasm volatile(\"stdx %0,0,%1\"\n \t\t     : : \"r\"(val), \"r\"(addr), \"m\"(*addr) : \"memory\");\n }\n \ndiff --git a/include/mem_region.h b/include/mem_region.h\nindex d9e490af4..a18494d44 100644\n--- a/include/mem_region.h\n+++ b/include/mem_region.h\n@@ -46,6 +46,7 @@ struct mem_region {\n \tstruct list_node list;\n \tconst char *name;\n \tuint64_t start, len;\n+\tuint64_t vm_mapped_len;\n \tstruct dt_node *node;\n \tenum mem_region_type type;\n \tstruct list_head free_list;\ndiff --git a/include/processor.h b/include/processor.h\nindex b759752b5..56d988189 100644\n--- a/include/processor.h\n+++ b/include/processor.h\n@@ -52,7 +52,9 @@\n #define SPR_SRR1\t0x01b\t/* RW: Exception save/restore reg 1 */\n #define SPR_CFAR\t0x01c\t/* RW: Come From Address Register */\n #define SPR_AMR\t\t0x01d\t/* RW: Authority Mask Register */\n+#define SPR_PID\t\t0x030\t/* RW: PID register */\n #define SPR_IAMR\t0x03d\t/* RW: Instruction Authority Mask Register */\n+#define SPR_UAMOR\t0x09d\n #define SPR_RPR\t\t0x0ba   /* RW: Relative Priority Register */\n #define SPR_TBRL\t0x10c\t/* RO: Timebase low */\n #define SPR_TBRU\t0x10d\t/* RO: Timebase high */\n@@ -74,10 +76,12 @@\n #define SPR_HSRR1\t0x13b\t/* RW: HV Exception save/restore reg 1 */\n #define SPR_TFMR\t0x13d\n #define SPR_LPCR\t0x13e\n+#define SPR_LPID\t0x13f\t/* RW: LPID register */\n #define SPR_HMER\t0x150\t/* Hypervisor Maintenance Exception */\n #define SPR_HMEER\t0x151\t/* HMER interrupt enable mask */\n #define 
SPR_PCR\t\t0x152\n #define SPR_AMOR\t0x15d\n+#define SPR_PTCR\t0x1d0\t/* RW: Partition table control register */\n #define SPR_PSSCR\t0x357   /* RW: Stop status and control (ISA 3) */\n #define SPR_TSCR\t0x399\n #define SPR_HID0\t0x3f0\n@@ -93,6 +97,11 @@\n #define SPR_SRR1_PM_WAKE_SRESET\t0x100000\n #define SPR_SRR1_PM_WAKE_MCE\t0x3c0000\t/* Use reserved value for MCE */\n \n+/* Bits in DSISR */\n+\n+#define\tDSISR_ISSTORE\t\t0x02000000\n+\n+\n /* Bits in LPCR */\n \n /* Powersave Exit Cause Enable is different on each generation */\n@@ -322,9 +331,9 @@ static inline void isync(void)\n /*\n  * Cache sync\n  */\n-static inline void sync_icache(void)\n+static inline void sync_icache(unsigned long ptr)\n {\n-\tasm volatile(\"sync; icbi 0,%0; sync; isync\" : : \"r\" (0) : \"memory\");\n+\tasm volatile(\"sync; icbi 0,%0; sync; isync\" : : \"r\" (ptr) : \"memory\");\n }\n \n /*\ndiff --git a/include/skiboot.h b/include/skiboot.h\nindex 1b3bacbe7..98a69ef1d 100644\n--- a/include/skiboot.h\n+++ b/include/skiboot.h\n@@ -56,8 +56,13 @@ extern char __sym_map_end[];\n extern char _romem_end[];\n \n #ifndef __TESTING__\n+extern char _stext[], _etext[];\n /* Readonly section start and end. 
*/\n extern char __rodata_start[], __rodata_end[];\n+extern char _sdata[], _edata[];\n+extern char __sym_map_start[], __sym_map_end[];\n+extern char _sbss[], _ebss[];\n+extern char _end[];\n \n static inline bool is_rodata(const void *p)\n {\n@@ -191,6 +196,7 @@ extern void disable_fast_reboot(const char *reason);\n extern void add_fast_reboot_dt_entries(void);\n extern void fast_reboot(void);\n extern void __noreturn __secondary_cpu_entry(void);\n+extern void __noreturn __return_cpu_entry(void);\n extern void __noreturn load_and_boot_kernel(bool is_reboot);\n extern void cleanup_local_tlb(void);\n extern void cleanup_global_tlb(void);\n@@ -341,4 +347,25 @@ extern int fake_nvram_info(uint32_t *total_size);\n extern int fake_nvram_start_read(void *dst, uint32_t src, uint32_t len);\n extern int fake_nvram_write(uint32_t offset, void *src, uint32_t size);\n \n+/* core/vm.c */\n+#define PAGE_SIZE 4096\n+\n+bool vm_realmode(void);\n+void vm_map_global(const char *name, unsigned long addr, unsigned long len, bool rw, bool ci);\n+void vm_unmap_global(unsigned long addr, unsigned long len);\n+void *vm_map(unsigned long addr, unsigned long len, bool rw);\n+void vm_unmap(unsigned long addr, unsigned long len);\n+void vm_init(void);\n+void vm_init_stacks(void);\n+void vm_destroy(void);\n+void vm_init_secondary(void);\n+void vm_enter(void);\n+void vm_exit(void);\n+void vm_exit_cleanup(void);\n+void vm_map_stacks(void);\n+bool vm_dslb(uint64_t nia, uint64_t dar);\n+bool vm_islb(uint64_t nia);\n+bool vm_dsi(uint64_t nia, uint64_t dar, bool store);\n+bool vm_isi(uint64_t nia);\n+\n #endif /* __SKIBOOT_H */\ndiff --git a/libstb/container.c b/libstb/container.c\nindex a720fbbf1..aef169e1c 100644\n--- a/libstb/container.c\n+++ b/libstb/container.c\n@@ -19,14 +19,20 @@\n \n bool stb_is_container(const void *buf, size_t size)\n {\n+\tuint32_t *t;\n \tROM_container_raw *c;\n+\tbool ret = true;;\n \n \tc = (ROM_container_raw*) buf;\n \tif (!buf || size < SECURE_BOOT_HEADERS_SIZE)\n 
\t\treturn false;\n-\tif (be32_to_cpu(c->magic_number) != ROM_MAGIC_NUMBER )\n-\t\treturn false;\n-\treturn true;\n+\n+\tt = vm_map((unsigned long)&c->magic_number, sizeof(*t), false);\n+\tif (be32_to_cpu(*t) != ROM_MAGIC_NUMBER)\n+\t\tret = false;\n+\tvm_unmap((unsigned long)&c->magic_number, sizeof(*t));\n+\n+\treturn ret;\n }\n \n uint32_t stb_payload_magic(const void *buf, size_t size)\ndiff --git a/skiboot.lds.S b/skiboot.lds.S\nindex 4a7727dc9..f157ebfc2 100644\n--- a/skiboot.lds.S\n+++ b/skiboot.lds.S\n@@ -95,18 +95,33 @@ SECTIONS\n \t\tKEEP(*(.cpuctrl.data))\n \t}\n \n+\t/* Relocations */\n \t. = ALIGN(0x10);\n+\t.dynamic : {\n+\t\t__dynamic_start = .;\n+\t\t*(.dynamic)\n+\t\t__dynamic_end = .;\n+\t}\n+\n+\t. = ALIGN(0x10);\n+\t.rela.dyn : {\n+\t\t__rela_dyn_start = .;\n+\t\t*(.rela*)\n+\t\t__rela_dyn_end = .;\n+\t}\n+\n+\t. = ALIGN(0x1000);\n \t_stext = .;\n  \t.text : {\n \t\t*(.text*)\n \t\t*(.sfpr .glink)\n \t}\n \t_etext = .;\n+\t. = ALIGN(0x1000);\n \n+\t__rodata_start = .;\n \t.rodata : {\n-\t\t__rodata_start = .;\n \t\t*(.rodata .rodata.*)\n-\t\t__rodata_end = .;\n \t}\n \n \t. = ALIGN(0x10);\n@@ -130,38 +145,21 @@ SECTIONS\n \t\t*(.toc)\n \t}\n \n-\t. = ALIGN(0x10);\n-\t.opal_table : {\n-\t\t__opal_table_start = .;\n-\t\tKEEP(*(.opal_table))\n-\t\t__opal_table_end = .;\n-\t}\n-\n \t.platforms : {\n \t\t__platforms_start = .;\n \t\tKEEP(*(.platforms))\n \t\t__platforms_end = .;\n \t}\n \n-\t/* Do I need to keep these ? */\n-\t.dynsym : { *(.dynsym)\t}\n-\t.dynstr : { *(.dynstr)\t}\n-\n-\t/* Relocations */\n \t. = ALIGN(0x10);\n-\t.dynamic : {\n-\t\t__dynamic_start = .;\n-\t\t*(.dynamic)\n-\t\t__dynamic_end = .;\n+\t.opal_table : {\n+\t\t__opal_table_start = .;\n+\t\tKEEP(*(.opal_table))\n+\t\t__opal_table_end = .;\n \t}\n+\t__rodata_end = .;\n \n-\t. = ALIGN(0x10);\n-\t.rela.dyn : {\n-\t\t__rela_dyn_start = .;\n-\t\t*(.rela*)\n-\t\t__rela_dyn_end = .;\n-\t}\n-\t.plt    : { *(.plt) *(.iplt) }\n+\t. 
= ALIGN(0x1000);\n \n \t.hash          : { *(.hash)   }\n \t.gnu.hash      : { *(.gnu.hash) }\n@@ -171,7 +169,6 @@ SECTIONS\n \t.gnu.version_d : { *(.gnu.version_d) }\n \t.gnu.version_r : { *(.gnu.version_r) }\n \n-\t. = ALIGN(0x10);\n \t.sym_map : {\n \t\t__sym_map_start = . ;\n \t\tKEEP(*(.sym_map))\n@@ -184,6 +181,9 @@ SECTIONS\n \t */\n \t_romem_end = .;\n \n+\t. = ALIGN(0x1000);\n+\n+\t_sdata = .;\n \t.data : {\n \t\t/*\n \t\t * A couple of things that need to be 4K aligned and\n@@ -200,6 +200,10 @@ SECTIONS\n \t\t*(.toc1)\n \t\t*(.branch_lt)\n \t}\n+\t.plt    : { *(.plt) *(.iplt) }\n+\t_edata = .;\n+\n+\t. = ALIGN(0x1000);\n \n \t/* We locate the BSS at 4M to leave room for the symbol map */\n \t. = 0x400000;\n",
    "prefixes": [
        "RFC",
        "2/3"
    ]
}