diff mbox series

[RFC,v2,2/8] Add ultravisor support in OPAL

Message ID 20190920135823.471-3-grimm@linux.ibm.com
State RFC
Headers show
Series PEF support in skiboot | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning Failed to apply on branch master (470ffb5f29d741c3bed600f7bb7bf0cbb270e05a)
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Ryan Grimm Sept. 20, 2019, 1:58 p.m. UTC
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>

Ultravisor is the firmware which runs in the new privilege mode called
ultravisor mode, which was introduced in Power 9. Ultravisor enables
running of secure virtual machines on the host.

Protected execution facility in Power 9 uses special memory areas
designated as secure memory, which can be accessed only in the ultravisor
mode. This protection is provided by the hardware. These designated memory
areas are used by the guests running as secure virtual machines.

The secure memory ranges are provided by the hostboot through HDATA. Get
secure memory ranges from HDATA and add to device tree for ultravisor
firmware.

Ultravisor firmware is present as a lid file or as 'UVISOR' partition.
Use flash resource load helper to load ultravisor firmware into secure
memory area pointed by the hdata.

The ultravisor image is started on each CPU after being loaded from
the flash/fsp.  It is copied to secure memory and run.

Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
[ grimm: Add init_uv comments, logging, and logic cleanups ]
[ grimm: Increase UV image max size to 2MB ]
[ grimm: Redefine the OPAL UV shared data structure ]
[ grimm: Remove Hostboot regions from secure range 0 ]
[ grimm: SPDX licensing ]
[ grimm: clean up allocation and freeing ]
[ grimm: DT bindings fixups ]
[ grimm: Define load identifiers for ultra.lid.xz ]
[ grimm: Improve error logging ]
[ grimm: Fix secure-memory-ranges for multiple ranges ]
[ grimm: hdata: Don't ignore range if SMF is enabled ]
[ grimm: use cleanup_addr on secure mem ranges ]
[ grimm: ret code checks, various cleanups for BML ]
Signed-off-by: Ryan Grimm <grimm@linux.ibm.com>
[ andmike: Split init and start of ultravisor ]
Signed-off-by: Michael Anderson <andmike@linux.ibm.com>
---
 asm/head.S               |  22 ++
 core/flash.c             |   1 +
 core/init.c              |  11 +
 hdata/memory.c           |  17 +-
 hw/Makefile.inc          |   1 +
 hw/fsp/fsp.c             |   2 +
 hw/ultravisor.c          | 487 +++++++++++++++++++++++++++++++++++++++
 include/platform.h       |   1 +
 include/processor.h      |  12 +
 include/ultravisor-api.h |  18 ++
 include/ultravisor.h     |  27 +++
 11 files changed, 597 insertions(+), 2 deletions(-)
 create mode 100644 hw/ultravisor.c
 create mode 100644 include/ultravisor-api.h
 create mode 100644 include/ultravisor.h

Comments

Oliver O'Halloran Nov. 17, 2019, 11:49 p.m. UTC | #1
On Sat, Sep 21, 2019 at 12:00 AM Ryan Grimm <grimm@linux.ibm.com> wrote:
>
> From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
>
> Ultravisor is the firmware which runs in the new privelege mode called
> ultravisor mode, which was introduced in Power 9. Ultravisor enables
> running of secure virtual machines on the host.
>
> Protected execution facility in Power 9 uses special memory areas
> designated as secure memory, which can be accessed only in the ultravisor
> mode. This protection is provided by the hardware. These designated memory
> areas are used by the guests running as secure virtual machines.
>
> The secure memory ranges are provided by the hostboot through HDATA. Get
> secure memory ranges from HDATA and add to device tree for ultravisor
> firmware.
>
> Ultravisor firmware is present as a lid file or as 'UVISOR' partition.
> Use flash resource load helper to load ultravisor firmware into secure
> memory area pointed by the hdata.
>
> The ultravisor image after is start on each CPU after being loaded from
> the flash/fsp.  It is copied to secure memory and run.
>
> Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> Signed-off-by: Santosh Sivaraj <santosh@fossix.org>
> [ grimm: Add init_uv comments, logging, and logic cleanups ]
> [ grimm: Increase UV image max size to 2MB ]
> [ grimm: Redfine the OPAL UV shared data structure ]
> [ grimm: Remove Hostboot regions from secure range 0 ]
> [ grimm: SPDX licensing ]
> [ grimm: clean up allocation and freeing ]
> [ grimm: DT bindings fixups ]
> [ grimm: Define load identifiers for ultra.lid.xz ]
> [ grimm: Improve error logging ]
> [ grimm: Fix secure-memory-ranges for multiple ranges ]
> [ grimm: hdata: Dont ignore range if SMF is enbaled ]
> [ grimm: use cleanup_addr on secure mem ranges ]
> [ grimm: ret code checks, various cleanups for BML ]
> Signed-off-by: Ryan Grimm <grimm@linux.ibm.com>
> [ andmike: Split init and start of ultravisor ]
> Signed-off-by: Michael Anderson <andmike@linux.ibm.com>
> ---
>  asm/head.S               |  22 ++
>  core/flash.c             |   1 +
>  core/init.c              |  11 +
>  hdata/memory.c           |  17 +-
>  hw/Makefile.inc          |   1 +
>  hw/fsp/fsp.c             |   2 +
>  hw/ultravisor.c          | 487 +++++++++++++++++++++++++++++++++++++++
>  include/platform.h       |   1 +
>  include/processor.h      |  12 +
>  include/ultravisor-api.h |  18 ++
>  include/ultravisor.h     |  27 +++
>  11 files changed, 597 insertions(+), 2 deletions(-)
>  create mode 100644 hw/ultravisor.c
>  create mode 100644 include/ultravisor-api.h
>  create mode 100644 include/ultravisor.h
>
> diff --git a/asm/head.S b/asm/head.S
> index e78dc520..18ce3044 100644
> --- a/asm/head.S
> +++ b/asm/head.S
> @@ -1065,3 +1065,25 @@ start_kernel_secondary:
>         mtctr   %r3
>         mfspr   %r3,SPR_PIR
>         bctr
> +
> +.global start_uv
> +start_uv:
> +       mflr    %r0
> +       std     %r0,16(%r1)
> +       sync
> +       icbi    0,%r3
> +       sync
> +       isync
> +       mtctr   %r3
> +       mr      %r3,%r4
> +       LOAD_IMM64(%r8,SKIBOOT_BASE);
> +       LOAD_IMM32(%r10, opal_entry - __head)
> +       add     %r9,%r8,%r10
> +       LOAD_IMM32(%r6, EPAPR_MAGIC)
> +       addi    %r7,%r5,1
> +       li      %r4,0
> +       li      %r5,0
> +       bctrl
> +       ld      %r0,16(%r1)
> +       mtlr    %r0
> +       blr
> diff --git a/core/flash.c b/core/flash.c
> index 203b695d..a9c25486 100644
> --- a/core/flash.c
> +++ b/core/flash.c
> @@ -42,6 +42,7 @@ static struct {
>         { RESOURCE_ID_INITRAMFS,RESOURCE_SUBID_NONE,            "ROOTFS" },
>         { RESOURCE_ID_CAPP,     RESOURCE_SUBID_SUPPORTED,       "CAPP" },
>         { RESOURCE_ID_IMA_CATALOG,  RESOURCE_SUBID_SUPPORTED,   "IMA_CATALOG" },
> +       { RESOURCE_ID_UV_IMAGE, RESOURCE_SUBID_NONE,            "UVISOR" },
>         { RESOURCE_ID_VERSION,  RESOURCE_SUBID_NONE,            "VERSION" },
>         { RESOURCE_ID_KERNEL_FW,        RESOURCE_SUBID_NONE,            "BOOTKERNFW" },
>  };
> diff --git a/core/init.c b/core/init.c
> index 25d827f2..ca83df7e 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -44,6 +44,7 @@
>  #include <sbe-p9.h>
>  #include <debug_descriptor.h>
>  #include <occ.h>
> +#include <ultravisor.h>
>
>  enum proc_gen proc_gen;
>  unsigned int pcie_max_link_speed;
> @@ -1203,6 +1204,11 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>         pci_nvram_init();
>
>         preload_capp_ucode();
> +
> +       /* preload and decompress ultravisor image */
> +       uv_preload_image();
> +       uv_decompress_image();
> +
>         start_preload_kernel();
>
>         /* Catalog decompression routine */
> @@ -1258,6 +1264,9 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>         /* Add the list of interrupts going to OPAL */
>         add_opal_interrupts();
>
> +       /* Init uiltravisor software */
ultravisor
> +       init_uv();
> +
>         /* Now release parts of memory nodes we haven't used ourselves... */
>         mem_region_release_unused();
>
> @@ -1275,6 +1284,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>
>         checksum_romem();
>
> +       start_ultravisor();
> +
>         load_and_boot_kernel(false);
>  }
>
> diff --git a/hdata/memory.c b/hdata/memory.c
> index 9af7ae71..25b8088d 100644
> --- a/hdata/memory.c
> +++ b/hdata/memory.c
> @@ -10,6 +10,7 @@
>  #include <types.h>
>  #include <inttypes.h>
>  #include <processor.h>
> +#include <ultravisor.h>
>
>  #include "spira.h"
>  #include "hdata.h"
> @@ -59,6 +60,8 @@ struct HDIF_ms_area_address_range {
>  #define MS_CONTROLLER_MCS_ID(id)       GETFIELD(PPC_BITMASK32(4, 7), id)
>  #define MS_CONTROLLER_MCA_ID(id)       GETFIELD(PPC_BITMASK32(8, 15), id)
>
> +#define MS_ATTR_SMF                    (PPC_BIT32(23))
> +
>  struct HDIF_ms_area_id {
>         __be16 id;
>  #define MS_PTYPE_RISER_CARD    0x8000
> @@ -163,6 +166,16 @@ static bool add_address_range(struct dt_node *root,
>                 return false;
>         }
>
> +       if (arange->mirror_attr & MS_ATTR_SMF) {
> +               prlog(PR_DEBUG, "Found secure memory");
> +               if (!uv_add_mem_range(reg[0], cleanup_addr(be64_to_cpu(arange->end)))) {
> +                       prlog(PR_INFO, "Failed to add secure memory range to DT\n");
> +                       mem_reserve_fw(name, reg[0], reg[1]);

The reservation facility is there to allow marking bits of otherwise
normal memory as "special" so that skiboot and the kernel won't
allocate over them and trash their contents. The secure memory ranges
are completely disjoint from normal memory by design so IMO they
should be top-level nodes, similar to the normal memory@<addr> nodes.
There's a lot of code in this patch that exists purely to work around
the decision to abuse reserved memory this way, so making that change
should simplify the skiboot changes too.

> +                       return false;
> +               } else
> +                       return true;
> +       }
> +
>         if (be16_to_cpu(id->flags) & MS_AREA_SHARED) {
>                 mem = dt_find_by_name_addr(dt_root, name, reg[0]);
>                 if (mem) {
> @@ -676,9 +689,9 @@ static void get_hb_reserved_mem(struct HDIF_common_hdr *ms_vpd)
>
>                 /*
>                  * Workaround broken HDAT reserve regions which are
> -                * bigger than 512MBget_hb_reserved_mem
> +                * bigger than 512MB and not secure memory
>                  */
> -               if ((end_addr - start_addr) > 0x20000000) {
> +               if (((end_addr - start_addr) > 0x20000000) && !(start_addr & UV_SECURE_MEM_BIT)) {
>                         prlog(PR_ERR, "MEM: Ignoring Bad HDAT reserve: too big\n");
>                         continue;
>                 }
> diff --git a/hw/Makefile.inc b/hw/Makefile.inc
> index b708bdfe..848898b9 100644
> --- a/hw/Makefile.inc
> +++ b/hw/Makefile.inc
> @@ -9,6 +9,7 @@ HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o
>  HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o
>  HW_OBJS += occ-sensor.o vas.o sbe-p8.o dio-p9.o lpc-port80h.o cache-p9.o
>  HW_OBJS += npu-opal.o npu3.o npu3-nvlink.o npu3-hw-procedures.o
> +HW_OBJS += ultravisor.o
>  HW=hw/built-in.a
>
>  include $(SRC)/hw/fsp/Makefile.inc
> diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
> index 6fa6534f..829e56f4 100644
> --- a/hw/fsp/fsp.c
> +++ b/hw/fsp/fsp.c
> @@ -114,6 +114,7 @@ static u64 fsp_hir_timeout;
>  #define KERNEL_LID_PHYP                        0x80a00701
>  #define KERNEL_LID_OPAL                        0x80f00101
>  #define INITRAMFS_LID_OPAL             0x80f00102
> +#define ULTRA_LID_OPAL                 0x80f00105
>
>  /*
>   * We keep track on last logged values for some things to print only on
> @@ -2375,6 +2376,7 @@ static struct {
>  } fsp_lid_map[] = {
>         { RESOURCE_ID_KERNEL,   RESOURCE_SUBID_NONE,    KERNEL_LID_OPAL },
>         { RESOURCE_ID_INITRAMFS,RESOURCE_SUBID_NONE,    INITRAMFS_LID_OPAL },
> +       { RESOURCE_ID_UV_IMAGE, RESOURCE_SUBID_NONE,    ULTRA_LID_OPAL },
>         { RESOURCE_ID_IMA_CATALOG,IMA_CATALOG_NIMBUS,   0x80f00103 },
>         { RESOURCE_ID_CAPP,     CAPP_IDX_MURANO_DD20,   0x80a02002 },
>         { RESOURCE_ID_CAPP,     CAPP_IDX_MURANO_DD21,   0x80a02001 },
> diff --git a/hw/ultravisor.c b/hw/ultravisor.c
> new file mode 100644
> index 00000000..8e3cceb4
> --- /dev/null
> +++ b/hw/ultravisor.c
> @@ -0,0 +1,487 @@
> +// SPDX-License-Identifier: Apache-2.0
> +/* Copyright 2018-2019 IBM Corp. */
> +
> +#include <skiboot.h>
> +#include <xscom.h>
> +#include <chip.h>
> +#include <device.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <ultravisor.h>
> +#include <mem_region.h>
> +#include <ultravisor-api.h>
> +#include <libfdt/libfdt.h>
> +
> +static char *uv_image = NULL;
> +static size_t uv_image_size;
> +struct xz_decompress *uv_xz = NULL;
> +static struct uv_opal *uv_opal;
> +
> +static struct dt_node *add_uv_dt_node(void)
> +{
> +       struct dt_node *dev, *uv;
> +
> +       dev = dt_new_check(dt_root, "ibm,ultravisor");
> +       if (!dev)
> +               return NULL;
> +
> +       dt_add_property_string(dev, "compatible", "ibm,ultravisor");
> +       uv = dt_new_check(dev, "firmware");
> +       if (!uv) {
> +               dt_free(dev);
> +               return NULL;
> +       }
> +
> +       dt_add_property_string(uv, "compatible", "firmware");

"firmware" ?

> +       return dev;
> +}
> +
> +static struct dt_node *find_uv_node(void)
> +{
> +       struct dt_node *uv_node, *dt;
> +
> +       uv_node = dt_find_compatible_node(dt_root, NULL, "ibm,uv-firmware");
> +       if (!uv_node) {
> +               prlog(PR_DEBUG, "ibm,uv-firmware compatible node not found, creating");
> +               dt = add_uv_dt_node();
> +               if (!dt)
> +                       return NULL;
> +               uv_node = dt_find_compatible_node(dt_root, NULL, "ibm,uv-firmware");
> +       }
> +
> +       return uv_node;
> +}
> +
> +static bool find_secure_mem_to_copy(uint64_t *target, uint64_t *sz)
> +{
> +       struct dt_node *uv_node = find_uv_node();
> +       const struct dt_property *ranges;
> +       uint64_t uv_pef_reg;
> +       uint64_t *range, sm_size, img_size = UV_LOAD_MAX_SIZE;
> +
> +       /*
> +        * "uv-secure-memory" property could have multiple
> +        * secure memory blocks. Pick first to load
> +        * ultravisor in it.
> +        */
> +       ranges = dt_find_property(uv_node, "secure-memory-ranges");
> +       if (!ranges)
> +               return false;
> +
> +       range = (void *)ranges->prop;
> +       do {
> +               uv_pef_reg = dt_get_number(range, 2);
> +               if (!uv_pef_reg)
> +                       return false;
> +
> +               sm_size = dt_get_number(range + 1, 2);
> +               if (sm_size > img_size)
> +                       break;
> +               range += 2;
> +       } while (range);
> +
> +       *target = uv_pef_reg;
> +       *sz = sm_size;
> +       return true;
> +}
> +
> +static uint64_t find_uv_fw_base_addr(struct dt_node *uv_node)
> +{
> +       uint64_t base_addr = 0;
> +
> +       if (dt_has_node_property(uv_node, "reg", NULL))
> +               base_addr = dt_prop_get_u64(uv_node, "reg");
> +
> +       return base_addr;
> +}
> +
> +static void reserve_secure_memory_region(void)
> +{
> +       struct dt_node *uv_node = find_uv_node();
> +       const struct dt_property *ranges;
> +       uint64_t *range, *rangesp, sm_size, addr;
> +       char buf[128];
> +       int i=0;
> +
> +       ranges = dt_find_property(uv_node, "secure-memory-ranges");
> +       if (!ranges)
> +               return;
> +
> +       for (rangesp = (uint64_t *)(ranges->prop + ranges->len),
> +                       range = (uint64_t *)ranges->prop;
> +                       range < rangesp;
> +                       range += 2) {
> +               addr = dt_get_number(range, 2);
> +               if (!addr)
> +                       break;
> +
> +               sm_size = dt_get_number(range + 1, 2);
> +               if (!sm_size)
> +                       break;
> +
> +               /* Remove Hostboot regions from secure memory 0 so we don't abort
> +                * on overlapping regions */
> +               if (i == 0) {
> +                       prlog(PR_INFO, "Secure region 0, removing HB region\n");
> +                       /* TODO: Check with Hostboot for memory map */
> +                       sm_size = sm_size - UV_HB_RESERVE_SIZE;
> +               }
> +
> +               snprintf(buf, 128, "ibm,secure-region-%d",i++);
> +               mem_reserve_fw(strdup(buf), addr, sm_size);
> +       }
> +
> +       return;
> +}
> +
> +static void reserve_uv_memory(struct uv_opal *uv_opal)
> +{
> +       if (uv_opal->uv_base_addr == UV_LOAD_BASE) {
> +               mem_reserve_fw("ibm,uv-code", UV_LOAD_BASE, UV_LOAD_MAX_SIZE);
> +       } else {
> +               reserve_secure_memory_region();
> +       }
Ditch the braces. That said, this appears to only be required because
of the decision to put the secure memory into reservations. If you
fix that, this function can probably go away entirely.

> +}
> +
> +static void cpu_start_ultravisor(void *data)
> +{
> +       struct uv_opal *ptr = (struct uv_opal *)data;
> +       start_uv(ptr->uv_base_addr, ptr);
> +}
> +
> +int start_ultravisor(void)
> +{
> +       struct cpu_thread *cpu;
> +       struct cpu_job **jobs;
> +       int i=0;
> +
> +       prlog(PR_NOTICE, "UV: Starting Ultravisor at 0x%llx sys_fdt 0x%llx uv_fdt 0x%0llx\n",
> +                               uv_opal->uv_base_addr, uv_opal->sys_fdt, uv_opal->uv_fdt);
> +
> +       /* Alloc memory for Jobs */
> +       jobs = zalloc(sizeof(struct cpu_job*) * cpu_max_pir);
> +
> +       for_each_available_cpu(cpu) {
> +               if (cpu == this_cpu())
> +                       continue;
> +               jobs[i++] = cpu_queue_job(cpu, "start_ultravisor",
> +                                       cpu_start_ultravisor, (void *)uv_opal);
> +       }
> +
> +       cpu_start_ultravisor((void *)uv_opal);
> +
> +       /* wait for everyone to sync back */
> +       while (i > 0) {
> +               cpu_wait_job(jobs[--i], true);
> +       }
ditch the braces

> +
> +       /* free used stuff */
> +       free(jobs);
> +
> +       /* Check everything is fine */
> +       if (uv_opal->uv_ret_code) {
> +               return OPAL_HARDWARE;
> +       }
ditch the braces

> +
> +       return OPAL_SUCCESS;
> +}
> +
> +static int create_dtb_uv(void *uv_fdt)
> +{
> +       if (fdt_create(uv_fdt, UV_FDT_MAX_SIZE)) {
> +               prerror("UV: Failed to create uv_fdt\n");
> +               return 1;
> +       }
> +
> +       fdt_finish_reservemap(uv_fdt);
> +       fdt_begin_node(uv_fdt, "");
> +       fdt_property_string(uv_fdt, "description", "Ultravisor fdt");
> +       fdt_begin_node(uv_fdt, "ibm,uv-fdt");
> +       fdt_property_string(uv_fdt, "compatible", "ibm,uv-fdt");
> +       fdt_end_node(uv_fdt);
> +       fdt_end_node(uv_fdt);
> +       fdt_finish(uv_fdt);
> +
> +       return OPAL_SUCCESS;
> +}

> +static void free_uv(void)
> +{
> +       struct mem_region *region = find_mem_region("ibm,firmware-allocs-memory@0");
> +
> +       lock(&region->free_list_lock);
> +       mem_free(region, uv_image, __location__);
> +       unlock(&region->free_list_lock);
> +}

ibm,firmware-allocs-memory@0 contains everything allocated by skiboot
on node 0 using local_alloc()

> +static bool alloc_uv(void)
> +{
> +       struct proc_chip *chip = next_chip(NULL);
> +
> +       uv_image_size = MAX_COMPRESSED_UV_IMAGE_SIZE;
> +       if (!(uv_image = local_alloc(chip->id, uv_image_size, uv_image_size)))
> +               return false;
> +       memset(uv_image, 0, uv_image_size);
> +       return true;
> +}
> +
> +/* We could be running on Mambo, Cronus, or Hostboot
> + *
> + * Detect Mambo via chip quirk.  Mambo writes the uncompressed UV images
> + * directly to secure memory and passes secure memory location via device tree.
> + *
> + * Detect Cronus when HB decompress fails.  Cronus writes the uncompressed UV
> + * image to insecure memory and init_uv will copy from insecure to secure.
> + *
> + * Assume HB by waiting for decompress.  UV should have been loaded from FSP
> + * and decompressed earlier via uv_preload_image and uv_decompress_image.  The
> + * secure location of the UV provided by those functions in xz struct. */

> +void init_uv()

call it load_uv() since that's what it actually does.

> +{
> +       struct dt_node *node;
> +       const struct dt_property *base;
> +       uint64_t uv_src_addr, uv_pef_reg, uv_pef_size;
> +       void *uv_fdt;
> +
> +       prlog(PR_DEBUG, "UV: Init starting\n");
> +
> +       if (!is_msr_bit_set(MSR_S)) {
> +               prerror("UV: S bit not set\n");
> +               goto load_error;
that's not an error.

> +       }
> +
> +       if (!(uv_opal = zalloc(sizeof(struct uv_opal)))) {
> +               prerror("UV: Failed to allocate uv_opal\n");
> +               goto load_error;
> +       }
> +
> +
> +       if (!(node = find_uv_node())) {
> +               prerror("UV: Device tree node not found\n");
> +               goto load_error;
> +       }
> +

> +       if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) {
> +               prlog(PR_INFO, "UV: Mambo simulator detected\n");
> +
> +               if (!find_secure_mem_to_copy(&uv_pef_reg, &uv_pef_size)) {
> +                       prerror("UV: No secure memory configured, exiting\n");
> +                       goto load_error;
> +               }
> +
> +               goto start;
> +       }

Seems like a weird hack.

> +
> +       /* This would be null in case we are on Cronus */
> +       if (!uv_xz) {
> +
> +               prlog(PR_INFO, "UV: Platform load failed, detecting UV image via device tree\n");
> +
> +               if (!find_secure_mem_to_copy(&uv_pef_reg, &uv_pef_size)) {
> +                       prerror("UV: No secure memory configured, exiting\n");
> +                       goto load_error;
> +               }
> +
> +               if (!(uv_src_addr = find_uv_fw_base_addr(node))) {
> +                       prerror("UV: Couldn't find UV base address in device tree\n");
> +                       goto load_error;
> +               }
> +
> +               prlog(PR_INFO, "UV: Copying Ultravisor to protected memory 0x%llx from 0x%llx\n", uv_pef_reg, uv_src_addr);
> +
> +               memcpy((void *)uv_pef_reg, (void *)uv_src_addr, UV_LOAD_MAX_SIZE);
> +
> +               goto start;
> +       }
> +
> +       /* Hostboot path */
> +       wait_xz_decompress(uv_xz);
> +       if (uv_xz->status) {
> +               prerror("UV: Compressed Ultravisor image failed to decompress");
> +               goto load_error;
> +       }
> +
> +       /* the uncompressed location will be the base address of ultravisor
> +        * so fix up if it's already there */
> +       base = dt_find_property(node, "reg");
> +       if (base)
> +               dt_del_property(node, (struct dt_property *)base);
> +
> +       dt_add_property_u64(node, "reg", (uint64_t)uv_xz->dst);
> +
> +       uv_pef_reg = (uint64_t)uv_xz->dst;
> +       uv_pef_size = (uint64_t)uv_xz->dst_size;
> +
> +start:
> +       uv_opal->uv_base_addr = uv_pef_reg;
> +
> +       uv_opal->sys_fdt = (__be64)create_dtb(dt_root, false);
> +       if (!uv_opal->sys_fdt) {
> +               prerror("UV: Failed to create system fdt\n");
> +               goto load_error;
> +       }
> +
> +       uv_fdt = (void *)(uv_pef_reg + UV_LOAD_MAX_SIZE);
> +       if (create_dtb_uv(uv_fdt)) {
> +               prerror("UV: Failed to create uv fdt\n");
> +               goto load_error;
> +       }
> +       uv_opal->uv_fdt = (__be64)uv_fdt;
> +
> +       reserve_uv_memory(uv_opal);
> +
> +load_error:
> +       free_uv();
> +       free(uv_xz);
> +}
> +
> +static bool dt_append_memory_range(struct dt_node *node, __be64 start,
> +                                  __be64 len)
> +{
> +       const struct dt_property *ranges;
> +       size_t size;
> +       u32 *new_ranges;
> +       int i;
> +
> +       /* for Cronus boot the BML script creates secure-memory-ranges
> +        * for Mambo boot the ultra.tcl script create secure-memory ranges
> +        * for HostBoot, skiboot parses HDAT in hdata/memory.c and creates it here */
> +       ranges = dt_find_property(node, "secure-memory-ranges");
> +       if (!ranges) {
> +               prlog(PR_DEBUG, "Creating secure-memory-ranges.\n");
> +               ranges = dt_add_property_cells(node, "secure-memory-ranges",
> +                                              hi32(start), lo32(start),
> +                                              hi32(len), lo32(len));
Endian unsafe, also use dt_add_property_u64s()

> +               return true;
> +       }
> +
> +       prlog(PR_DEBUG, "Adding secure memory range range at 0x%llx of size: 0x%llx\n", start, len);
> +       /* Calculate the total size in bytes of the new property */
> +       size = ranges->len + 16;
> +       new_ranges = (u32 *)malloc(size);
> +       memcpy(new_ranges, ranges->prop, ranges->len);
> +
> +       i = ranges->len / 4;
> +       /* The ranges property will be of type <addr size ...> */
> +       new_ranges[i++] = hi32(start);
> +       new_ranges[i++] = lo32(start);
> +       new_ranges[i++] = hi32(len);
> +       new_ranges[i] = lo32(len);

Use a u64 array?

> +       /* Update our node with the new set of ranges */
> +       dt_del_property(node, (struct dt_property *)ranges);
> +       dt_add_property(node, "secure-memory-ranges", (void *)new_ranges, size);
> +
> +       return true;
> +}
> +
> +/*
> + * This code returns false on invalid memory ranges and in no-secure mode.
> + * It is the caller's responsibility of moving the memory to appropriate
> + * reserved areas.
> + */
> +bool uv_add_mem_range(__be64 start, __be64 end)
> +{
> +       struct dt_node *uv_node;
> +       bool ret = false;
> +
> +       if (!is_msr_bit_set(MSR_S))
> +               return ret;
> +
> +       /* Check if address range is secure */
> +       if (!((start & UV_SECURE_MEM_BIT) && (end & UV_SECURE_MEM_BIT))) {
> +               prlog(PR_DEBUG, "Invalid secure address range.\n");
> +               return ret;
> +       }
> +
> +       uv_node = find_uv_node();
> +       if (!uv_node) {
> +               prlog(PR_ERR, "Could not create uv node\n");
> +               return false;
> +       }
> +
> +       ret = dt_append_memory_range(uv_node, start, end - start + 1);
> +
> +       if (ret)
> +               prlog(PR_NOTICE, "Secure memory range added [0x%016llx..0x%015llx]\n", start, end);
> +
> +       return ret;
> +}
> +
> +/*
> + * Preload the UV image from PNOR partition
> + */
> +void uv_preload_image(void)
> +{
> +       int ret;
> +
> +       prlog(PR_INFO, "UV: Preload starting\n");
> +
> +       if (!alloc_uv()) {
> +               prerror("UV: Memory allocation failed\n");
> +               return;
> +       }
> +
> +       ret = start_preload_resource(RESOURCE_ID_UV_IMAGE, RESOURCE_SUBID_NONE,
> +                                    uv_image, &uv_image_size);
> +
> +       if (ret != OPAL_SUCCESS) {
> +               prerror("UV: platform load failed: %d\n", ret);
> +       }
> +}
> +
> +/*
> + * Decompress the UV image
> + *
> + * This function modifies the uv_image variable to point to the decompressed
> + * image location.
> + */
> +void uv_decompress_image(void)
> +{
> +       const struct dt_property *ranges;
> +       struct dt_node *uv_node;
> +       uint64_t *range;
> +
> +       if (uv_image == NULL) {
> +               prerror("UV: Preload hasn't started yet! Aborting.\n");
> +               return;
> +       }
> +
> +       if (wait_for_resource_loaded(RESOURCE_ID_UV_IMAGE,
> +                                    RESOURCE_SUBID_NONE) != OPAL_SUCCESS) {
> +               prerror("UV: Ultravisor image load failed\n");
> +               return;
> +       }
> +
> +       uv_node = dt_find_by_name(dt_root, "ibm,uv-firmware");
> +       if (!uv_node) {
> +               prerror("UV: Cannot find ibm,uv-firmware node\n");
> +               return;
> +       }
> +
> +       ranges = dt_find_property(uv_node, "secure-memory-ranges");
> +       if (!ranges) {
> +               prerror("UV: Cannot find secure-memory-ranges");
> +               return;
> +       }
> +
> +       uv_xz = malloc(sizeof(struct xz_decompress));
> +       if (!uv_xz) {
> +               prerror("UV: Cannot allocate memory for decompression of UV\n");
> +               return;
> +       }
> +
> +       /* the load area is the first secure memory range */
> +       range = (void *)ranges->prop;
> +       uv_xz->dst = (void *)dt_get_number(range, 2);
> +       uv_xz->dst_size = dt_get_number(range + 1, 2);
> +       uv_xz->src = uv_image;
> +       uv_xz->src_size = uv_image_size;
> +
> +       /* TODO security and integrity checks? */
> +       xz_start_decompress(uv_xz);
> +       if ((uv_xz->status != OPAL_PARTIAL) && (uv_xz->status != OPAL_SUCCESS))
> +               prerror("UV: XZ decompression failed status 0x%x\n", uv_xz->status);
> +}
> diff --git a/include/platform.h b/include/platform.h
> index 0b043856..259550d4 100644
> --- a/include/platform.h
> +++ b/include/platform.h
> @@ -17,6 +17,7 @@ enum resource_id {
>         RESOURCE_ID_INITRAMFS,
>         RESOURCE_ID_CAPP,
>         RESOURCE_ID_IMA_CATALOG,
> +       RESOURCE_ID_UV_IMAGE,
>         RESOURCE_ID_VERSION,
>         RESOURCE_ID_KERNEL_FW,
>  };
> diff --git a/include/processor.h b/include/processor.h
> index 352fd1ec..0a552998 100644
> --- a/include/processor.h
> +++ b/include/processor.h
> @@ -11,6 +11,7 @@
>  #define MSR_HV         PPC_BIT(3)      /* Hypervisor mode */
>  #define MSR_VEC                PPC_BIT(38)     /* VMX enable */
>  #define MSR_VSX                PPC_BIT(40)     /* VSX enable */
> +#define MSR_S          PPC_BIT(41)     /* Secure Mode enable */
>  #define MSR_EE         PPC_BIT(48)     /* External Int. Enable */
>  #define MSR_PR         PPC_BIT(49)             /* Problem state */
>  #define MSR_FP         PPC_BIT(50)     /* Floating Point Enable */
> @@ -368,6 +369,17 @@ static inline void st_le32(uint32_t *addr, uint32_t val)
>         asm volatile("stwbrx %0,0,%1" : : "r"(val), "r"(addr), "m"(*addr));
>  }
>

> +/*
> + * MSR bit check
> + */
> +static inline bool is_msr_bit_set(uint64_t bit)
> +{
> +       if (mfmsr() & bit)
> +               return true;
> +
> +       return false;
> +}
> +
>  #endif /* __TEST__ */

I'm going to take a stab in the dark and say this is going to break
all of our unit tests.

>  #endif /* __ASSEMBLY__ */
> diff --git a/include/ultravisor-api.h b/include/ultravisor-api.h
> new file mode 100644
> index 00000000..8a99b7c4
> --- /dev/null
> +++ b/include/ultravisor-api.h
> @@ -0,0 +1,18 @@
> +// SPDX-License-Identifier: Apache-2.0
> +/* Copyright 2018-2019 IBM Corp. */
> +
> +#ifndef __ULTRAVISOR_API_H
> +#define __ULTRAVISOR_API_H
> +
> +struct uv_opal {
> +       __be32 magic;           /**< 'OPUV' 0x4F505556 OPUV_MAGIC */
> +       __be32 version;         /**< uv_opal struct version */
> +       __be32 uv_ret_code;     /**< 0 - Success, <0> : error. */
> +       __be32 uv_api_ver;      /**< Current uv api version. */
> +       __be64 uv_base_addr;    /**< Base address of UV in secure memory. */
> +       __be64 sys_fdt;         /**< System FDT. */
> +       __be64 uv_fdt;          /**< UV FDT in secure memory. */
> +       __be64 uv_mem;          /**< struct memcons */
> +};
> +
> +#endif /* __ULTRAVISOR_API_H */
> diff --git a/include/ultravisor.h b/include/ultravisor.h
> new file mode 100644
> index 00000000..b49121ce
> --- /dev/null
> +++ b/include/ultravisor.h
> @@ -0,0 +1,27 @@
> +// SPDX-License-Identifier: Apache-2.0
> +/* Copyright 2018-2019 IBM Corp. */
> +
> +#ifndef __ULTRAVISOR_H
> +#define __ULTRAVISOR_H
> +
> +#include <ultravisor-api.h>
> +
> +/* Bit 15 of an address should be set for it to be used as a secure memory area
> + * for the secure virtual machines */
> +#define UV_SECURE_MEM_BIT              (PPC_BIT(15))
> +#define MAX_COMPRESSED_UV_IMAGE_SIZE 0x40000 /* 256 Kilobytes */
> +#define UV_ACCESS_BIT          0x1ULL << 48
> +/* Address at which the Ultravisor is loaded for BML and Mambo */
> +#define UV_LOAD_BASE           0xC0000000
> +#define UV_LOAD_MAX_SIZE       0x200000
> +#define UV_FDT_MAX_SIZE                0x100000
> +#define UV_HB_RESERVE_SIZE     0x4000000;
> +
> +extern int start_uv(uint64_t entry, struct uv_opal *uv_opal);
> +extern bool uv_add_mem_range(__be64 start, __be64 end);
> +extern void uv_preload_image(void);
> +extern void uv_decompress_image(void);
> +extern void init_uv(void);
> +extern int start_ultravisor(void);
> +
> +#endif /* __ULTRAVISOR_H */
> --
> 2.21.0
>
> _______________________________________________
> Skiboot mailing list
> Skiboot@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/skiboot
Ryan Grimm Nov. 20, 2019, 9:49 p.m. UTC | #2
Oliver,

On Mon, 2019-11-18 at 10:49 +1100, Oliver O'Halloran wrote:
<snip>
> > diff --git a/hdata/memory.c b/hdata/memory.c
> > index 9af7ae71..25b8088d 100644
> > --- a/hdata/memory.c
> > +++ b/hdata/memory.c
> > @@ -10,6 +10,7 @@
> >  #include <types.h>
> >  #include <inttypes.h>
> >  #include <processor.h>
> > +#include <ultravisor.h>
> > 
> >  #include "spira.h"
> >  #include "hdata.h"
> > @@ -59,6 +60,8 @@ struct HDIF_ms_area_address_range {
> >  #define MS_CONTROLLER_MCS_ID(id)       GETFIELD(PPC_BITMASK32(4,
> > 7), id)
> >  #define MS_CONTROLLER_MCA_ID(id)       GETFIELD(PPC_BITMASK32(8,
> > 15), id)
> > 
> > +#define MS_ATTR_SMF                    (PPC_BIT32(23))
> > +
> >  struct HDIF_ms_area_id {
> >         __be16 id;
> >  #define MS_PTYPE_RISER_CARD    0x8000
> > @@ -163,6 +166,16 @@ static bool add_address_range(struct dt_node
> > *root,
> >                 return false;
> >         }
> > 
> > +       if (arange->mirror_attr & MS_ATTR_SMF) {
> > +               prlog(PR_DEBUG, "Found secure memory");
> > +               if (!uv_add_mem_range(reg[0],
> > cleanup_addr(be64_to_cpu(arange->end)))) {
> > +                       prlog(PR_INFO, "Failed to add secure memory
> > range to DT\n");
> > +                       mem_reserve_fw(name, reg[0], reg[1]);
> 
> The reservation facility is there to allow marking bits of otherwise
> normal memory as "special" so that skiboot and the kernel won't
> allocate over them and trash their contents. The secure memory ranges
> are completely disjoint from normal memory by design so IMO they
> should be top-level nodes, similar to the normal memory@<addr> nodes.
> There's a lot of code in this patch that exists purely to work around
> the decision to abuse reserved memory this way, so it should
> simplify
> the skiboot changes too.
> 

OK, we have been using the reservation system improperly.  And, yeah,
we do have little pieces of code here and there to fix things up, which
are sure to be fragile.

How about we have something like this in the doc, and use device_type
"secure_memory" so we don't have the kernel try to use it as regular
memory:

Skiboot parses secure memory from the HDAT tables and creates the
secure-memory device tree node, similar to a memory@ node except the
device_type is "secure_memory". For example:

.. code-block:: dts

        secure-memory@100fe00000000 {
                device_type = "secure_memory";
                ibm,chip-id = <0>;
                reg = < 0x100fe 0x0 0x2 0x0>;
        }

Hostboot will reserve some regions of secure memory, such as those for
OCC, HOMER, and SBE.  Skiboot will use the existing reserve
infrastructure to reserve them.
For example:

.. code-block::

        ibm,HCODE@100fffcaf0000
        ibm,OCC@100fffcdd0000
        ibm,RINGOVD@100fffcae0000
        ibm,WOFDATA@100fffcb90000
        ibm,arch-reg-data@100fffd700000
        ibm,hbrt-code-image@100fffcec0000
        ibm,hbrt-data@100fffd420000
        ibm,homer-image@100fffd800000
        ibm,homer-image@100fffdc00000
        ibm,occ-common-area@100ffff800000
        ibm,sbe-comm@100fffce90000
        ibm,sbe-comm@100fffceb0000
        ibm,sbe-ffdc@100fffce80000
        ibm,sbe-ffdc@100fffcea0000
        ibm,secure-crypt-algo-code@100fffce70000
        ibm,uvbwlist@100fffcad0000

For Mambo, ultra.tcl creates the secure-memory device tree node, which
is currently defined at 8GB with size 8GB.  Mambo has no protection on
secure memory, so a watchpoint could be used to ensure Skiboot does not
touch secure memory.

For BML, the BML script parses secure memory from the Cronus config
file and creates the secure-memory device tree node.


> > +                       return false;
> > +               } else
> > +                       return true;
> > +       }
> > +
> >         if (be16_to_cpu(id->flags) & MS_AREA_SHARED) {
> >                 mem = dt_find_by_name_addr(dt_root, name, reg[0]);
> >                 if (mem) {
> > @@ -676,9 +689,9 @@ static void get_hb_reserved_mem(struct
> > HDIF_common_hdr *ms_vpd)
> > 
> >                 /*
> >                  * Workaround broken HDAT reserve regions which are
> > -                * bigger than 512MBget_hb_reserved_mem
> > +                * bigger than 512MB and not secure memory
> >                  */
> > -               if ((end_addr - start_addr) > 0x20000000) {
> > +               if (((end_addr - start_addr) > 0x20000000) &&
> > !(start_addr & UV_SECURE_MEM_BIT)) {
> >                         prlog(PR_ERR, "MEM: Ignoring Bad HDAT
> > reserve: too big\n");
> >                         continue;
> >                 }
> > diff --git a/hw/Makefile.inc b/hw/Makefile.inc
> > index b708bdfe..848898b9 100644
> > --- a/hw/Makefile.inc
> > +++ b/hw/Makefile.inc
> > @@ -9,6 +9,7 @@ HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-
> > procedures.o
> >  HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o
> > capp.o
> >  HW_OBJS += occ-sensor.o vas.o sbe-p8.o dio-p9.o lpc-port80h.o
> > cache-p9.o
> >  HW_OBJS += npu-opal.o npu3.o npu3-nvlink.o npu3-hw-procedures.o
> > +HW_OBJS += ultravisor.o
> >  HW=hw/built-in.a
> > 
> >  include $(SRC)/hw/fsp/Makefile.inc
> > diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
> > index 6fa6534f..829e56f4 100644
> > --- a/hw/fsp/fsp.c
> > +++ b/hw/fsp/fsp.c
> > @@ -114,6 +114,7 @@ static u64 fsp_hir_timeout;
> >  #define KERNEL_LID_PHYP                        0x80a00701
> >  #define KERNEL_LID_OPAL                        0x80f00101
> >  #define INITRAMFS_LID_OPAL             0x80f00102
> > +#define ULTRA_LID_OPAL                 0x80f00105
> > 
> >  /*
> >   * We keep track on last logged values for some things to print
> > only on
> > @@ -2375,6 +2376,7 @@ static struct {
> >  } fsp_lid_map[] = {
> >         {
> > RESOURCE_ID_KERNEL,   RESOURCE_SUBID_NONE,    KERNEL_LID_OPAL },
> >         {
> > RESOURCE_ID_INITRAMFS,RESOURCE_SUBID_NONE,    INITRAMFS_LID_OPAL },
> > +       { RESOURCE_ID_UV_IMAGE,
> > RESOURCE_SUBID_NONE,    ULTRA_LID_OPAL },
> >         { RESOURCE_ID_IMA_CATALOG,IMA_CATALOG_NIMBUS,   0x80f00103
> > },
> >         { RESOURCE_ID_CAPP,     CAPP_IDX_MURANO_DD20,   0x80a02002
> > },
> >         { RESOURCE_ID_CAPP,     CAPP_IDX_MURANO_DD21,   0x80a02001
> > },
> > diff --git a/hw/ultravisor.c b/hw/ultravisor.c
> > new file mode 100644
> > index 00000000..8e3cceb4
> > --- /dev/null
> > +++ b/hw/ultravisor.c
> > @@ -0,0 +1,487 @@
> > +// SPDX-License-Identifier: Apache-2.0
> > +/* Copyright 2018-2019 IBM Corp. */
> > +
> > +#include <skiboot.h>
> > +#include <xscom.h>
> > +#include <chip.h>
> > +#include <device.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <inttypes.h>
> > +#include <ultravisor.h>
> > +#include <mem_region.h>
> > +#include <ultravisor-api.h>
> > +#include <libfdt/libfdt.h>
> > +
> > +static char *uv_image = NULL;
> > +static size_t uv_image_size;
> > +struct xz_decompress *uv_xz = NULL;
> > +static struct uv_opal *uv_opal;
> > +
> > +static struct dt_node *add_uv_dt_node(void)
> > +{
> > +       struct dt_node *dev, *uv;
> > +
> > +       dev = dt_new_check(dt_root, "ibm,ultravisor");
> > +       if (!dev)
> > +               return NULL;
> > +
> > +       dt_add_property_string(dev, "compatible",
> > "ibm,ultravisor");
> > +       uv = dt_new_check(dev, "firmware");
> > +       if (!uv) {
> > +               dt_free(dev);
> > +               return NULL;
> > +       }
> > +
> > +       dt_add_property_string(uv, "compatible", "firmware");
> 
> "firmware" ?
> 

Ok, that must be a typo.

> > +       return dev;
> > +}
> > +
> > +static struct dt_node *find_uv_node(void)
> > +{
> > +       struct dt_node *uv_node, *dt;
> > +
> > +       uv_node = dt_find_compatible_node(dt_root, NULL, "ibm,uv-
> > firmware");
> > +       if (!uv_node) {
> > +               prlog(PR_DEBUG, "ibm,uv-firmware compatible node
> > not found, creating");
> > +               dt = add_uv_dt_node();
> > +               if (!dt)
> > +                       return NULL;
> > +               uv_node = dt_find_compatible_node(dt_root, NULL,
> > "ibm,uv-firmware");
> > +       }
> > +
> > +       return uv_node;
> > +}
> > +
> > +static bool find_secure_mem_to_copy(uint64_t *target, uint64_t
> > *sz)
> > +{
> > +       struct dt_node *uv_node = find_uv_node();
> > +       const struct dt_property *ranges;
> > +       uint64_t uv_pef_reg;
> > +       uint64_t *range, sm_size, img_size = UV_LOAD_MAX_SIZE;
> > +
> > +       /*
> > +        * "uv-secure-memory" property could have multiple
> > +        * secure memory blocks. Pick first to load
> > +        * ultravisor in it.
> > +        */
> > +       ranges = dt_find_property(uv_node, "secure-memory-ranges");
> > +       if (!ranges)
> > +               return false;
> > +
> > +       range = (void *)ranges->prop;
> > +       do {
> > +               uv_pef_reg = dt_get_number(range, 2);
> > +               if (!uv_pef_reg)
> > +                       return false;
> > +
> > +               sm_size = dt_get_number(range + 1, 2);
> > +               if (sm_size > img_size)
> > +                       break;
> > +               range += 2;
> > +       } while (range);
> > +
> > +       *target = uv_pef_reg;
> > +       *sz = sm_size;
> > +       return true;
> > +}
> > +
> > +static uint64_t find_uv_fw_base_addr(struct dt_node *uv_node)
> > +{
> > +       uint64_t base_addr = 0;
> > +
> > +       if (dt_has_node_property(uv_node, "reg", NULL))
> > +               base_addr = dt_prop_get_u64(uv_node, "reg");
> > +
> > +       return base_addr;
> > +}
> > +
> > +static void reserve_secure_memory_region(void)
> > +{
> > +       struct dt_node *uv_node = find_uv_node();
> > +       const struct dt_property *ranges;
> > +       uint64_t *range, *rangesp, sm_size, addr;
> > +       char buf[128];
> > +       int i=0;
> > +
> > +       ranges = dt_find_property(uv_node, "secure-memory-ranges");
> > +       if (!ranges)
> > +               return;
> > +
> > +       for (rangesp = (uint64_t *)(ranges->prop + ranges->len),
> > +                       range = (uint64_t *)ranges->prop;
> > +                       range < rangesp;
> > +                       range += 2) {
> > +               addr = dt_get_number(range, 2);
> > +               if (!addr)
> > +                       break;
> > +
> > +               sm_size = dt_get_number(range + 1, 2);
> > +               if (!sm_size)
> > +                       break;
> > +
> > +               /* Remove Hostboot regions from secure memory 0 so
> > we don't abort
> > +                * on overlapping regions */
> > +               if (i == 0) {
> > +                       prlog(PR_INFO, "Secure region 0, removing
> > HB region\n");
> > +                       /* TODO: Check with Hostboot for memory map
> > */
> > +                       sm_size = sm_size - UV_HB_RESERVE_SIZE;
> > +               }
> > +
> > +               snprintf(buf, 128, "ibm,secure-region-%d",i++);
> > +               mem_reserve_fw(strdup(buf), addr, sm_size);
> > +       }
> > +
> > +       return;
> > +}
> > +
> > +static void reserve_uv_memory(struct uv_opal *uv_opal)
> > +{
> > +       if (uv_opal->uv_base_addr == UV_LOAD_BASE) {
> > +               mem_reserve_fw("ibm,uv-code", UV_LOAD_BASE,
> > UV_LOAD_MAX_SIZE);
> > +       } else {
> > +               reserve_secure_memory_region();
> > +       }
> 
> Ditch the braces. That said, this appears to only be required because
> of the decision to put the secure memory into reservations. If you
> fix that this function can probably go away entirely.
> 

K, yeah, we don't need the reserves either with changes above, so I
think we can get rid of all of this.

> > +}
> > +
> > +static void cpu_start_ultravisor(void *data)
> > +{
> > +       struct uv_opal *ptr = (struct uv_opal *)data;
> > +       start_uv(ptr->uv_base_addr, ptr);
> > +}
> > +
> > +int start_ultravisor(void)
> > +{
> > +       struct cpu_thread *cpu;
> > +       struct cpu_job **jobs;
> > +       int i=0;
> > +
> > +       prlog(PR_NOTICE, "UV: Starting Ultravisor at 0x%llx sys_fdt
> > 0x%llx uv_fdt 0x%0llx\n",
> > +                               uv_opal->uv_base_addr, uv_opal-
> > >sys_fdt, uv_opal->uv_fdt);
> > +
> > +       /* Alloc memory for Jobs */
> > +       jobs = zalloc(sizeof(struct cpu_job*) * cpu_max_pir);
> > +
> > +       for_each_available_cpu(cpu) {
> > +               if (cpu == this_cpu())
> > +                       continue;
> > +               jobs[i++] = cpu_queue_job(cpu, "start_ultravisor",
> > +                                       cpu_start_ultravisor, (void
> > *)uv_opal);
> > +       }
> > +
> > +       cpu_start_ultravisor((void *)uv_opal);
> > +
> > +       /* wait for everyone to sync back */
> > +       while (i > 0) {
> > +               cpu_wait_job(jobs[--i], true);
> > +       }
> 
> ditch the braces
> 

k

> > +
> > +       /* free used stuff */
> > +       free(jobs);
> > +
> > +       /* Check everything is fine */
> > +       if (uv_opal->uv_ret_code) {
> > +               return OPAL_HARDWARE;
> > +       }
> 
> ditch the braces
> 

k

> > +
> > +       return OPAL_SUCCESS;
> > +}
> > +
> > +static int create_dtb_uv(void *uv_fdt)
> > +{
> > +       if (fdt_create(uv_fdt, UV_FDT_MAX_SIZE)) {
> > +               prerror("UV: Failed to create uv_fdt\n");
> > +               return 1;
> > +       }
> > +
> > +       fdt_finish_reservemap(uv_fdt);
> > +       fdt_begin_node(uv_fdt, "");
> > +       fdt_property_string(uv_fdt, "description", "Ultravisor
> > fdt");
> > +       fdt_begin_node(uv_fdt, "ibm,uv-fdt");
> > +       fdt_property_string(uv_fdt, "compatible", "ibm,uv-fdt");
> > +       fdt_end_node(uv_fdt);
> > +       fdt_end_node(uv_fdt);
> > +       fdt_finish(uv_fdt);
> > +
> > +       return OPAL_SUCCESS;
> > +}
> > +static void free_uv(void)
> > +{
> > +       struct mem_region *region = find_mem_region("ibm,
> > firmware-allocs-memory@0");
> > +
> > +       lock(&region->free_list_lock);
> > +       mem_free(region, uv_image, __location__);
> > +       unlock(&region->free_list_lock);
> > +}
> 
> ibm,firmware-allocs-memory@0 contains everything allocated by skiboot
> on node 0 using local_alloc()
> 

OK, I think I wrote that code being unsure about how to do this.  

So, we can use local_alloc, but I'm struggling a little bit to figure
out how to free it.  I looked at examples of local_alloc, but it
doesn't seem that any of the code in occ, phb4, or xive frees its
allocations.

Is it up to the caller to deal with mem_regions and call mem_free with
the mem_region?

> > +static bool alloc_uv(void)
> > +{
> > +       struct proc_chip *chip = next_chip(NULL);
> > +
> > +       uv_image_size = MAX_COMPRESSED_UV_IMAGE_SIZE;
> > +       if (!(uv_image = local_alloc(chip->id, uv_image_size,
> > uv_image_size)))
> > +               return false;
> > +       memset(uv_image, 0, uv_image_size);
> > +       return true;
> > +}
> > +
> > +/* We could be running on Mambo, Cronus, or Hostboot
> > + *
> > + * Detect Mambo via chip quirk.  Mambo writes the uncompressed UV
> > images
> > + * directly to secure memory and passes secure memory location via
> > device tree.
> > + *
> > + * Detect Cronus when HB decompress fails.  Cronus writes the
> > uncompressed UV
> > + * image to insecure memory and init_uv will copy from insecure to
> > secure.
> > + *
> > + * Assume HB by waiting for decompress.  UV should have been
> > loaded from FSP
> > + * and decompressed earlier via uv_preload_image and
> > uv_decompress_image.  The
> > + * secure location of the UV provided by those functions in xz
> > struct. */
> > +void init_uv()
> 
> call it load_uv() since that's what it actually does.
> 
> > +{
> > +       struct dt_node *node;
> > +       const struct dt_property *base;
> > +       uint64_t uv_src_addr, uv_pef_reg, uv_pef_size;
> > +       void *uv_fdt;
> > +
> > +       prlog(PR_DEBUG, "UV: Init starting\n");
> > +
> > +       if (!is_msr_bit_set(MSR_S)) {
> > +               prerror("UV: S bit not set\n");
> > +               goto load_error;
> 
> that's not an error.
> 

OK, we plan on looking at these paths closely, as we have not tested or
thought about the case where we're running without MSR_S.

But, right, it's not an error.

> > +       }
> > +
> > +       if (!(uv_opal = zalloc(sizeof(struct uv_opal)))) {
> > +               prerror("UV: Failed to allocate uv_opal\n");
> > +               goto load_error;
> > +       }
> > +
> > +
> > +       if (!(node = find_uv_node())) {
> > +               prerror("UV: Device tree node not found\n");
> > +               goto load_error;
> > +       }
> > +
> > +       if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) {
> > +               prlog(PR_INFO, "UV: Mambo simulator detected\n");
> > +
> > +               if (!find_secure_mem_to_copy(&uv_pef_reg,
> > &uv_pef_size)) {
> > +                       prerror("UV: No secure memory configured,
> > exiting\n");
> > +                       goto load_error;
> > +               }
> > +
> > +               goto start;
> > +       }
> 
> Seems like a weird hack.
> 

Yeah, this is how the code evolved supporting Mambo, Cronus, and HB. 
Do you think we should break each up into its own function?  

> > +
> > +       /* This would be null in case we are on Cronus */
> > +       if (!uv_xz) {
> > +
> > +               prlog(PR_INFO, "UV: Platform load failed, detecting
> > UV image via device tree\n");
> > +
> > +               if (!find_secure_mem_to_copy(&uv_pef_reg,
> > &uv_pef_size)) {
> > +                       prerror("UV: No secure memory configured,
> > exiting\n");
> > +                       goto load_error;
> > +               }
> > +
> > +               if (!(uv_src_addr = find_uv_fw_base_addr(node))) {
> > +                       prerror("UV: Couldn't find UV base address
> > in device tree\n");
> > +                       goto load_error;
> > +               }
> > +
> > +               prlog(PR_INFO, "UV: Copying Ultravisor to protected
> > memory 0x%llx from 0x%llx\n", uv_pef_reg, uv_src_addr);
> > +
> > +               memcpy((void *)uv_pef_reg, (void *)uv_src_addr,
> > UV_LOAD_MAX_SIZE);
> > +
> > +               goto start;
> > +       }
> > +
> > +       /* Hostboot path */
> > +       wait_xz_decompress(uv_xz);
> > +       if (uv_xz->status) {
> > +               prerror("UV: Compressed Ultravisor image failed to
> > decompress");
> > +               goto load_error;
> > +       }
> > +
> > +       /* the uncompressed location will be the base address of
> > ultravisor
> > +        * so fix up if it's already there */
> > +       base = dt_find_property(node, "reg");
> > +       if (base)
> > +               dt_del_property(node, (struct dt_property *)base);
> > +
> > +       dt_add_property_u64(node, "reg", (uint64_t)uv_xz->dst);
> > +
> > +       uv_pef_reg = (uint64_t)uv_xz->dst;
> > +       uv_pef_size = (uint64_t)uv_xz->dst_size;
> > +
> > +start:
> > +       uv_opal->uv_base_addr = uv_pef_reg;
> > +
> > +       uv_opal->sys_fdt = (__be64)create_dtb(dt_root, false);
> > +       if (!uv_opal->sys_fdt) {
> > +               prerror("UV: Failed to create system fdt\n");
> > +               goto load_error;
> > +       }
> > +
> > +       uv_fdt = (void *)(uv_pef_reg + UV_LOAD_MAX_SIZE);
> > +       if (create_dtb_uv(uv_fdt)) {
> > +               prerror("UV: Failed to create uv fdt\n");
> > +               goto load_error;
> > +       }
> > +       uv_opal->uv_fdt = (__be64)uv_fdt;
> > +
> > +       reserve_uv_memory(uv_opal);
> > +
> > +load_error:
> > +       free_uv();
> > +       free(uv_xz);
> > +}
> > +
> > +static bool dt_append_memory_range(struct dt_node *node, __be64
> > start,
> > +                                  __be64 len)
> > +{
> > +       const struct dt_property *ranges;
> > +       size_t size;
> > +       u32 *new_ranges;
> > +       int i;
> > +
> > +       /* for Cronus boot the BML script creates secure-memory-
> > ranges
> > +        * for Mambo boot the ultra.tcl script create secure-memory 
> > ranges
> > +        * for HostBoot, skiboot parses HDAT in hdata/memory.c and
> > creates it here */
> > +       ranges = dt_find_property(node, "secure-memory-ranges");
> > +       if (!ranges) {
> > +               prlog(PR_DEBUG, "Creating secure-memory-
> > ranges.\n");
> > +               ranges = dt_add_property_cells(node, "secure-
> > memory-ranges",
> > +                                              hi32(start),
> > lo32(start),
> > +                                              hi32(len),
> > lo32(len));
> 
> Endian unsafe, also use dt_add_property_u64s()
> 

K

> > +               return true;
> > +       }
> > +
> > +       prlog(PR_DEBUG, "Adding secure memory range range at 0x%llx
> > of size: 0x%llx\n", start, len);
> > +       /* Calculate the total size in bytes of the new property */
> > +       size = ranges->len + 16;
> > +       new_ranges = (u32 *)malloc(size);
> > +       memcpy(new_ranges, ranges->prop, ranges->len);
> > +
> > +       i = ranges->len / 4;
> > +       /* The ranges property will be of type <addr size ...> */
> > +       new_ranges[i++] = hi32(start);
> > +       new_ranges[i++] = lo32(start);
> > +       new_ranges[i++] = hi32(len);
> > +       new_ranges[i] = lo32(len);
> 
> Use a u64 array?
> 
> > +       /* Update our node with the new set of ranges */
> > +       dt_del_property(node, (struct dt_property *)ranges);
> > +       dt_add_property(node, "secure-memory-ranges", (void
> > *)new_ranges, size);
> > +
> > +       return true;
> > +}
> > +
> > +/*
> > + * This code returns false on invalid memory ranges and in no-
> > secure mode.
> > + * It is the caller's responsibility of moving the memory to
> > appropriate
> > + * reserved areas.
> > + */
> > +bool uv_add_mem_range(__be64 start, __be64 end)
> > +{
> > +       struct dt_node *uv_node;
> > +       bool ret = false;
> > +
> > +       if (!is_msr_bit_set(MSR_S))
> > +               return ret;
> > +
> > +       /* Check if address range is secure */
> > +       if (!((start & UV_SECURE_MEM_BIT) && (end &
> > UV_SECURE_MEM_BIT))) {
> > +               prlog(PR_DEBUG, "Invalid secure address range.\n");
> > +               return ret;
> > +       }
> > +
> > +       uv_node = find_uv_node();
> > +       if (!uv_node) {
> > +               prlog(PR_ERR, "Could not create uv node\n");
> > +               return false;
> > +       }
> > +
> > +       ret = dt_append_memory_range(uv_node, start, end - start +
> > 1);
> > +
> > +       if (ret)
> > +               prlog(PR_NOTICE, "Secure memory range added
> > [0x%016llx..0x%015llx]\n", start, end);
> > +
> > +       return ret;
> > +}
> > +
> > +/*
> > + * Preload the UV image from PNOR partition
> > + */
> > +void uv_preload_image(void)
> > +{
> > +       int ret;
> > +
> > +       prlog(PR_INFO, "UV: Preload starting\n");
> > +
> > +       if (!alloc_uv()) {
> > +               prerror("UV: Memory allocation failed\n");
> > +               return;
> > +       }
> > +
> > +       ret = start_preload_resource(RESOURCE_ID_UV_IMAGE,
> > RESOURCE_SUBID_NONE,
> > +                                    uv_image, &uv_image_size);
> > +
> > +       if (ret != OPAL_SUCCESS) {
> > +               prerror("UV: platform load failed: %d\n", ret);
> > +       }
> > +}
> > +
> > +/*
> > + * Decompress the UV image
> > + *
> > + * This function modifies the uv_image variable to point to the
> > decompressed
> > + * image location.
> > + */
> > +void uv_decompress_image(void)
> > +{
> > +       const struct dt_property *ranges;
> > +       struct dt_node *uv_node;
> > +       uint64_t *range;
> > +
> > +       if (uv_image == NULL) {
> > +               prerror("UV: Preload hasn't started yet!
> > Aborting.\n");
> > +               return;
> > +       }
> > +
> > +       if (wait_for_resource_loaded(RESOURCE_ID_UV_IMAGE,
> > +                                    RESOURCE_SUBID_NONE) !=
> > OPAL_SUCCESS) {
> > +               prerror("UV: Ultravisor image load failed\n");
> > +               return;
> > +       }
> > +
> > +       uv_node = dt_find_by_name(dt_root, "ibm,uv-firmware");
> > +       if (!uv_node) {
> > +               prerror("UV: Cannot find ibm,uv-firmware node\n");
> > +               return;
> > +       }
> > +
> > +       ranges = dt_find_property(uv_node, "secure-memory-ranges");
> > +       if (!ranges) {
> > +               prerror("UV: Cannot find secure-memory-ranges");
> > +               return;
> > +       }
> > +
> > +       uv_xz = malloc(sizeof(struct xz_decompress));
> > +       if (!uv_xz) {
> > +               prerror("UV: Cannot allocate memory for
> > decompression of UV\n");
> > +               return;
> > +       }
> > +
> > +       /* the load area is the first secure memory range */
> > +       range = (void *)ranges->prop;
> > +       uv_xz->dst = (void *)dt_get_number(range, 2);
> > +       uv_xz->dst_size = dt_get_number(range + 1, 2);
> > +       uv_xz->src = uv_image;
> > +       uv_xz->src_size = uv_image_size;
> > +
> > +       /* TODO security and integrity checks? */
> > +       xz_start_decompress(uv_xz);
> > +       if ((uv_xz->status != OPAL_PARTIAL) && (uv_xz->status !=
> > OPAL_SUCCESS))
> > +               prerror("UV: XZ decompression failed status
> > 0x%x\n", uv_xz->status);
> > +}
> > diff --git a/include/platform.h b/include/platform.h
> > index 0b043856..259550d4 100644
> > --- a/include/platform.h
> > +++ b/include/platform.h
> > @@ -17,6 +17,7 @@ enum resource_id {
> >         RESOURCE_ID_INITRAMFS,
> >         RESOURCE_ID_CAPP,
> >         RESOURCE_ID_IMA_CATALOG,
> > +       RESOURCE_ID_UV_IMAGE,
> >         RESOURCE_ID_VERSION,
> >         RESOURCE_ID_KERNEL_FW,
> >  };
> > diff --git a/include/processor.h b/include/processor.h
> > index 352fd1ec..0a552998 100644
> > --- a/include/processor.h
> > +++ b/include/processor.h
> > @@ -11,6 +11,7 @@
> >  #define MSR_HV         PPC_BIT(3)      /* Hypervisor mode */
> >  #define MSR_VEC                PPC_BIT(38)     /* VMX enable */
> >  #define MSR_VSX                PPC_BIT(40)     /* VSX enable */
> > +#define MSR_S          PPC_BIT(41)     /* Secure Mode enable */
> >  #define MSR_EE         PPC_BIT(48)     /* External Int. Enable */
> >  #define MSR_PR         PPC_BIT(49)             /* Problem state */
> >  #define MSR_FP         PPC_BIT(50)     /* Floating Point Enable */
> > @@ -368,6 +369,17 @@ static inline void st_le32(uint32_t *addr,
> > uint32_t val)
> >         asm volatile("stwbrx %0,0,%1" : : "r"(val), "r"(addr),
> > "m"(*addr));
> >  }
> > 
> > +/*
> > + * MSR bit check
> > + */
> > +static inline bool is_msr_bit_set(uint64_t bit)
> > +{
> > +       if (mfmsr() & bit)
> > +               return true;
> > +
> > +       return false;
> > +}
> > +
> >  #endif /* __TEST__ */
> 
> I'm going to take a stab in the dark and say this is going to break
> all of our unit tests.
> 

OK, we can start running the unit tests, we haven't been doing that.

> >  #endif /* __ASSEMBLY__ */
> > diff --git a/include/ultravisor-api.h b/include/ultravisor-api.h
> > new file mode 100644
> > index 00000000..8a99b7c4
> > --- /dev/null
> > +++ b/include/ultravisor-api.h
> > @@ -0,0 +1,18 @@
> > +// SPDX-License-Identifier: Apache-2.0
> > +/* Copyright 2018-2019 IBM Corp. */
> > +
> > +#ifndef __ULTRAVISOR_API_H
> > +#define __ULTRAVISOR_API_H
> > +
> > +struct uv_opal {
> > +       __be32 magic;           /**< 'OPUV' 0x4F505556 OPUV_MAGIC
> > */
> > +       __be32 version;         /**< uv_opal struct version */
> > +       __be32 uv_ret_code;     /**< 0 - Success, <0> : error. */
> > +       __be32 uv_api_ver;      /**< Current uv api version. */
> > +       __be64 uv_base_addr;    /**< Base address of UV in secure
> > memory. */
> > +       __be64 sys_fdt;         /**< System FDT. */
> > +       __be64 uv_fdt;          /**< UV FDT in secure memory. */
> > +       __be64 uv_mem;          /**< struct memcons */
> > +};
> > +
> > +#endif /* __ULTRAVISOR_API_H */
> > diff --git a/include/ultravisor.h b/include/ultravisor.h
> > new file mode 100644
> > index 00000000..b49121ce
> > --- /dev/null
> > +++ b/include/ultravisor.h
> > @@ -0,0 +1,27 @@
> > +// SPDX-License-Identifier: Apache-2.0
> > +/* Copyright 2018-2019 IBM Corp. */
> > +
> > +#ifndef __ULTRAVISOR_H
> > +#define __ULTRAVISOR_H
> > +
> > +#include <ultravisor-api.h>
> > +
> > +/* Bit 15 of an address should be set for it to be used as a
> > secure memory area
> > + * for the secure virtual machines */
> > +#define UV_SECURE_MEM_BIT              (PPC_BIT(15))
> > +#define MAX_COMPRESSED_UV_IMAGE_SIZE 0x40000 /* 256 Kilobytes */
> > +#define UV_ACCESS_BIT          0x1ULL << 48
> > +/* Address at which the Ultravisor is loaded for BML and Mambo */
> > +#define UV_LOAD_BASE           0xC0000000
> > +#define UV_LOAD_MAX_SIZE       0x200000
> > +#define UV_FDT_MAX_SIZE                0x100000
> > +#define UV_HB_RESERVE_SIZE     0x4000000;
> > +
> > +extern int start_uv(uint64_t entry, struct uv_opal *uv_opal);
> > +extern bool uv_add_mem_range(__be64 start, __be64 end);
> > +extern void uv_preload_image(void);
> > +extern void uv_decompress_image(void);
> > +extern void init_uv(void);
> > +extern int start_ultravisor(void);
> > +
> > +#endif /* __ULTRAVISOR_H */
> > --
> > 2.21.0
> > 
> > _______________________________________________
> > Skiboot mailing list
> > Skiboot@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/skiboot
Oliver O'Halloran Nov. 21, 2019, 4:26 a.m. UTC | #3
On Thu, Nov 21, 2019 at 8:49 AM Ryan Grimm <grimm@linux.vnet.ibm.com> wrote:
>
> Oliver,
>
> On Mon, 2019-11-18 at 10:49 +1100, Oliver O'Halloran wrote:
> <snip>
> > > diff --git a/hdata/memory.c b/hdata/memory.c
> > > index 9af7ae71..25b8088d 100644
> > > --- a/hdata/memory.c
> > > +++ b/hdata/memory.c
> > > @@ -10,6 +10,7 @@
> > >  #include <types.h>
> > >  #include <inttypes.h>
> > >  #include <processor.h>
> > > +#include <ultravisor.h>
> > >
> > >  #include "spira.h"
> > >  #include "hdata.h"
> > > @@ -59,6 +60,8 @@ struct HDIF_ms_area_address_range {
> > >  #define MS_CONTROLLER_MCS_ID(id)       GETFIELD(PPC_BITMASK32(4,
> > > 7), id)
> > >  #define MS_CONTROLLER_MCA_ID(id)       GETFIELD(PPC_BITMASK32(8,
> > > 15), id)
> > >
> > > +#define MS_ATTR_SMF                    (PPC_BIT32(23))
> > > +
> > >  struct HDIF_ms_area_id {
> > >         __be16 id;
> > >  #define MS_PTYPE_RISER_CARD    0x8000
> > > @@ -163,6 +166,16 @@ static bool add_address_range(struct dt_node
> > > *root,
> > >                 return false;
> > >         }
> > >
> > > +       if (arange->mirror_attr & MS_ATTR_SMF) {
> > > +               prlog(PR_DEBUG, "Found secure memory");
> > > +               if (!uv_add_mem_range(reg[0],
> > > cleanup_addr(be64_to_cpu(arange->end)))) {
> > > +                       prlog(PR_INFO, "Failed to add secure memory
> > > range to DT\n");
> > > +                       mem_reserve_fw(name, reg[0], reg[1]);
> >
> > The reservation facility is there to allow marking bits of otherwise
> > normal memory as "special" so that skiboot and the kernel won't
> > allocate over them and trash their contents. The secure memory ranges
> > are completely disjoint from normal memory by design so IMO they
> > should be top-level nodes, similar to the normal memory@<addr> nodes.
> > There's a lot of code in this patch that exists purely to work around
> > the decision to abuse reserved memory this way, so it should
> > simplify
> > the skiboot changes too.
> >
>
> OK, we have been using the reservation system improperly.  And, yeah,
> we do have little pieces of code here and there to fix things up, which
> are sure to be fragile.
>
> How about we have something like this in the doc, and use device_type
> "secure_memory" so we don't have the kernel try to use it as regular
> memory:
>
> Skiboot parses secure memory from the HDAT tables and creates the
> secure-memory device tree node, similar to a memory@ node except the
> device_type is "secure_memory". For example:
>
> .. code-block:: dts
>
>         secure-memory@100fe00000000 {
>                 device_type = "secure_memory";
>                 ibm,chip-id = <0>;
>                 reg = < 0x100fe 0x0 0x2 0x0>;
>         }
>
> Regions of secure memory will be reserved by hostboot such as OCC,
> HOMER, and SBE.  Skiboot will use the existing reserve infrastructure
> to reserve them.
> For example:
>
> .. code-block::
>
>         ibm,HCODE@100fffcaf0000
>         ibm,OCC@100fffcdd0000
>         ibm,RINGOVD@100fffcae0000
>         ibm,WOFDATA@100fffcb90000
>         ibm,arch-reg-data@100fffd700000
>         ibm,hbrt-code-image@100fffcec0000
>         ibm,hbrt-data@100fffd420000
>         ibm,homer-image@100fffd800000
>         ibm,homer-image@100fffdc00000
>         ibm,occ-common-area@100ffff800000
>         ibm,sbe-comm@100fffce90000
>         ibm,sbe-comm@100fffceb0000
>         ibm,sbe-ffdc@100fffce80000
>         ibm,sbe-ffdc@100fffcea0000
>         ibm,secure-crypt-algo-code@100fffce70000
>         ibm,uvbwlist@100fffcad0000
>
> For Mambo, ultra.tcl creates the secure-memory device tree node and is
> currently defined at 8GB with size 8GB.  Mambo has no protection on
> secure memory, so a watchpoint could be used to ensure Skiboot does not
> touch secure memory.
>
> For BML, the BML script parses secure memory from the Cronus config
> file and creates the secure-memory device tree node.

Looks ok to me. mpe might have some comments.

> > > *snip*
> > >
> > > +static void free_uv(void)
> > > +{
> > > +       struct mem_region *region = find_mem_region("ibm,
> > > firmware-allocs-memory@0");
> > > +
> > > +       lock(&region->free_list_lock);
> > > +       mem_free(region, uv_image, __location__);
> > > +       unlock(&region->free_list_lock);
> > > +}
> >
> > ibm,firmware-allocs-memory@0 contains everything allocated by skiboot
> > on node 0 using local_alloc()
> >
>
> OK, I think I wrote that code being unsure about how to do this.
>
> So, we can use local_alloc, but I'm struggling a little bit to figure
> out how to free it.  I looked at examples of local_alloc, but it
> doesn't seem any of the code in occ, phb4, or xive free their
> allocations.
>
> Is it up to the caller to deal with mem_regions and call mem_free with
> the mem_region?

Historically local_alloc() has only ever been used for static
allocations of things like the in-memory PHB tables. Up until now we
haven't needed to free those, so a local_free() just never got
written. Feel free to add one, but if you do then make sure you handle
returning the allocated memory back into the region it was allocated
from (i.e. fold it into an adjacent region with type ==
REGION_MEMORY).

> > > *snip*
> > > +{
> > > +       struct dt_node *node;
> > > +       const struct dt_property *base;
> > > +       uint64_t uv_src_addr, uv_pef_reg, uv_pef_size;
> > > +       void *uv_fdt;
> > > +
> > > +       prlog(PR_DEBUG, "UV: Init starting\n");
> > > +
> > > +       if (!is_msr_bit_set(MSR_S)) {
> > > +               prerror("UV: S bit not set\n");
> > > +               goto load_error;
> >
> > that's not an error.
> >
>
> OK, we plan on looking at these paths closely, as we have not tested or
> thought about when we're running without MSR_S.

That's what I figured. It's not a big deal, we try not to add nuisance
prints and the testers treat skiboot printing anything at PR_ERR or
lower as an excuse to file a bug report. Make this sort of FYI print a
PR_DEBUG or higher so we don't see it in the normal case.

> > > +       }
> > > +
> > > +       if (!(uv_opal = zalloc(sizeof(struct uv_opal)))) {
> > > +               prerror("UV: Failed to allocate uv_opal\n");
> > > +               goto load_error;
> > > +       }
> > > +
> > > +
> > > +       if (!(node = find_uv_node())) {
> > > +               prerror("UV: Device tree node not found\n");
> > > +               goto load_error;
> > > +       }
> > > +
> > > +       if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) {
> > > +               prlog(PR_INFO, "UV: Mambo simulator detected\n");
> > > +
> > > +               if (!find_secure_mem_to_copy(&uv_pef_reg,
> > > &uv_pef_size)) {
> > > +                       prerror("UV: No secure memory configured,
> > > exiting\n");
> > > +                       goto load_error;
> > > +               }
> > > +
> > > +               goto start;
> > > +       }
> >
> > Seems like a weird hack.
> >
>
> Yeah, this is how the code evolved supporting Mambo, Cronus, and HB.
> Do you think we should break each up into its own function?

I think mambo should be handled the same way cronus is. The only
justification for the current hack seems to be that it lets you skip
copying the UV blob from insecure to secure memory. It's pretty hard
to care about the overhead of the copy and I'd rather keep the
differences between platforms to a minimum.

> > > diff --git a/include/processor.h b/include/processor.h
> > > index 352fd1ec..0a552998 100644
> > > --- a/include/processor.h
> > > +++ b/include/processor.h
> > > @@ -11,6 +11,7 @@
> > >  #define MSR_HV         PPC_BIT(3)      /* Hypervisor mode */
> > >  #define MSR_VEC                PPC_BIT(38)     /* VMX enable */
> > >  #define MSR_VSX                PPC_BIT(40)     /* VSX enable */
> > > +#define MSR_S          PPC_BIT(41)     /* Secure Mode enable */
> > >  #define MSR_EE         PPC_BIT(48)     /* External Int. Enable */
> > >  #define MSR_PR         PPC_BIT(49)             /* Problem state */
> > >  #define MSR_FP         PPC_BIT(50)     /* Floating Point Enable */
> > > @@ -368,6 +369,17 @@ static inline void st_le32(uint32_t *addr,
> > > uint32_t val)
> > >         asm volatile("stwbrx %0,0,%1" : : "r"(val), "r"(addr),
> > > "m"(*addr));
> > >  }
> > >
> > > +/*
> > > + * MSR bit check
> > > + */
> > > +static inline bool is_msr_bit_set(uint64_t bit)
> > > +{
> > > +       if (mfmsr() & bit)
> > > +               return true;
> > > +
> > > +       return false;
> > > +}
> > > +
> > >  #endif /* __TEST__ */
> >
> > I'm going to take a stab in the dark and say this is going to break
> > all of our unit tests.
> >
>
> OK, we can start running the unit tests, we haven't been doing that.

Nobody ever does :(
Ryan Grimm Dec. 2, 2019, 8:36 p.m. UTC | #4
Oliver, Michael, 

Questions about implementation below and generally how to search for
devices in the device tree.  I've found myself and other team members
confused about how to do so.

On Thu, 2019-11-21 at 15:26 +1100, Oliver O'Halloran wrote:
*snip*
> > 
> > How about we have something like this in the doc, and use
> > device_type
> > "secure_memory" so we don't have the kernel try to use it as
> > regular
> > memory:
> > 
> > Skiboot parses secure memory from the HDAT tables and creates the
> > secure-memory device tree node, similar to a memory@ node except
> > the
> > device_type is "secure_memory". For example:
> > 
> > .. code-block:: dts
> > 
> >         secure-memory@100fe00000000 {
> >                 device_type = "secure_memory";
> >                 ibm,chip-id = <0>;
> >                 reg = < 0x100fe 0x0 0x2 0x0>;
> >         }
> > 
> > Regions of secure memory will be reserved by hostboot such as OCC,
> > HOMER, and SBE.  Skiboot will use the existing reserve
> > infrastructure
> > to reserve them.
> > For example:
> > 
> > .. code-block::
> > 
> >         ibm,HCODE@100fffcaf0000
> >         ibm,OCC@100fffcdd0000
> >         ibm,RINGOVD@100fffcae0000
> >         ibm,WOFDATA@100fffcb90000
> >         ibm,arch-reg-data@100fffd700000
> >         ibm,hbrt-code-image@100fffcec0000
> >         ibm,hbrt-data@100fffd420000
> >         ibm,homer-image@100fffd800000
> >         ibm,homer-image@100fffdc00000
> >         ibm,occ-common-area@100ffff800000
> >         ibm,sbe-comm@100fffce90000
> >         ibm,sbe-comm@100fffceb0000
> >         ibm,sbe-ffdc@100fffce80000
> >         ibm,sbe-ffdc@100fffcea0000
> >         ibm,secure-crypt-algo-code@100fffce70000
> >         ibm,uvbwlist@100fffcad0000
> > 
> > For Mambo, ultra.tcl creates the secure-memory device tree node and
> > is
> > currently defined at 8GB with size 8GB.  Mambo has no protection on
> > secure memory, so a watchpoint could be used to ensure Skiboot does
> > not
> > touch secure memory.
> > 
> > For BML, the BML script parses secure memory from the Cronus config
> > file and creates the secure-memory device tree node.
> 
> Looks ok to me. mpe might have some comments.
> 

How should Skiboot search for secure-memory in the device
tree?   Should we be looking at device_type the way the kernel does
with of_find_node_by_type?

Skiboot searches for some device nodes by compatible, and this is
stated in https://elinux.org/Device_Tree_Usage :

"Every node in the tree that represents a device is required to have 
the compatible property. compatible is the key an operating system uses
to decide which device driver to bind to a device."

I looked at memory@ of a running system and it has no compatible
property, so is memory not considered a device like a cpu, bus, or
ethernet adapter?

When is it correct to search by name or by path?

Thanks,
Ryan
Michael Ellerman Dec. 16, 2019, 11:29 a.m. UTC | #5
Ryan Grimm <grimm@linux.vnet.ibm.com> writes:
> Oliver, Michael, 
>
> Questions about implementation below and generally how to search for
> devices in the device tree.  I've found myself and other team members
> confused about how to do so.
>
> On Thu, 2019-11-21 at 15:26 +1100, Oliver O'Halloran wrote:
> *snip*
>> > 
>> > How about we have something like this in the doc, and use
>> > device_type
>> > "secure_memory" so we don't have the kernel try to use it as
>> > regular
>> > memory:
>> > 
>> > Skiboot parses secure memory from the HDAT tables and creates the
>> > secure-memory device tree node, similar to a memory@ node except
>> > the
>> > device_type is "secure_memory". For example:
>> > 
>> > .. code-block:: dts
>> > 
>> >         secure-memory@100fe00000000 {
>> >                 device_type = "secure_memory";
>> >                 ibm,chip-id = <0>;
>> >                 reg = < 0x100fe 0x0 0x2 0x0>;
>> >         }
>> > 
>> > Regions of secure memory will be reserved by hostboot such as OCC,
>> > HOMER, and SBE.  Skiboot will use the existing reserve
>> > infrastructure
>> > to reserve them.
>> > For example:
>> > 
>> > .. code-block::
>> > 
>> >         ibm,HCODE@100fffcaf0000
>> >         ibm,OCC@100fffcdd0000
>> >         ibm,RINGOVD@100fffcae0000
>> >         ibm,WOFDATA@100fffcb90000
>> >         ibm,arch-reg-data@100fffd700000
>> >         ibm,hbrt-code-image@100fffcec0000
>> >         ibm,hbrt-data@100fffd420000
>> >         ibm,homer-image@100fffd800000
>> >         ibm,homer-image@100fffdc00000
>> >         ibm,occ-common-area@100ffff800000
>> >         ibm,sbe-comm@100fffce90000
>> >         ibm,sbe-comm@100fffceb0000
>> >         ibm,sbe-ffdc@100fffce80000
>> >         ibm,sbe-ffdc@100fffcea0000
>> >         ibm,secure-crypt-algo-code@100fffce70000
>> >         ibm,uvbwlist@100fffcad0000
>> > 
>> > For Mambo, ultra.tcl creates the secure-memory device tree node and
>> > is
>> > currently defined at 8GB with size 8GB.  Mambo has no protection on
>> > secure memory, so a watchpoint could be used to ensure Skiboot does
>> > not
>> > touch secure memory.
>> > 
>> > For BML, the BML script parses secure memory from the Cronus config
>> > file and creates the secure-memory device tree node.
>> 
>> Looks ok to me. mpe might have some comments.
>
> How should Skiboot search for secure-memory in the device
> tree?   Should we be looking at device_type the way the kernel does
> with of_find_node_by_type?

Probably not.

device_type is like the "type" or "class" in the object oriented sense,
it is meant to describe what methods you can invoke on the node.

If you look at IEEE 1275 it says:

  “device_type”
  Standard property name to specify the implemented interface.
  ...
  Specifies the “device type” of this package, thus implying a specific
  set of package class methods implemented by this package.


So for a new binding we're writing today, which will only ever be used
with a FDT and not a real OF, it probably doesn't make much sense to use
device_type.

> Skiboot searches for some device nodes by compatible, and this is
> stated in https://elinux.org/Device_Tree_Usage :
>
> "Every node in the tree that represents a device is required to have 
> the compatible property. compatible is the key an operating system uses
> to decide which device driver to bind to a device."

In general the docs on elinux.org are quite FDT focused, so they are not
always correct when looking at Power because we still run on systems
that have actual OF, or our device trees evolved from systems that had
real OF.

However in this case that advice is definitely correct, all devices
should have a compatible property.

> I looked at memory@ of a running system and it has no compatible
> property, so is memory not considered a device like a cpu, bus, or
> ethernet adapter?

The quote above is talking about "which device driver to bind", but
historically there is no device driver for memory, it's just memory.

These days that's not quite so true, with persistent memory and device
memory and so on.

But the memory@x nodes come from a time when memory was wired pretty
straight into the CPU, so there was no need for a compatible or
anything fancy.

> When is it correct to search by name or by path?

For anything newly designed, where we control what is created in the
tree, we should not need to search by name or path. Searching by
compatible is always superior.

In practice we have systems that have badly designed device trees that
we can't fix, so we have to search by name or path.

Searching by name is OK, but compatible is superior because a node can
have multiple compatible values, but it can only have one name.

Searching by path sucks because it makes the structure of the tree API
which can then never be changed.

cheers
diff mbox series

Patch

diff --git a/asm/head.S b/asm/head.S
index e78dc520..18ce3044 100644
--- a/asm/head.S
+++ b/asm/head.S
@@ -1065,3 +1065,25 @@  start_kernel_secondary:
 	mtctr	%r3
 	mfspr	%r3,SPR_PIR
 	bctr
+
+.global start_uv	/* C prototype in ultravisor.h: start_uv(entry, uv_opal) */
+start_uv:
+	mflr    %r0		/* stash the return address ... */
+	std     %r0,16(%r1)	/* ... at 16(%r1) */
+	sync
+	icbi    0,%r3		/* invalidate the icache block at the entry address */
+	sync
+	isync
+	mtctr   %r3		/* CTR = entry address, branched to below */
+	mr      %r3,%r4		/* %r3 = old %r4 (presumably the uv_opal argument) */
+	LOAD_IMM64(%r8,SKIBOOT_BASE);
+	LOAD_IMM32(%r10, opal_entry - __head)
+	add     %r9,%r8,%r10	/* %r9 = SKIBOOT_BASE + (opal_entry - __head) */
+	LOAD_IMM32(%r6, EPAPR_MAGIC)
+	addi    %r7,%r5,1	/* NOTE(review): %r5 is not in the two-argument prototype -- confirm */
+	li      %r4,0
+	li      %r5,0
+	bctrl			/* call through CTR; LR is set so the callee can return here */
+	ld      %r0,16(%r1)	/* reload the saved return address */
+	mtlr    %r0
+	blr
diff --git a/core/flash.c b/core/flash.c
index 203b695d..a9c25486 100644
--- a/core/flash.c
+++ b/core/flash.c
@@ -42,6 +42,7 @@  static struct {
 	{ RESOURCE_ID_INITRAMFS,RESOURCE_SUBID_NONE,		"ROOTFS" },
 	{ RESOURCE_ID_CAPP,	RESOURCE_SUBID_SUPPORTED,	"CAPP" },
 	{ RESOURCE_ID_IMA_CATALOG,  RESOURCE_SUBID_SUPPORTED,	"IMA_CATALOG" },
+	{ RESOURCE_ID_UV_IMAGE, RESOURCE_SUBID_NONE,		"UVISOR" },
 	{ RESOURCE_ID_VERSION,	RESOURCE_SUBID_NONE,		"VERSION" },
 	{ RESOURCE_ID_KERNEL_FW,	RESOURCE_SUBID_NONE,		"BOOTKERNFW" },
 };
diff --git a/core/init.c b/core/init.c
index 25d827f2..ca83df7e 100644
--- a/core/init.c
+++ b/core/init.c
@@ -44,6 +44,7 @@ 
 #include <sbe-p9.h>
 #include <debug_descriptor.h>
 #include <occ.h>
+#include <ultravisor.h>
 
 enum proc_gen proc_gen;
 unsigned int pcie_max_link_speed;
@@ -1203,6 +1204,11 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	pci_nvram_init();
 
 	preload_capp_ucode();
+
+	/* preload and decompress ultravisor image */
+	uv_preload_image();
+	uv_decompress_image();
+
 	start_preload_kernel();
 
 	/* Catalog decompression routine */
@@ -1258,6 +1264,9 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	/* Add the list of interrupts going to OPAL */
 	add_opal_interrupts();
 
+	/* Init ultravisor software */
+	init_uv();
+
 	/* Now release parts of memory nodes we haven't used ourselves... */
 	mem_region_release_unused();
 
@@ -1275,6 +1284,8 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 
 	checksum_romem();
 
+	start_ultravisor();
+
 	load_and_boot_kernel(false);
 }
 
diff --git a/hdata/memory.c b/hdata/memory.c
index 9af7ae71..25b8088d 100644
--- a/hdata/memory.c
+++ b/hdata/memory.c
@@ -10,6 +10,7 @@ 
 #include <types.h>
 #include <inttypes.h>
 #include <processor.h>
+#include <ultravisor.h>
 
 #include "spira.h"
 #include "hdata.h"
@@ -59,6 +60,8 @@  struct HDIF_ms_area_address_range {
 #define MS_CONTROLLER_MCS_ID(id)	GETFIELD(PPC_BITMASK32(4, 7), id)
 #define MS_CONTROLLER_MCA_ID(id)	GETFIELD(PPC_BITMASK32(8, 15), id)
 
+#define MS_ATTR_SMF			(PPC_BIT32(23))
+
 struct HDIF_ms_area_id {
 	__be16 id;
 #define MS_PTYPE_RISER_CARD	0x8000
@@ -163,6 +166,16 @@  static bool add_address_range(struct dt_node *root,
 		return false;
 	}
 
+	if (arange->mirror_attr & MS_ATTR_SMF) {
+		/* Secure range: hand it to the ultravisor code, reserve it on failure */
+		prlog(PR_DEBUG, "Found secure memory\n");
+		if (uv_add_mem_range(reg[0], cleanup_addr(be64_to_cpu(arange->end))))
+			return true;
+		prlog(PR_INFO, "Failed to add secure memory range to DT\n");
+		mem_reserve_fw(name, reg[0], reg[1]);
+		return false;
+	}
+
 	if (be16_to_cpu(id->flags) & MS_AREA_SHARED) {
 		mem = dt_find_by_name_addr(dt_root, name, reg[0]);
 		if (mem) {
@@ -676,9 +689,9 @@  static void get_hb_reserved_mem(struct HDIF_common_hdr *ms_vpd)
 
 		/*
 		 * Workaround broken HDAT reserve regions which are
-		 * bigger than 512MB
+		 * bigger than 512MB and not secure memory
 		 */
-		if ((end_addr - start_addr) > 0x20000000) {
+		if (((end_addr - start_addr) > 0x20000000) && !(start_addr & UV_SECURE_MEM_BIT)) {
 			prlog(PR_ERR, "MEM: Ignoring Bad HDAT reserve: too big\n");
 			continue;
 		}
diff --git a/hw/Makefile.inc b/hw/Makefile.inc
index b708bdfe..848898b9 100644
--- a/hw/Makefile.inc
+++ b/hw/Makefile.inc
@@ -9,6 +9,7 @@  HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o
 HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o
 HW_OBJS += occ-sensor.o vas.o sbe-p8.o dio-p9.o lpc-port80h.o cache-p9.o
 HW_OBJS += npu-opal.o npu3.o npu3-nvlink.o npu3-hw-procedures.o
+HW_OBJS += ultravisor.o
 HW=hw/built-in.a
 
 include $(SRC)/hw/fsp/Makefile.inc
diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
index 6fa6534f..829e56f4 100644
--- a/hw/fsp/fsp.c
+++ b/hw/fsp/fsp.c
@@ -114,6 +114,7 @@  static u64 fsp_hir_timeout;
 #define KERNEL_LID_PHYP			0x80a00701
 #define KERNEL_LID_OPAL			0x80f00101
 #define INITRAMFS_LID_OPAL		0x80f00102
+#define ULTRA_LID_OPAL			0x80f00105
 
 /*
  * We keep track on last logged values for some things to print only on
@@ -2375,6 +2376,7 @@  static struct {
 } fsp_lid_map[] = {
 	{ RESOURCE_ID_KERNEL,	RESOURCE_SUBID_NONE,	KERNEL_LID_OPAL },
 	{ RESOURCE_ID_INITRAMFS,RESOURCE_SUBID_NONE,	INITRAMFS_LID_OPAL },
+	{ RESOURCE_ID_UV_IMAGE, RESOURCE_SUBID_NONE,	ULTRA_LID_OPAL },
 	{ RESOURCE_ID_IMA_CATALOG,IMA_CATALOG_NIMBUS,	0x80f00103 },
 	{ RESOURCE_ID_CAPP,	CAPP_IDX_MURANO_DD20,	0x80a02002 },
 	{ RESOURCE_ID_CAPP,	CAPP_IDX_MURANO_DD21,	0x80a02001 },
diff --git a/hw/ultravisor.c b/hw/ultravisor.c
new file mode 100644
index 00000000..8e3cceb4
--- /dev/null
+++ b/hw/ultravisor.c
@@ -0,0 +1,487 @@ 
+// SPDX-License-Identifier: Apache-2.0
+/* Copyright 2018-2019 IBM Corp. */
+
+#include <skiboot.h>
+#include <xscom.h>
+#include <chip.h>
+#include <device.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <ultravisor.h>
+#include <mem_region.h>
+#include <ultravisor-api.h>
+#include <libfdt/libfdt.h>
+
+static char *uv_image = NULL;
+static size_t uv_image_size;
+struct xz_decompress *uv_xz = NULL;
+static struct uv_opal *uv_opal;
+
+static struct dt_node *add_uv_dt_node(void)
+{
+	struct dt_node *dev, *uv;
+
+	dev = dt_new_check(dt_root, "ibm,ultravisor");
+	if (!dev)
+		return NULL;
+
+	dt_add_property_string(dev, "compatible", "ibm,ultravisor");
+	uv = dt_new_check(dev, "firmware");
+	if (!uv) {
+		dt_free(dev);
+		return NULL;
+	}
+
+	dt_add_property_string(uv, "compatible", "firmware");
+	return dev;
+}
+
+/* Return the "ibm,uv-firmware" compatible node, creating the ultravisor
+ * nodes first when it is absent.  Returns NULL if none can be found. */
+static struct dt_node *find_uv_node(void)
+{
+	struct dt_node *uv_node;
+
+	uv_node = dt_find_compatible_node(dt_root, NULL, "ibm,uv-firmware");
+	if (uv_node)
+		return uv_node;
+	prlog(PR_DEBUG, "ibm,uv-firmware compatible node not found, creating\n");
+	if (!add_uv_dt_node())
+		return NULL;
+	/* NOTE(review): add_uv_dt_node() sets other compatibles; confirm this can match */
+	return dt_find_compatible_node(dt_root, NULL, "ibm,uv-firmware");
+}
+
+/* Pick where to load the ultravisor: the first "secure-memory-ranges" entry
+ * larger than UV_LOAD_MAX_SIZE.  On success store its base in *target and
+ * its size in *sz and return true; return false if no entry qualifies. */
+static bool find_secure_mem_to_copy(uint64_t *target, uint64_t *sz)
+{
+	struct dt_node *uv_node = find_uv_node();
+	const struct dt_property *ranges;
+	uint64_t *range, *end;
+	uint64_t base, size;
+
+	if (!uv_node)
+		return false;
+	ranges = dt_find_property(uv_node, "secure-memory-ranges");
+	if (!ranges)
+		return false;
+
+	/* Entries are <addr size> pairs, 64 bits each; never read past the end */
+	range = (uint64_t *)ranges->prop;
+	end = (uint64_t *)(ranges->prop + ranges->len);
+	for (; range + 2 <= end; range += 2) {
+		base = dt_get_number(range, 2);
+		if (!base)
+			break;
+		size = dt_get_number(range + 1, 2);
+		if (size > UV_LOAD_MAX_SIZE) {
+			*target = base;
+			*sz = size;
+			return true;
+		}
+	}
+	return false;
+}
+
+static uint64_t find_uv_fw_base_addr(struct dt_node *uv_node)
+{
+	uint64_t base_addr = 0;
+
+	if (dt_has_node_property(uv_node, "reg", NULL))
+		base_addr = dt_prop_get_u64(uv_node, "reg");
+
+	return base_addr;
+}
+
+/* Reserve each range listed in the ultravisor node's "secure-memory-ranges"
+ * property as firmware memory named "ibm,secure-region-<index>". */
+static void reserve_secure_memory_region(void)
+{
+	struct dt_node *uv_node = find_uv_node();
+	const struct dt_property *ranges;
+	uint64_t *range, *rangesp, sm_size, addr;
+	char buf[128];
+	int i = 0;
+
+	if (!uv_node)
+		return;
+
+	ranges = dt_find_property(uv_node, "secure-memory-ranges");
+	if (!ranges)
+		return;
+
+	/* Walk whole <addr size> pairs only, stopping at an empty entry */
+	rangesp = (uint64_t *)(ranges->prop + ranges->len);
+	for (range = (uint64_t *)ranges->prop; range + 2 <= rangesp; range += 2) {
+		addr = dt_get_number(range, 2);
+		sm_size = dt_get_number(range + 1, 2);
+		if (!addr || !sm_size)
+			break;
+
+		/* Remove Hostboot regions from secure memory 0 so we don't abort
+		 * on overlapping regions */
+		if (i == 0) {
+			prlog(PR_INFO, "Secure region 0, removing HB region\n");
+			/* TODO: Check with Hostboot for memory map */
+			sm_size = sm_size - UV_HB_RESERVE_SIZE;
+		}
+
+		snprintf(buf, sizeof(buf), "ibm,secure-region-%d", i++);
+		/* presumably the region keeps the name pointer, hence the heap copy */
+		mem_reserve_fw(strdup(buf), addr, sm_size);
+	}
+}
+
+static void reserve_uv_memory(struct uv_opal *uv_opal)
+{
+	if (uv_opal->uv_base_addr == UV_LOAD_BASE) {
+		mem_reserve_fw("ibm,uv-code", UV_LOAD_BASE, UV_LOAD_MAX_SIZE);
+	} else {
+		reserve_secure_memory_region();
+	}
+}
+
+static void cpu_start_ultravisor(void *data)
+{
+	struct uv_opal *ptr = (struct uv_opal *)data;
+	start_uv(ptr->uv_base_addr, ptr);
+}
+
+/* Enter the ultravisor on every available CPU, this one included, and wait
+ * for the queued jobs.  Returns OPAL_SUCCESS if uv_ret_code is zero afterwards,
+ * OPAL_HARDWARE otherwise or when there is nothing that can be started. */
+int start_ultravisor(void)
+{
+	struct cpu_thread *cpu;
+	struct cpu_job **jobs;
+	int i = 0;
+
+	/* init_uv() can bail out before uv_opal or the load address is set up */
+	if (!uv_opal || !uv_opal->uv_base_addr)
+		return OPAL_HARDWARE;
+
+	prlog(PR_NOTICE, "UV: Starting Ultravisor at 0x%llx sys_fdt 0x%llx uv_fdt 0x%0llx\n",
+				uv_opal->uv_base_addr, uv_opal->sys_fdt, uv_opal->uv_fdt);
+
+	jobs = zalloc(sizeof(*jobs) * cpu_max_pir);
+	if (!jobs)
+		return OPAL_HARDWARE;
+
+	for_each_available_cpu(cpu) {
+		if (cpu == this_cpu())
+			continue;
+		jobs[i++] = cpu_queue_job(cpu, "start_ultravisor",
+					cpu_start_ultravisor, (void *)uv_opal);
+	}
+
+	cpu_start_ultravisor((void *)uv_opal);
+
+	/* wait for everyone to sync back */
+	while (i > 0)
+		cpu_wait_job(jobs[--i], true);
+	free(jobs);
+
+	return uv_opal->uv_ret_code ? OPAL_HARDWARE : OPAL_SUCCESS;
+}
+
+static int create_dtb_uv(void *uv_fdt)
+{
+	if (fdt_create(uv_fdt, UV_FDT_MAX_SIZE)) {
+		prerror("UV: Failed to create uv_fdt\n");
+		return 1;
+	}
+
+	fdt_finish_reservemap(uv_fdt);
+	fdt_begin_node(uv_fdt, "");
+	fdt_property_string(uv_fdt, "description", "Ultravisor fdt");
+	fdt_begin_node(uv_fdt, "ibm,uv-fdt");
+	fdt_property_string(uv_fdt, "compatible", "ibm,uv-fdt");
+	fdt_end_node(uv_fdt);
+	fdt_end_node(uv_fdt);
+	fdt_finish(uv_fdt);
+
+	return OPAL_SUCCESS;
+}
+
+static void free_uv(void)
+{
+	struct mem_region *region = find_mem_region("ibm,firmware-allocs-memory@0");
+
+	lock(&region->free_list_lock);
+	mem_free(region, uv_image, __location__);
+	unlock(&region->free_list_lock);
+}
+
+static bool alloc_uv(void)
+{
+	struct proc_chip *chip = next_chip(NULL);
+
+	uv_image_size = MAX_COMPRESSED_UV_IMAGE_SIZE;
+	if (!(uv_image = local_alloc(chip->id, uv_image_size, uv_image_size)))
+		return false;
+	memset(uv_image, 0, uv_image_size);
+	return true;
+}
+
+/* We could be running on Mambo, Cronus, or Hostboot
+ *
+ * Detect Mambo via chip quirk.  Mambo writes the uncompressed UV images
+ * directly to secure memory and passes secure memory location via device tree.
+ *
+ * Detect Cronus when HB decompress fails.  Cronus writes the uncompressed UV
+ * image to insecure memory and init_uv will copy from insecure to secure.
+ *
+ * Assume HB by waiting for decompress.  UV should have been loaded from FSP
+ * and decompressed earlier via uv_preload_image and uv_decompress_image.  The
+ * secure location of the UV provided by those functions in xz struct.
+ * The preload buffer and the xz state are released on every exit path. */
+void init_uv(void)
+{
+	struct dt_node *node;
+	const struct dt_property *base;
+	uint64_t uv_src_addr, uv_pef_reg, uv_pef_size;
+	void *uv_fdt;
+
+	prlog(PR_DEBUG, "UV: Init starting\n");
+
+	if (!is_msr_bit_set(MSR_S)) {
+		prlog(PR_DEBUG, "UV: S bit not set\n");
+		goto load_error;
+	}
+
+	if (!(uv_opal = zalloc(sizeof(struct uv_opal)))) {
+		prerror("UV: Failed to allocate uv_opal\n");
+		goto load_error;
+	}
+
+	if (!(node = find_uv_node())) {
+		prerror("UV: Device tree node not found\n");
+		goto load_error;
+	}
+
+	if (proc_chip_quirks & QUIRK_MAMBO_CALLOUTS) {
+		prlog(PR_INFO, "UV: Mambo simulator detected\n");
+
+		if (!find_secure_mem_to_copy(&uv_pef_reg, &uv_pef_size)) {
+			prerror("UV: No secure memory configured, exiting\n");
+			goto load_error;
+		}
+
+		goto start;
+	}
+
+	/* This would be null in case we are on Cronus */
+	if (!uv_xz) {
+		prlog(PR_INFO, "UV: Platform load failed, detecting UV image via device tree\n");
+
+		if (!find_secure_mem_to_copy(&uv_pef_reg, &uv_pef_size)) {
+			prerror("UV: No secure memory configured, exiting\n");
+			goto load_error;
+		}
+
+		if (!(uv_src_addr = find_uv_fw_base_addr(node))) {
+			prerror("UV: Couldn't find UV base address in device tree\n");
+			goto load_error;
+		}
+
+		prlog(PR_INFO, "UV: Copying Ultravisor to protected memory 0x%llx from 0x%llx\n", uv_pef_reg, uv_src_addr);
+
+		memcpy((void *)uv_pef_reg, (void *)uv_src_addr, UV_LOAD_MAX_SIZE);
+
+		goto start;
+	}
+
+	/* Hostboot path */
+	wait_xz_decompress(uv_xz);
+	if (uv_xz->status) {
+		prerror("UV: Compressed Ultravisor image failed to decompress\n");
+		goto load_error;
+	}
+
+	/* the uncompressed location will be the base address of ultravisor
+	 * so fix up if it's already there */
+	base = dt_find_property(node, "reg");
+	if (base)
+		dt_del_property(node, (struct dt_property *)base);
+
+	dt_add_property_u64(node, "reg", (uint64_t)uv_xz->dst);
+
+	uv_pef_reg = (uint64_t)uv_xz->dst;
+
+start:
+	uv_opal->uv_base_addr = uv_pef_reg;
+
+	uv_opal->sys_fdt = (__be64)create_dtb(dt_root, false);
+	if (!uv_opal->sys_fdt) {
+		prerror("UV: Failed to create system fdt\n");
+		goto load_error;
+	}
+
+	uv_fdt = (void *)(uv_pef_reg + UV_LOAD_MAX_SIZE);
+	if (create_dtb_uv(uv_fdt)) {
+		prerror("UV: Failed to create uv fdt\n");
+		goto load_error;
+	}
+	uv_opal->uv_fdt = (__be64)uv_fdt;
+
+	reserve_uv_memory(uv_opal);
+
+load_error:
+	free_uv();
+	uv_image = NULL;
+	free(uv_xz);
+	uv_xz = NULL;
+}
+
+/* Append <start len> (two 32-bit cells each) to the node's "secure-memory-ranges"
+ * property, creating it if absent.  Returns false only if allocation fails. */
+static bool dt_append_memory_range(struct dt_node *node, __be64 start,
+				   __be64 len)
+{
+	const struct dt_property *ranges;
+	size_t size;
+	u32 *new_ranges;
+	int i;
+
+	/* for Cronus boot the BML script creates secure-memory-ranges
+	 * for Mambo boot the ultra.tcl script create secure-memory ranges
+	 * for HostBoot, skiboot parses HDAT in hdata/memory.c and creates it here */
+	ranges = dt_find_property(node, "secure-memory-ranges");
+	if (!ranges) {
+		prlog(PR_DEBUG, "Creating secure-memory-ranges.\n");
+		dt_add_property_cells(node, "secure-memory-ranges",
+				      hi32(start), lo32(start),
+				      hi32(len), lo32(len));
+		return true;
+	}
+
+	prlog(PR_DEBUG, "Adding secure memory range at 0x%llx of size: 0x%llx\n", start, len);
+	size = ranges->len + 16;	/* old contents plus four more 32-bit cells */
+	new_ranges = malloc(size);
+	if (!new_ranges)
+		return false;
+	memcpy(new_ranges, ranges->prop, ranges->len);
+	i = ranges->len / 4;
+	new_ranges[i++] = hi32(start);
+	new_ranges[i++] = lo32(start);
+	new_ranges[i++] = hi32(len);
+	new_ranges[i] = lo32(len);
+	/* Swap in the longer property; dt_add_property() copies, so drop our buffer */
+	dt_del_property(node, (struct dt_property *)ranges);
+	dt_add_property(node, "secure-memory-ranges", new_ranges, size);
+	free(new_ranges);
+	return true;
+}
+
+/*
+ * Record [start, end] as a secure memory range on the ultravisor node.
+ * Returns false when MSR_S is clear, when either address lacks
+ * UV_SECURE_MEM_BIT, or when the range cannot be added; the caller is then
+ * responsible for moving the memory to an appropriate reserved area.
+ */
+bool uv_add_mem_range(__be64 start, __be64 end)
+{
+	struct dt_node *uv_node;
+
+	if (!is_msr_bit_set(MSR_S))
+		return false;
+
+	/* Both ends must carry the secure-memory address bit */
+	if (!(start & UV_SECURE_MEM_BIT) || !(end & UV_SECURE_MEM_BIT)) {
+		prlog(PR_DEBUG, "Invalid secure address range.\n");
+		return false;
+	}
+
+	uv_node = find_uv_node();
+	if (!uv_node) {
+		prlog(PR_ERR, "Could not create uv node\n");
+		return false;
+	}
+
+	/* end is treated as inclusive, hence the +1 when converting to a length */
+	if (!dt_append_memory_range(uv_node, start, end - start + 1))
+		return false;
+
+	prlog(PR_NOTICE, "Secure memory range added [0x%016llx..0x%016llx]\n", start, end);
+	return true;
+}
+
+/*
+ * Preload the UV image from PNOR partition
+ */
+void uv_preload_image(void)
+{
+	int ret;
+
+	prlog(PR_INFO, "UV: Preload starting\n");
+
+	if (!alloc_uv()) {
+		prerror("UV: Memory allocation failed\n");
+		return;
+	}
+
+	ret = start_preload_resource(RESOURCE_ID_UV_IMAGE, RESOURCE_SUBID_NONE,
+				     uv_image, &uv_image_size);
+
+	if (ret != OPAL_SUCCESS) {
+		prerror("UV: platform load failed: %d\n", ret);
+	}
+}
+
+/*
+ * Start decompressing the preloaded UV image
+ *
+ * The destination is the first entry of the "secure-memory-ranges" property;
+ * the decompression state is kept in uv_xz, which init_uv() waits on later.
+ */
+void uv_decompress_image(void)
+{
+	const struct dt_property *ranges;
+	struct dt_node *uv_node;
+	uint64_t *range;
+
+	if (uv_image == NULL) {
+		prerror("UV: Preload hasn't started yet! Aborting.\n");
+		return;
+	}
+
+	if (wait_for_resource_loaded(RESOURCE_ID_UV_IMAGE,
+				     RESOURCE_SUBID_NONE) != OPAL_SUCCESS) {
+		prerror("UV: Ultravisor image load failed\n");
+		return;
+	}
+
+	uv_node = dt_find_by_name(dt_root, "ibm,uv-firmware");
+	if (!uv_node) {
+		prerror("UV: Cannot find ibm,uv-firmware node\n");
+		return;
+	}
+
+	ranges = dt_find_property(uv_node, "secure-memory-ranges");
+	if (!ranges || ranges->len < 2 * sizeof(uint64_t)) {
+		prerror("UV: Cannot find secure-memory-ranges\n");
+		return;
+	}
+
+	uv_xz = malloc(sizeof(struct xz_decompress));
+	if (!uv_xz) {
+		prerror("UV: Cannot allocate memory for decompression of UV\n");
+		return;
+	}
+
+	/* the load area is the first secure memory range */
+	range = (void *)ranges->prop;
+	uv_xz->dst = (void *)dt_get_number(range, 2);
+	uv_xz->dst_size = dt_get_number(range + 1, 2);
+	uv_xz->src = uv_image;
+	uv_xz->src_size = uv_image_size;
+
+	/* TODO security and integrity checks? */
+	xz_start_decompress(uv_xz);
+	if ((uv_xz->status != OPAL_PARTIAL) && (uv_xz->status != OPAL_SUCCESS))
+		prerror("UV: XZ decompression failed status 0x%x\n", uv_xz->status);
+}
diff --git a/include/platform.h b/include/platform.h
index 0b043856..259550d4 100644
--- a/include/platform.h
+++ b/include/platform.h
@@ -17,6 +17,7 @@  enum resource_id {
 	RESOURCE_ID_INITRAMFS,
 	RESOURCE_ID_CAPP,
 	RESOURCE_ID_IMA_CATALOG,
+	RESOURCE_ID_UV_IMAGE,
 	RESOURCE_ID_VERSION,
 	RESOURCE_ID_KERNEL_FW,
 };
diff --git a/include/processor.h b/include/processor.h
index 352fd1ec..0a552998 100644
--- a/include/processor.h
+++ b/include/processor.h
@@ -11,6 +11,7 @@ 
 #define MSR_HV		PPC_BIT(3)	/* Hypervisor mode */
 #define MSR_VEC		PPC_BIT(38)	/* VMX enable */
 #define MSR_VSX		PPC_BIT(40)	/* VSX enable */
+#define MSR_S		PPC_BIT(41)	/* Secure Mode enable */
 #define MSR_EE		PPC_BIT(48)	/* External Int. Enable */
 #define MSR_PR		PPC_BIT(49)       	/* Problem state */
 #define MSR_FP		PPC_BIT(50)	/* Floating Point Enable */
@@ -368,6 +369,17 @@  static inline void st_le32(uint32_t *addr, uint32_t val)
 	asm volatile("stwbrx %0,0,%1" : : "r"(val), "r"(addr), "m"(*addr));
 }
 
+/*
+ * Report whether any bit of the given mask is currently set in the MSR.
+ *
+ * @bit: MSR mask to test, e.g. MSR_S
+ * Returns true when (MSR & bit) is non-zero, false otherwise.
+ */
+static inline bool is_msr_bit_set(uint64_t bit)
+{
+	return (mfmsr() & bit) != 0;
+}
+
 #endif /* __TEST__ */
 
 #endif /* __ASSEMBLY__ */
diff --git a/include/ultravisor-api.h b/include/ultravisor-api.h
new file mode 100644
index 00000000..8a99b7c4
--- /dev/null
+++ b/include/ultravisor-api.h
@@ -0,0 +1,18 @@ 
+// SPDX-License-Identifier: Apache-2.0
+/* Copyright 2018-2019 IBM Corp. */
+
+#ifndef __ULTRAVISOR_API_H
+#define __ULTRAVISOR_API_H
+
+struct uv_opal {
+	__be32 magic;		/**< 'OPUV' 0x4F505556 OPUV_MAGIC */
+	__be32 version;		/**< uv_opal struct version */
+	__be32 uv_ret_code;	/**< 0 - Success, <0> : error. */
+	__be32 uv_api_ver;	/**< Current uv api version. */
+	__be64 uv_base_addr;	/**< Base address of UV in secure memory. */
+	__be64 sys_fdt;		/**< System FDT. */
+	__be64 uv_fdt;		/**< UV FDT in secure memory. */
+	__be64 uv_mem;		/**< struct memcons */
+};
+
+#endif /* __ULTRAVISOR_API_H */
diff --git a/include/ultravisor.h b/include/ultravisor.h
new file mode 100644
index 00000000..b49121ce
--- /dev/null
+++ b/include/ultravisor.h
@@ -0,0 +1,27 @@ 
+// SPDX-License-Identifier: Apache-2.0
+/* Copyright 2018-2019 IBM Corp. */
+
+#ifndef __ULTRAVISOR_H
+#define __ULTRAVISOR_H
+
+#include <ultravisor-api.h>
+
+/* Bit 15 of an address should be set for it to be used as a secure memory area
+ * for the secure virtual machines */
+#define UV_SECURE_MEM_BIT              (PPC_BIT(15))
+#define MAX_COMPRESSED_UV_IMAGE_SIZE 0x40000 /* 256 Kilobytes */
+#define UV_ACCESS_BIT		(0x1ULL << 48)
+/* Address at which the Ultravisor is loaded for BML and Mambo */
+#define UV_LOAD_BASE		0xC0000000
+#define UV_LOAD_MAX_SIZE	0x200000	/* 2 Megabytes */
+#define UV_FDT_MAX_SIZE		0x100000	/* 1 Megabyte */
+#define UV_HB_RESERVE_SIZE	0x4000000	/* 64 Megabytes; no trailing ';' so it is usable in expressions */
+
+extern int start_uv(uint64_t entry, struct uv_opal *uv_opal);
+extern bool uv_add_mem_range(__be64 start, __be64 end);
+extern void uv_preload_image(void);
+extern void uv_decompress_image(void);
+extern void init_uv(void);
+extern int start_ultravisor(void);
+
+#endif /* __ULTRAVISOR_H */