Message ID | b28c7514d2e7280c5c84f1ca818677011c52b2b0.1513579137.git-series.andrew.donnellan@au1.ibm.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Initial OpenCAPI 3.0 Support for P9 | expand |
FYI, I didn't re-check all the register and bit settings for the various procedures. I had checked some in the past when debugging, and I'm willing to accept that getting it working is good enough. A few nitpicks below. Le 18/12/2017 à 08:07, Andrew Donnellan a écrit : > Scan the device tree for NPUs with OpenCAPI links and configure the NPU per > the initialisation sequence in the NPU OpenCAPI workbook. > > Training of individual links and setup of per-AFU/link configuration will > be in a later patch. > > Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> > --- > core/init.c | 2 +- > hw/Makefile.inc | 3 +- > hw/npu2-opencapi.c | 794 +++++++++++++++++++++++++++++++++++++++++++++- > hw/npu2.c | 33 +- > include/npu2-regs.h | 88 +++++- > include/npu2.h | 2 +- > include/skiboot.h | 1 +- > 7 files changed, 910 insertions(+), 13 deletions(-) > create mode 100644 hw/npu2-opencapi.c > > diff --git a/core/init.c b/core/init.c > index 9eb8e25..1522ebe 100644 > --- a/core/init.c > +++ b/core/init.c > @@ -1048,6 +1048,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt) > /* Probe NPUs */ > probe_npu(); > probe_npu2(); > + /* TODO: Eventually, we'll do NVLink and OpenCAPI together */ > + probe_npu2_opencapi(); > > /* Initialize PCI */ > pci_init_slots(); > diff --git a/hw/Makefile.inc b/hw/Makefile.inc > index 27d8a38..cfc8646 100644 > --- a/hw/Makefile.inc > +++ b/hw/Makefile.inc > @@ -7,7 +7,8 @@ HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o > HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o > HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o > HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o > -HW_OBJS += npu2-common.o phys-map.o sbe-p9.o capp.o occ-sensor.o vas.o p9-adu.o > +HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o occ-sensor.o > +HW_OBJS += vas.o p9-adu.o > HW=hw/built-in.o > > # FIXME hack this for now > diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c > new file mode 100644 > 
index 0000000..55d5cd9 > --- /dev/null > +++ b/hw/npu2-opencapi.c > @@ -0,0 +1,794 @@ > +/* Copyright 2013-2017 IBM Corp. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or > + * implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +/* > + * Support for OpenCAPI on POWER9 NPUs > + * > + * This file provides support for OpenCAPI as implemented on POWER9. > + * > + * At present, we initialise the NPU separately from the NVLink code in npu2.c. > + * As such, we don't currently support mixed NVLink and OpenCAPI configurations > + * on the same NPU for machines such as Witherspoon. > + * > + * Procedure references in this file are to the POWER9 OpenCAPI NPU Workbook > + * (IBM internal document). 
> + * > + * TODO: > + * - Support for mixed NVLink and OpenCAPI on the same NPU > + * - Support for link ganging (one AFU using multiple links) > + * - Link reset and error handling > + * - Presence detection > + * - Consume HDAT NPU information > + * - LPC Memory support > + */ > + > +#include <skiboot.h> > +#include <xscom.h> > +#include <io.h> > +#include <timebase.h> > +#include <pci.h> > +#include <pci-cfg.h> > +#include <pci-slot.h> > +#include <interrupts.h> > +#include <opal.h> > +#include <opal-api.h> > +#include <npu2.h> > +#include <npu2-regs.h> > +#include <phys-map.h> > +#include <xive.h> > +#include <p9-adu.h> > +#include <i2c.h> > + > +#define NPU_IRQ_LEVELS 35 > + > +static inline uint64_t index_to_stack(uint64_t index) { > + switch (index) { > + case 2: > + case 3: > + return NPU2_STACK_STCK_1; > + break; > + case 4: > + case 5: > + return NPU2_STACK_STCK_2; > + break; > + default: > + assert(false); > + } > +} > + > +static inline uint64_t index_to_stacku(uint64_t index) { > + switch (index) { > + case 2: > + case 3: > + return NPU2_STACK_STCK_1U; > + break; > + case 4: > + case 5: > + return NPU2_STACK_STCK_2U; > + break; > + default: > + assert(false); > + } > +} > + > +static inline uint64_t index_to_block(uint64_t index) { > + switch (index) { > + case 2: > + case 4: > + return NPU2_BLOCK_OTL0; > + break; > + case 3: > + case 5: > + return NPU2_BLOCK_OTL1; > + break; > + default: > + assert(false); > + } > +} > + > +/* Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5 */ > + > +static void set_transport_mux_controls(uint32_t gcid, uint32_t scom_base, > + int index, enum npu2_dev_type type) > +{ > + /* Step 1 - Set Transport MUX controls to select correct OTL or NTL */ > + uint64_t reg; > + uint64_t field; > + > + /* TODO: Rework this to select for NVLink too */ > + assert(type == NPU2_DEV_TYPE_OPENCAPI); > + > + prlog(PR_DEBUG, "OCAPI: %s: Setting transport mux controls\n", __func__); > + > + /* Optical IO Transport Mux Config for 
Bricks 0-2 and 4-5 */ > + reg = npu2_scom_read(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0, > + NPU2_MISC_DA_LEN_8B); > + switch (index) { > + case 0: > + case 1: > + /* not valid for OpenCAPI */ > + assert(false); > + break; > + case 2: /* OTL1.0 */ > + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg); > + field &= ~0b100; > + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg, > + field); > + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg); > + field |= 0b10; > + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg, > + field); > + break; > + case 3: /* OTL1.1 */ > + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg); > + field &= ~0b010; > + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg, > + field); > + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg); > + field |= 0b01; > + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg, > + field); > + break; > + case 4: /* OTL2.0 */ > + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg); > + field |= 0b10; > + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg, > + field); > + break; > + case 5: /* OTL2.1 */ > + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg); > + field |= 0b01; > + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg, > + field); > + break; > + default: > + assert(false); > + } > + npu2_scom_write(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0, > + NPU2_MISC_DA_LEN_8B, reg); > + > + /* > + * PowerBus Optical Miscellaneous Config Register - select > + * OpenCAPI for b4/5 and A-Link for b3 > + */ > + xscom_read(gcid, PU_IOE_PB_MISC_CFG, &reg); > + switch (index) { > + case 0: > + case 1: > + case 2: > + case 3: > + break; > + case 4: > + reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB, reg, 1); > + break; > + case 5: > + reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB, reg, 1); > + break; > + } > + xscom_write(gcid, PU_IOE_PB_MISC_CFG, reg); > +} > + > +static void 
enable_odl_phy_mux(uint32_t gcid, int index) > +{ > + uint64_t reg; > + uint64_t phy_config_scom; > + prlog(PR_DEBUG, "OCAPI: %s: Enabling ODL to PHY MUXes\n", __func__); > + /* Step 2 - Enable MUXes for ODL to PHY connection */ > + switch (index) { > + case 2: > + case 3: > + phy_config_scom = OBUS_LL0_IOOL_PHY_CONFIG; > + break; > + case 4: > + case 5: > + phy_config_scom = OBUS_LL3_IOOL_PHY_CONFIG; > + break; > + default: > + assert(false); > + } > + > + /* PowerBus OLL PHY Training Config Register */ > + xscom_read(gcid, phy_config_scom, &reg); > + > + /* Enable ODLs to use shared PHYs */ > + reg |= OBUS_IOOL_PHY_CONFIG_ODL0_ENABLED; > + reg |= OBUS_IOOL_PHY_CONFIG_ODL1_ENABLED; > + > + /* > + * Swap ODL1 to use brick 2 lanes instead of brick 1 lanes if using a > + * 22-pin cable for OpenCAPI connection. > + */ > + if (platform.ocapi->odl_phy_swap) > + reg |= OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP; > + else > + reg &= ~OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP; > + > + /* Disable A-Link link layers */ > + reg &= ~OBUS_IOOL_PHY_CONFIG_LINK0_OLL_ENABLED; > + reg &= ~OBUS_IOOL_PHY_CONFIG_LINK1_OLL_ENABLED; > + > + /* Disable NV-Link link layers */ > + reg &= ~OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED; > + reg &= ~OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED; > + reg &= ~OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED; > + xscom_write(gcid, phy_config_scom, reg); > +} > + > +static void disable_alink_fp(uint32_t gcid) > +{ > + uint64_t reg = 0; > + > + prlog(PR_DEBUG, "OCAPI: %s: Disabling A-Link framer/parsers\n", __func__); > + /* Step 3 - Disable A-Link framers/parsers */ > + /* TODO: Confirm if needed on OPAL system */ > + > + reg |= PU_IOE_PB_FP_CFG_FP0_FMR_DISABLE; > + reg |= PU_IOE_PB_FP_CFG_FP0_PRS_DISABLE; > + reg |= PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE; > + reg |= PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE; > + xscom_write(gcid, PU_IOE_PB_FP01_CFG, reg); > + xscom_write(gcid, PU_IOE_PB_FP23_CFG, reg); > + xscom_write(gcid, PU_IOE_PB_FP45_CFG, reg); > + xscom_write(gcid, PU_IOE_PB_FP67_CFG, reg); > +} > + > 
+static void set_pb_hp_opencapi(uint32_t gcid, int index) > +{ > + /* Step 4 - Set PowerBus HotPlug Mode Registers */ > + uint64_t reg; > + > + prlog(PR_DEBUG, "OCAPI: %s: Setting PowerBus Hotplug Mode registers\n", __func__); > + > + xscom_read(gcid, PB_WEST_HP_MODE_CURR, &reg); > + switch (index) { > + case 2: > + case 3: > + /* Configure OPT0 as an OpenCAPI link */ > + reg = SETFIELD(PPC_BITMASK(32, 33), reg, 0b01); > + break; > + case 4: > + case 5: > + /* Configure OPT3 as an OpenCAPI link */ > + reg = SETFIELD(PPC_BITMASK(38, 39), reg, 0b01); > + break; > + default: > + assert(false); > + } > + > + p9_adu_set_pb_hp_mode(gcid, reg); > +} > + > +static void enable_xsl_clocks(uint32_t gcid, uint32_t scom_base, int index) > +{ > + /* Step 5 - Enable Clocks in XSL */ > + > + prlog(PR_DEBUG, "OCAPI: %s: Enable clocks in XSL\n", __func__); > + > + npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(index_to_stack(index), > + NPU2_BLOCK_XSL, > + NPU2_XSL_WRAP_CFG), > + NPU2_MISC_DA_LEN_8B, NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE); > +} > + > +#define CQ_CTL_STATUS_TIMEOUT 10 /* milliseconds */ > + > +static int set_fence_control(uint32_t gcid, uint32_t scom_base, > + int index, uint8_t status) > +{ > + int stack, block; > + uint64_t reg, status_field; > + uint8_t status_val; > + uint64_t fence_control; > + uint64_t timeout = mftb() + msecs_to_tb(CQ_CTL_STATUS_TIMEOUT); > + > + stack = index_to_stack(index); > + block = index_to_block(index); > + > + fence_control = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, > + block == NPU2_BLOCK_OTL0 ? 
> + NPU2_CQ_CTL_FENCE_CONTROL_0 : > + NPU2_CQ_CTL_FENCE_CONTROL_1); > + > + reg = SETFIELD(NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE, 0ull, status); > + npu2_scom_write(gcid, scom_base, fence_control, > + NPU2_MISC_DA_LEN_8B, reg); > + > + /* Wait for fence status to update */ > + if (index_to_block(index) == NPU2_BLOCK_OTL0) > + status_field = NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED; > + else > + status_field = NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED; > + > + do { > + reg = npu2_scom_read(gcid, scom_base, > + NPU2_REG_OFFSET(index_to_stack(index), > + NPU2_BLOCK_CTL, > + NPU2_CQ_CTL_STATUS), > + NPU2_MISC_DA_LEN_8B); > + status_val = GETFIELD(status_field, reg); > + if (status_val == status) > + return OPAL_SUCCESS; > + time_wait_ms(1); > + } while (tb_compare(mftb(), timeout) == TB_ABEFOREB); > + > + /** > + * @fwts-label OCAPIFenceStatusTimeout > + * @fwts-advice The NPU fence status did not update as expected. This > + * could be the result of a firmware or hardware bug. OpenCAPI > + * functionality could be broken. 
> + */ > + prlog(PR_ERR, > + "OCAPI: Fence status for brick %d stuck: expected 0x%x, got 0x%x\n", > + index, status, status_val); > + return OPAL_HARDWARE; > +} > + > +static void set_npcq_config(uint32_t gcid, uint32_t scom_base, int index) > +{ > + uint64_t reg, stack, block; > + > + prlog(PR_DEBUG, "OCAPI: %s: Set NPCQ Config\n", __func__); > + /* Step 6 - Set NPCQ configuration */ > + /* CQ_CTL Misc Config Register #0 */ > + stack = index_to_stack(index); > + block = index_to_block(index); > + > + /* Enable OTL */ > + npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG0(stack, block), > + NPU2_MISC_DA_LEN_8B, NPU2_OTL_CONFIG0_EN); > + set_fence_control(gcid, scom_base, index, 0b01); > + reg = npu2_scom_read(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, > + NPU2_CQ_CTL_MISC_CFG), > + NPU2_MISC_DA_LEN_8B); > + /* Set OCAPI mode */ > + reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE; > + if (block == NPU2_BLOCK_OTL0) > + reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE; > + else > + reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE; > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, > + NPU2_CQ_CTL_MISC_CFG), > + NPU2_MISC_DA_LEN_8B, reg); > + > + /* NPU Fenced */ > + set_fence_control(gcid, scom_base, index, 0b11); > + > + /* NPU Half Fenced */ > + set_fence_control(gcid, scom_base, index, 0b10); > + > + /* CQ_DAT Misc Config Register #1 */ > + reg = npu2_scom_read(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_DAT, > + NPU2_CQ_DAT_MISC_CFG), > + NPU2_MISC_DA_LEN_8B); > + /* Set OCAPI mode for bricks 2-5 */ > + reg |= NPU2_CQ_DAT_MISC_CFG_CONFIG_OCAPI_MODE; > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_DAT, > + NPU2_CQ_DAT_MISC_CFG), > + NPU2_MISC_DA_LEN_8B, reg); > + > + /* CQ_SM Misc Config Register #0 */ > + for (uint64_t block = NPU2_BLOCK_SM_0; > + block <= NPU2_BLOCK_SM_3; block++) { > + reg = npu2_scom_read(gcid, scom_base, > + NPU2_REG_OFFSET(stack, block, > + NPU2_CQ_SM_MISC_CFG0), > + 
NPU2_MISC_DA_LEN_8B); > + /* Set OCAPI mode for bricks 2-5 */ > + reg |= NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE; > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, block, > + NPU2_CQ_SM_MISC_CFG0), > + NPU2_MISC_DA_LEN_8B, reg); > + } > +} > + > +static void enable_xsl_xts_interfaces(uint32_t gcid, uint32_t scom_base, int index) > +{ > + uint64_t reg; > + > + prlog(PR_DEBUG, "OCAPI: %s: Enable XSL-XTS Interfaces\n", __func__); > + /* Step 7 - Enable XSL-XTS interfaces */ > + /* XTS Config Register - Enable XSL-XTS interface */ > + reg = npu2_scom_read(gcid, scom_base, NPU2_XTS_CFG, NPU2_MISC_DA_LEN_8B); > + reg |= NPU2_XTS_CFG_OPENCAPI; > + npu2_scom_write(gcid, scom_base, NPU2_XTS_CFG, NPU2_MISC_DA_LEN_8B, reg); > + > + /* XTS Config2 Register - Enable XSL1/2 */ > + reg = npu2_scom_read(gcid, scom_base, NPU2_XTS_CFG2, NPU2_MISC_DA_LEN_8B); > + switch (index_to_stack(index)) { > + case NPU2_STACK_STCK_1: > + reg |= NPU2_XTS_CFG2_XSL1_ENA; > + break; > + case NPU2_STACK_STCK_2: > + reg |= NPU2_XTS_CFG2_XSL2_ENA; > + break; > + } > + npu2_scom_write(gcid, scom_base, NPU2_XTS_CFG2, NPU2_MISC_DA_LEN_8B, reg); > +} > + > +static void enable_sm_allocation(uint32_t gcid, uint32_t scom_base, int index) > +{ > + uint64_t reg; > + int stack = index_to_stack(index); > + > + prlog(PR_DEBUG, "OCAPI: %s: Enable State Machine Allocation\n", __func__); > + /* Step 8 - Enable state-machine allocation */ > + /* Low-Water Marks Registers - Enable state machine allocation */ > + for (uint64_t block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) { The 'block' declaration is following an unusual coding style. 
> + reg = npu2_scom_read(gcid, scom_base, > + NPU2_REG_OFFSET(stack, block, > + NPU2_LOW_WATER_MARKS), > + NPU2_MISC_DA_LEN_8B); > + reg |= NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC; > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, block, > + NPU2_LOW_WATER_MARKS), > + NPU2_MISC_DA_LEN_8B, reg); > + } > +} > + > +static void enable_pb_snooping(uint32_t gcid, uint32_t scom_base, int index) > +{ > + uint64_t reg; > + int stack = index_to_stack(index); > + > + prlog(PR_DEBUG, "OCAPI: %s: Enable PowerBus snooping\n", __func__); > + /* Step 9 - Enable PowerBus snooping */ > + /* CQ_SM Misc Config Register #0 - Enable PowerBus snooping */ > + for (uint64_t block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) { Same here. > + reg = npu2_scom_read(gcid, scom_base, > + NPU2_REG_OFFSET(stack, block, > + NPU2_CQ_SM_MISC_CFG0), > + NPU2_MISC_DA_LEN_8B); > + reg |= NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS; > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, block, > + NPU2_CQ_SM_MISC_CFG0), > + NPU2_MISC_DA_LEN_8B, reg); > + } > +} > + > +static void brick_config(uint32_t gcid, uint32_t scom_base, int index) > +{ > + set_transport_mux_controls(gcid, scom_base, index, NPU2_DEV_TYPE_OPENCAPI); > + enable_odl_phy_mux(gcid, index); > + disable_alink_fp(gcid); > + set_pb_hp_opencapi(gcid, index); > + enable_xsl_clocks(gcid, scom_base, index); > + set_npcq_config(gcid, scom_base, index); > + enable_xsl_xts_interfaces(gcid, scom_base, index); > + enable_sm_allocation(gcid, scom_base, index); > + enable_pb_snooping(gcid, scom_base, index); > +} > + > +/* Procedure 13.1.3.5 - TL Configuration */ > +static void tl_config(uint32_t gcid, uint32_t scom_base, uint64_t index) > +{ > + uint64_t reg; > + uint64_t stack = index_to_stack(index); > + uint64_t block = index_to_block(index); > + > + prlog(PR_DEBUG, "OCAPI: %s: TL Configuration\n", __func__); > + /* OTL Config 0 Register */ > + reg = 0; > + /* OTL Enable */ > + reg |= NPU2_OTL_CONFIG0_EN; > + /* Block 
PE Handle from ERAT Index */ > + reg |= NPU2_OTL_CONFIG0_BLOCK_PE_HANDLE; > + /* OTL Brick ID */ > + reg = SETFIELD(NPU2_OTL_CONFIG0_BRICKID, reg, index - 2); > + /* ERAT Hash 0 */ > + reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_0, reg, 0b011001); > + /* ERAT Hash 1 */ > + reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_1, reg, 0b000111); > + /* ERAT Hash 2 */ > + reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_2, reg, 0b101100); > + /* ERAT Hash 3 */ > + reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_3, reg, 0b100110); > + npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG0(stack, block), > + NPU2_MISC_DA_LEN_8B, reg); > + > + /* OTL Config 1 Register */ > + reg = 0; > + /* > + * We leave Template 1-3 bits at 0 to force template 0 as required > + * for unknown devices. > + * > + * Template 0 Transmit Rate is set to most conservative setting which > + * will always be supported. Other Template Transmit rates are left > + * unset and will be set later by OS. > + */ > + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_TEMP0_RATE, reg, 0b1111); > + /* Extra wait cycles TXI-TXO - varied from workbook */ > + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_DRDY_WAIT, reg, 0b001); > + /* Minimum Frequency to Return TLX Credits to AFU */ > + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_CRET_FREQ, reg, 0b001); > + /* Frequency to add age to Transmit Requests */ > + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_AGE_FREQ, reg, 0b11000); > + /* Response High Priority Threshold */ > + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RS2_HPWAIT, reg, 0b011011); > + /* 4-slot Request High Priority Threshold */ > + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RQ4_HPWAIT, reg, 0b011011); > + /* 6-slot Request High Priority */ > + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RQ6_HPWAIT, reg, 0b011011); > + /* Stop the OCAPI Link on Uncorrectable Error > + * TODO: Confirm final value - disabled for debug */ > + > + npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG1(stack, block), > + NPU2_MISC_DA_LEN_8B, reg); > + > + /* TLX Credit Configuration Register */ > + reg = 0; > + /* 
VC0/VC3/DCP0/DCP1 credits to send to AFU */ > + reg = SETFIELD(NPU2_OTL_TLX_CREDITS_VC0_CREDITS, reg, 0x40); > + reg = SETFIELD(NPU2_OTL_TLX_CREDITS_VC3_CREDITS, reg, 0x40); > + reg = SETFIELD(NPU2_OTL_TLX_CREDITS_DCP0_CREDITS, reg, 0x80); > + reg = SETFIELD(NPU2_OTL_TLX_CREDITS_DCP1_CREDITS, reg, 0x80); > + npu2_scom_write(gcid, scom_base, NPU2_OTL_TLX_CREDITS(stack, block), > + NPU2_MISC_DA_LEN_8B, reg); > +} > + > +/* Detect Nimbus DD2.0 and DD2.01 */ > +static int get_nimbus_level(void) > +{ > + struct proc_chip *chip = next_chip(NULL); > + > + if (chip && chip->type == PROC_CHIP_P9_NIMBUS) > + return chip->ec_level & 0xff; > + return -1; > +} > + > +/* Procedure 13.1.3.6 - Address Translation Configuration */ > +static void address_translation_config(uint32_t gcid, uint32_t scom_base, > + uint64_t index) > +{ > + int chip_level; > + uint64_t reg; > + uint64_t stack = index_to_stack(index); > + > + prlog(PR_DEBUG, "OCAPI: %s: Address Translation Configuration\n", __func__); > + /* PSL_SCNTL_A0 Register */ > + /* > + * ERAT shared between multiple AFUs > + * > + * The workbook has this bit around the wrong way from the hardware. 
> + * > + * TODO: handle correctly with link ganging > + */ > + reg = npu2_scom_read(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, > + NPU2_XSL_PSL_SCNTL_A0), > + NPU2_MISC_DA_LEN_8B); > + reg |= NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL; > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, > + NPU2_XSL_PSL_SCNTL_A0), > + NPU2_MISC_DA_LEN_8B, reg); > + > + chip_level = get_nimbus_level(); > + if (chip_level == 0x20) { > + /* > + * Errata HW408041 (section 15.1.10 of NPU workbook) > + * "RA mismatch when both tlbie and checkout response > + * are seen in same cycle" > + */ > + /* XSL_GP Register - Bloom Filter Disable */ > + reg = npu2_scom_read(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP), > + NPU2_MISC_DA_LEN_8B); > + /* To update XSL_GP, we must first write a magic value to it */ > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP), > + NPU2_MISC_DA_LEN_8B, 0x0523790323000000); > + reg &= ~NPU2_XSL_GP_BLOOM_FILTER_ENABLE; > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP), > + NPU2_MISC_DA_LEN_8B, reg); > + } > + > + if (chip_level == 0x20 || chip_level == 0x21) { > + /* > + * DD2.0/2.1 EOA Bug. 
Fixed in DD2.2 > + */ > + reg = 0x32F8000000000001; > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, > + NPU2_XSL_DEF), > + NPU2_MISC_DA_LEN_8B, reg); > + } > +} > + > +/* TODO: Merge this with NVLink implementation - we don't use the npu2_bar > + * wrapper for the PHY BARs yet */ > +static void write_bar(uint32_t gcid, uint32_t scom_base, uint64_t reg, > + uint64_t addr, uint64_t size) > +{ > + uint64_t val; > + int block; > + switch (NPU2_REG(reg)) { > + case NPU2_PHY_BAR: > + val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, addr >> 21); > + val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, 1); > + break; > + case NPU2_NTL0_BAR: > + case NPU2_NTL1_BAR: > + val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, addr >> 16); > + val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(size >> 16)); > + val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, 1); > + break; > + case NPU2_GENID_BAR: > + val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, addr >> 16); > + val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, 1); > + break; > + default: > + val = 0ul; > + } > + > + for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) { > + npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(0, block, reg), > + NPU2_MISC_DA_LEN_8B, val); > + prlog(PR_DEBUG, "OCAPI: Setting BAR %llx to %llx\n", > + NPU2_REG_OFFSET(0, block, reg), val); > + } > +} > + > +static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base, > + uint64_t reg[]) > +{ > + uint64_t addr, size; > + > + prlog(PR_DEBUG, "OCAPI: patching up PHY0 bar, %s\n", __func__); > + phys_map_get(gcid, NPU_PHY, 0, &addr, &size); > + write_bar(gcid, scom_base, > + NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR), > + addr, size); > + prlog(PR_DEBUG, "OCAPI: patching up PHY1 bar, %s\n", __func__); > + phys_map_get(gcid, NPU_PHY, 1, &addr, &size); > + write_bar(gcid, scom_base, > + NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR), > + addr, size); > + > + prlog(PR_DEBUG, "OCAPI: setup global mmio, %s\n", __func__); > + phys_map_get(gcid, NPU_REGS, 0, 
&addr, &size); > + write_bar(gcid, scom_base, > + NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR), > + addr, size); > + reg[0] = addr; > + reg[1] = size; > +} > + > +static int setup_irq(struct npu2 *p) > +{ > + uint64_t reg, mmio_addr; > + uint32_t base; > + > + base = xive_alloc_ipi_irqs(p->chip_id, NPU_IRQ_LEVELS, 64); Mea culpa: we should use a macro instead of "64". That's likely temporary anyway, as it should be merged down the road with nvlink (Alistair has just sent a related patch). > + if (base == XIVE_IRQ_ERROR) { > + /** > + * @fwts-label OCAPIIRQAllocationFailed > + * @fwts-advice OpenCAPI IRQ setup failed. This is probably > + * a firmware bug. OpenCAPI functionality will be broken. > + */ > + prlog(PR_ERR, "OCAPI: Couldn't allocate interrupts for NPU\n"); > + return -1; > + } > + p->irq_base = base; > + > + xive_register_ipi_source(base, NPU_IRQ_LEVELS, NULL, NULL); > + mmio_addr = (uint64_t ) xive_get_trigger_port(base); > + prlog(PR_DEBUG, "OCAPI: NPU base irq %d @%llx\n", base, mmio_addr); > + reg = (mmio_addr & NPU2_MISC_IRQ_BASE_MASK) << 13; > + npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_BASE, > + NPU2_MISC_DA_LEN_8B, reg); > + /* > + * setup page size = 64k > + * > + * OS type is set to AIX: opal also runs with 2 pages per interrupt, > + * so to cover the max offset for 35 levels of interrupt, we need > + * bits 41 to 46, which is what the AIX setting does. There's no > + * other meaning for that AIX setting. 
> + */ > + reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_CFG, > + NPU2_MISC_DA_LEN_8B); > + reg |= NPU2_MISC_CFG_IPI_PS; > + reg &= ~NPU2_MISC_CFG_IPI_OS; > + npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_CFG, > + NPU2_MISC_DA_LEN_8B, reg); > + > + /* enable translation interrupts for all bricks */ > + reg = npu2_scom_read(p->chip_id, p->xscom_base, > + NPU2_MISC_IRQ_ON_ERROR_EN_FIR2, NPU2_MISC_DA_LEN_8B); > + reg |= PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3); > + npu2_scom_write(p->chip_id, p->xscom_base, > + NPU2_MISC_IRQ_ON_ERROR_EN_FIR2, NPU2_MISC_DA_LEN_8B, > + reg); > + return 0; > +} > + > +static void npu2_opencapi_probe(struct dt_node *dn) > +{ > + struct dt_node *link; > + char *path; > + uint32_t gcid, index, links, scom_base; > + uint64_t reg[2]; > + uint64_t dev_index; > + struct npu2 *n; > + int rc; > + > + path = dt_get_path(dn); > + gcid = dt_get_chip_id(dn); > + index = dt_prop_get_u32(dn, "ibm,npu-index"); > + links = dt_prop_get_u32(dn, "ibm,npu-links"); > + > + /* Don't try to init when we have an NVLink link */ > + dt_for_each_compatible(dn, link, "ibm,npu-link") { > + prlog(PR_DEBUG, "OCAPI: NPU%d: NVLink link found, skipping\n", > + index); > + return; > + } > + > + prlog(PR_INFO, "OCAPI: Chip %d Found OpenCAPI NPU%d (%d links) at %s\n", > + gcid, index, links, path); > + free(path); > + > + scom_base = dt_get_address(dn, 0, NULL); > + prlog(PR_INFO, "OCAPI: SCOM Base: %08x\n", scom_base); > + > + setup_global_mmio_bar(gcid, scom_base, reg); > + > + n = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev)); > + n->devices = (struct npu2_dev *)(n + 1); > + n->chip_id = gcid; > + n->xscom_base = scom_base; > + n->regs = (void *)reg[0]; > + n->dt_node = dn; > + > + dt_for_each_compatible(dn, link, "ibm,npu-link-opencapi") { > + dev_index = dt_prop_get_u32(link, "ibm,npu-link-index"); > + prlog(PR_INFO, "OCAPI: Configuring link index %lld\n", > + dev_index); > + > + /* Procedure 13.1.3.1 - Select OCAPI vs NVLink 
*/ > + brick_config(gcid, scom_base, dev_index); > + > + /* Procedure 13.1.3.5 - Transaction Layer Configuration */ > + tl_config(gcid, scom_base, dev_index); > + > + /* Procedure 13.1.3.6 - Address Translation Configuration */ > + address_translation_config(gcid, scom_base, dev_index); > + } > + > + /* Procedure 13.1.3.10 - Interrupt Configuration */ > + rc = setup_irq(n); > + if (rc) > + goto failed; > + > + return; > +failed: > + free(n); > +} > + > +void probe_npu2_opencapi(void) > +{ > + struct dt_node *np_npu; > + > + dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu") > + npu2_opencapi_probe(np_npu); > +} > diff --git a/hw/npu2.c b/hw/npu2.c > index 12d239a..218ac86 100644 > --- a/hw/npu2.c > +++ b/hw/npu2.c > @@ -1274,7 +1274,7 @@ static void npu2_probe_phb(struct dt_node *dn) > { > struct proc_chip *proc_chip; > struct dt_node *np; > - uint32_t gcid, scom, index, phb_index, links; > + uint32_t gcid, scom, index, phb_index, links = 0; > uint64_t reg[2], mm_win[2]; > char *path; > > @@ -1288,6 +1288,26 @@ static void npu2_probe_phb(struct dt_node *dn) > return; > } > > + dt_for_each_compatible(dn, np, "ibm,npu-link") { > + links++; > + } > + > + index = dt_prop_get_u32(dn, "ibm,npu-index"); > + phb_index = dt_prop_get_u32(dn, "ibm,phb-index"); > + > + prlog(PR_INFO, "NPU2: Chip %d Found NPU2#%d (%d links) at %s\n", > + gcid, index, links, path); > + free(path); > + > + /* Retrieve scom base address */ > + scom = dt_get_address(dn, 0, NULL); > + prlog(PR_INFO, " SCOM Base: %08x\n", scom); > + > + if (!links) { > + prlog(PR_INFO, " No NVLink links found\n"); > + return; > + } > + > if (!is_p9dd1()) { > /* TODO: Clean this up with register names, etc. when we get > * time. 
This just turns NVLink mode on in each brick and should > @@ -1324,17 +1344,6 @@ static void npu2_probe_phb(struct dt_node *dn) > xscom_write_mask(gcid, 0x5011530, PPC_BIT(0), PPC_BIT(0)); > } > > - index = dt_prop_get_u32(dn, "ibm,npu-index"); > - phb_index = dt_prop_get_u32(dn, "ibm,phb-index"); > - links = dt_prop_get_u32(dn, "ibm,npu-links"); > - prlog(PR_INFO, "NPU2: Chip %d Found NPU2#%d (%d links) at %s\n", > - gcid, index, links, path); > - free(path); > - > - /* Retrieve scom base address */ > - scom = dt_get_address(dn, 0, NULL); > - prlog(PR_INFO, " SCOM Base: %08x\n", scom); > - > /* Reassign the BARs */ > assign_mmio_bars(gcid, scom, reg, mm_win); > > diff --git a/include/npu2-regs.h b/include/npu2-regs.h > index 27956f2..b2812d5 100644 > --- a/include/npu2-regs.h > +++ b/include/npu2-regs.h > @@ -90,6 +90,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_BLOCK_DAT 0x5 > #define NPU2_BLOCK_NTL0 0x9 > #define NPU2_BLOCK_NTL1 0xb > +#define NPU2_BLOCK_OTL0 0xc > +#define NPU2_BLOCK_OTL1 0xd > + > +/* OpenCAPI blocks */ > +#define NPU2_BLOCK_XSL 0xe > > /* MISC stack blocks */ > #define NPU2_BLOCK_ATS 0 > @@ -108,6 +113,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > > /* SM block registers */ > #define NPU2_CQ_SM_MISC_CFG0 0x000 > +#define NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38) > +#define NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57) > #define NPU2_CQ_SM_MISC_CFG1 0x008 > #define NPU2_PB_EPSILON 0x010 > #define NPU2_TIMER_CFG 0x018 > @@ -138,6 +145,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_GENID_BAR_ADDR PPC_BITMASK(3,35) > #define NPU2_GENID_BAR_POISON PPC_BIT(39) > #define NPU2_LOW_WATER_MARKS 0x040 > +#define NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC PPC_BIT(51) > #define NPU2_HIGH_WATER_MARKS 0x048 > #define NPU2_RELAXED_ORDERING_CFG0 0x050 > #define NPU2_RELAXED_ORDERING_CFG1 0x058 > @@ -176,6 +184,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t 
scom_base, > > /* CTL block registers */ > #define NPU2_CQ_CTL_MISC_CFG 0x000 > +#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52) > +#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55) > +#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56) > #define NPU2_CQ_FUTURE_CFG1 0x008 > #define NPU2_CQ_FUTURE_CFG2 0x010 > #define NPU2_CQ_FUTURE_CFG3 0x018 > @@ -198,6 +209,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_CQ_LPC_THRESHOLD_CFG 0x080 > #define NPU2_CQ_INHIBIT_CFG 0x088 > #define NPU2_CQ_CTL_STATUS 0x090 > +#define NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED PPC_BITMASK(48, 49) > +#define NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED PPC_BITMASK(50, 51) > #define NPU2_CQ_C_ERR_RPT_MSG0 0x0C0 > #define NPU2_CQ_C_ERR_RPT_MSG1 0x0C8 > #define NPU2_CQ_C_ERR_RPT_FIRST0 0x0D0 > @@ -206,9 +219,13 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_CQ_C_ERR_RPT_MASK1 0x0E8 > #define NPU2_CQ_C_ERR_RPT_HOLD0 0x0F0 > #define NPU2_CQ_C_ERR_RPT_HOLD1 0x0F8 > +#define NPU2_CQ_CTL_FENCE_CONTROL_0 0x140 > +#define NPU2_CQ_CTL_FENCE_CONTROL_1 0x148 > +#define NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE PPC_BITMASK(0, 1) > > /* DAT block registers */ > #define NPU2_CQ_DAT_MISC_CFG 0x008 > +#define NPU2_CQ_DAT_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(40) > #define NPU2_CQ_DAT_ECC_CFG 0x010 > #define NPU2_CQ_DAT_SCRATCH0 0x018 > #define NPU2_CQ_DAT_ECC_STATUS 0x020 > @@ -289,6 +306,43 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_NTL_DL_CONFIG(ndev) NPU2_DL_REG_OFFSET(ndev, 0xFFF8) > #define NPU2_NTL_DL_CLK_CTRL(ndev) NPU2_DL_REG_OFFSET(ndev, 0x001C) > > +/* OpenCAPI - XSL registers */ > +#define NPU2_XSL_PSL_SCNTL_A0 0x010 > +#define NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL PPC_BIT(0) > +#define NPU2_XSL_DEF 0x040 > +#define NPU2_XSL_GP 0x058 > +#define NPU2_XSL_GP_BLOOM_FILTER_ENABLE PPC_BIT(16) > +#define NPU2_XSL_WRAP_CFG 0x0C0 > +#define NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE PPC_BIT(0) > + > +/* OpenCAPI - OTL 
registers */ > +#define NPU2_OTL_CONFIG0(stack, block) NPU2_REG_OFFSET(stack, block, 0x000) > +#define NPU2_OTL_CONFIG0_EN PPC_BIT(0) > +#define NPU2_OTL_CONFIG0_BLOCK_PE_HANDLE PPC_BIT(1) > +#define NPU2_OTL_CONFIG0_BRICKID PPC_BITMASK(2, 3) > +#define NPU2_OTL_CONFIG0_PE_MASK PPC_BITMASK(4, 7) > +#define NPU2_OTL_CONFIG0_ERAT_HASH_0 PPC_BITMASK(8, 13) > +#define NPU2_OTL_CONFIG0_ERAT_HASH_1 PPC_BITMASK(14, 19) > +#define NPU2_OTL_CONFIG0_ERAT_HASH_2 PPC_BITMASK(20, 25) > +#define NPU2_OTL_CONFIG0_ERAT_HASH_3 PPC_BITMASK(26, 31) > +#define NPU2_OTL_CONFIG0_BLOCK_TID_OVERRIDE PPC_BIT(50) > +#define NPU2_OTL_TLX_CREDITS(stack, block) NPU2_REG_OFFSET(stack, block, 0x050) > +#define NPU2_OTL_TLX_CREDITS_VC0_CREDITS PPC_BITMASK(0, 7) > +#define NPU2_OTL_TLX_CREDITS_VC3_CREDITS PPC_BITMASK(24, 31) > +#define NPU2_OTL_TLX_CREDITS_DCP0_CREDITS PPC_BITMASK(32, 39) > +#define NPU2_OTL_TLX_CREDITS_DCP1_CREDITS PPC_BITMASK(56, 63) > +#define NPU2_OTL_CONFIG1(stack, block) NPU2_REG_OFFSET(stack, block, 0x058) > +#define NPU2_OTL_CONFIG1_TX_DRDY_WAIT PPC_BITMASK(5, 7) > +#define NPU2_OTL_CONFIG1_TX_TEMP0_RATE PPC_BITMASK(8, 11) > +#define NPU2_OTL_CONFIG1_TX_CRET_FREQ PPC_BITMASK(32, 34) > +#define NPU2_OTL_CONFIG1_TX_AGE_FREQ PPC_BITMASK(35, 39) > +#define NPU2_OTL_CONFIG1_TX_RS2_HPWAIT PPC_BITMASK(40, 45) > +#define NPU2_OTL_CONFIG1_TX_RQ4_HPWAIT PPC_BITMASK(46, 51) > +#define NPU2_OTL_CONFIG1_TX_RQ6_HPWAIT PPC_BITMASK(52, 57) > +#define NPU2_OTL_CONFIG1_TX_CBUF_ECC_DIS PPC_BIT(58) > +#define NPU2_OTL_CONFIG1_TX_STOP_LINK PPC_BIT(59) > +#define NPU2_OTL_CONFIG1_TX_STOP_ON_UE PPC_BIT(60) > + > /* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above > * there is only a single instance of each of these in the NPU so we > * define them as absolute offsets. 
*/ > @@ -296,6 +350,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_MISC_RELAXTED_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x008) > #define NPU2_MISC_LCO_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x010) > #define NPU2_MISC_OPTICAL_IO_CFG0 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x018) > +#define NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2 PPC_BITMASK(0, 2) > +#define NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1 PPC_BITMASK(3, 4) > +#define NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5 PPC_BITMASK(5, 6) > #define NPU2_MISC_ERR_RPT_HOLD NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x020) > #define NPU2_MISC_ERR_RPT_MASK NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x028) > #define NPU2_MISC_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x030) > @@ -320,6 +377,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_MISC_ERR_CTL_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x088) > #define NPU2_MISC_RING_ACK_ERR NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x090) > #define NPU2_MISC_IRQ_BASE NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x098) > +#define NPU2_MISC_IRQ_BASE_MASK PPC_BITMASK(13, 51) > #define NPU2_MISC_ERR_BRICK_GROUP NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0A0) > #define NPU2_MISC_FREEZE_STATE NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0A8) > #define NPU2_MISC_FENCE_STATE NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0B0) > @@ -377,6 +435,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_MISC_IRQ_LOG13 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x368) > #define NPU2_MISC_IRQ_LOG14 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x370) > #define NPU2_MISC_IRQ_LOG15 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x378) > +#define NPU2_MISC_IRQ_ON_ERROR_EN_FIR2 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x408) > + > > /* ATS block registers */ > #define NPU2_ATS_PMU_CTL 
NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_ATS, 0x000) > @@ -421,8 +481,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_XTS_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x020) > #define NPU2_XTS_CFG_MMIOSD PPC_BIT(1) > #define NPU2_XTS_CFG_TRY_ATR_RO PPC_BIT(6) > +#define NPU2_XTS_CFG_OPENCAPI PPC_BIT(15) > #define NPU2_XTS_CFG2 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x028) > #define NPU2_XTS_CFG2_NO_FLUSH_ENA PPC_BIT(49) > +#define NPU2_XTS_CFG2_XSL1_ENA PPC_BIT(54) > +#define NPU2_XTS_CFG2_XSL2_ENA PPC_BIT(55) > #define NPU2_XTS_DBG_CFG0 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x030) > #define NPU2_XTS_DBG_CFG1 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x038) > #define NPU2_XTS_PMU_CNT NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x040) > @@ -478,4 +541,29 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_DD1_MISC_SCOM_IND_SCOM_DATA 0x38f > #define NPU2_MISC_SCOM_IND_SCOM_DATA 0x68f > > + > +/* OpenCAPI - PowerBus and OBus configuration SCOM addresses */ > +#define PU_IOE_PB_MISC_CFG 0x5013823 > +#define PU_IOE_PB_MISC_CFG_SEL_03_NPU_NOT_PB PPC_BIT(13) > +#define PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB PPC_BIT(14) > +#define PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB PPC_BIT(15) > +#define OBUS_LL0_IOOL_PHY_CONFIG 0x901080C > +#define OBUS_LL3_IOOL_PHY_CONFIG 0xC01080C > +#define OBUS_IOOL_PHY_CONFIG_ODL0_ENABLED PPC_BIT(52) > +#define OBUS_IOOL_PHY_CONFIG_ODL1_ENABLED PPC_BIT(53) > +#define OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP PPC_BIT(54) > +#define OBUS_IOOL_PHY_CONFIG_LINK0_OLL_ENABLED PPC_BIT(58) > +#define OBUS_IOOL_PHY_CONFIG_LINK1_OLL_ENABLED PPC_BIT(59) > +#define OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED PPC_BIT(61) > +#define OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED PPC_BIT(62) > +#define OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED PPC_BIT(63) > +#define PU_IOE_PB_FP01_CFG 0x501380A > +#define PU_IOE_PB_FP23_CFG 0x501380B > +#define PU_IOE_PB_FP45_CFG 0x501380C > +#define PU_IOE_PB_FP67_CFG 
0x501380D > +#define PU_IOE_PB_FP_CFG_FP0_FMR_DISABLE PPC_BIT(20) > +#define PU_IOE_PB_FP_CFG_FP0_PRS_DISABLE PPC_BIT(25) > +#define PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE PPC_BIT(52) > +#define PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE PPC_BIT(57) > + > #endif /* __NPU2_REGS_H */ > diff --git a/include/npu2.h b/include/npu2.h > index e19fab6..b28f91b 100644 > --- a/include/npu2.h > +++ b/include/npu2.h > @@ -134,6 +134,7 @@ struct npu2_dev { > > struct npu2 { > uint32_t index; > + struct dt_node *dt_node; > uint32_t flags; > uint32_t chip_id; > uint64_t xscom_base; > @@ -142,6 +143,7 @@ struct npu2 { > uint64_t mm_base; > uint64_t mm_size; > uint32_t base_lsi; > + uint32_t irq_base; > uint32_t total_devices; > struct npu2_dev *devices; > enum phys_map_type gpu_map_type; > diff --git a/include/skiboot.h b/include/skiboot.h > index db91325..6b14c82 100644 > --- a/include/skiboot.h > +++ b/include/skiboot.h > @@ -230,6 +230,7 @@ extern int preload_capp_ucode(void); > extern void preload_io_vpd(void); > extern void probe_npu(void); > extern void probe_npu2(void); > +extern void probe_npu2_opencapi(void); > extern void uart_init(void); > extern void mbox_init(void); > extern void early_uart_init(void); >
On 12/01/18 00:31, Frederic Barrat wrote: >> +static void enable_sm_allocation(uint32_t gcid, uint32_t scom_base, >> int index) >> +{ >> + uint64_t reg; >> + int stack = index_to_stack(index); >> + >> + prlog(PR_DEBUG, "OCAPI: %s: Enable State Machine Allocation\n", >> __func__); >> + /* Step 8 - Enable state-machine allocation */ >> + /* Low-Water Marks Registers - Enable state machine allocation */ >> + for (uint64_t block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; >> block++) { > > The 'block' declaration is following an unusual coding style. ACK > >> + reg = npu2_scom_read(gcid, scom_base, >> + NPU2_REG_OFFSET(stack, block, >> + NPU2_LOW_WATER_MARKS), >> + NPU2_MISC_DA_LEN_8B); >> + reg |= NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC; >> + npu2_scom_write(gcid, scom_base, >> + NPU2_REG_OFFSET(stack, block, >> + NPU2_LOW_WATER_MARKS), >> + NPU2_MISC_DA_LEN_8B, reg); >> + } >> +} >> + >> +static void enable_pb_snooping(uint32_t gcid, uint32_t scom_base, int >> index) >> +{ >> + uint64_t reg; >> + int stack = index_to_stack(index); >> + >> + prlog(PR_DEBUG, "OCAPI: %s: Enable PowerBus snooping\n", __func__); >> + /* Step 9 - Enable PowerBus snooping */ >> + /* CQ_SM Misc Config Register #0 - Enable PowerBus snooping */ >> + for (uint64_t block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; >> block++) { > > Same here. ACK > >> + reg = npu2_scom_read(gcid, scom_base, >> + NPU2_REG_OFFSET(stack, block, >> + NPU2_CQ_SM_MISC_CFG0), >> + NPU2_MISC_DA_LEN_8B); >> + reg |= NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS; >> + npu2_scom_write(gcid, scom_base, >> + NPU2_REG_OFFSET(stack, block, >> + NPU2_CQ_SM_MISC_CFG0), >> + NPU2_MISC_DA_LEN_8B, reg); >> + } >> +} >> + ... >> +static int setup_irq(struct npu2 *p) >> +{ >> + uint64_t reg, mmio_addr; >> + uint32_t base; >> + >> + base = xive_alloc_ipi_irqs(p->chip_id, NPU_IRQ_LEVELS, 64); > > Mea culpa: we should use a macro instead of "64". 
That's likely > temporary anyway, as it should be merged down the road with nvlink > (Alistair has just sent a related patch). > Will take a look
diff --git a/core/init.c b/core/init.c index 9eb8e25..1522ebe 100644 --- a/core/init.c +++ b/core/init.c @@ -1048,6 +1048,8 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt) /* Probe NPUs */ probe_npu(); probe_npu2(); + /* TODO: Eventually, we'll do NVLink and OpenCAPI together */ + probe_npu2_opencapi(); /* Initialize PCI */ pci_init_slots(); diff --git a/hw/Makefile.inc b/hw/Makefile.inc index 27d8a38..cfc8646 100644 --- a/hw/Makefile.inc +++ b/hw/Makefile.inc @@ -7,7 +7,8 @@ HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o phb4.o HW_OBJS += fake-nvram.o lpc-mbox.o npu2.o npu2-hw-procedures.o -HW_OBJS += npu2-common.o phys-map.o sbe-p9.o capp.o occ-sensor.o vas.o p9-adu.o +HW_OBJS += npu2-common.o npu2-opencapi.o phys-map.o sbe-p9.o capp.o occ-sensor.o +HW_OBJS += vas.o p9-adu.o HW=hw/built-in.o # FIXME hack this for now diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c new file mode 100644 index 0000000..55d5cd9 --- /dev/null +++ b/hw/npu2-opencapi.c @@ -0,0 +1,794 @@ +/* Copyright 2013-2017 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Support for OpenCAPI on POWER9 NPUs + * + * This file provides support for OpenCAPI as implemented on POWER9. + * + * At present, we initialise the NPU separately from the NVLink code in npu2.c. 
+ * As such, we don't currently support mixed NVLink and OpenCAPI configurations + * on the same NPU for machines such as Witherspoon. + * + * Procedure references in this file are to the POWER9 OpenCAPI NPU Workbook + * (IBM internal document). + * + * TODO: + * - Support for mixed NVLink and OpenCAPI on the same NPU + * - Support for link ganging (one AFU using multiple links) + * - Link reset and error handling + * - Presence detection + * - Consume HDAT NPU information + * - LPC Memory support + */ + +#include <skiboot.h> +#include <xscom.h> +#include <io.h> +#include <timebase.h> +#include <pci.h> +#include <pci-cfg.h> +#include <pci-slot.h> +#include <interrupts.h> +#include <opal.h> +#include <opal-api.h> +#include <npu2.h> +#include <npu2-regs.h> +#include <phys-map.h> +#include <xive.h> +#include <p9-adu.h> +#include <i2c.h> + +#define NPU_IRQ_LEVELS 35 + +static inline uint64_t index_to_stack(uint64_t index) { + switch (index) { + case 2: + case 3: + return NPU2_STACK_STCK_1; + break; + case 4: + case 5: + return NPU2_STACK_STCK_2; + break; + default: + assert(false); + } +} + +static inline uint64_t index_to_stacku(uint64_t index) { + switch (index) { + case 2: + case 3: + return NPU2_STACK_STCK_1U; + break; + case 4: + case 5: + return NPU2_STACK_STCK_2U; + break; + default: + assert(false); + } +} + +static inline uint64_t index_to_block(uint64_t index) { + switch (index) { + case 2: + case 4: + return NPU2_BLOCK_OTL0; + break; + case 3: + case 5: + return NPU2_BLOCK_OTL1; + break; + default: + assert(false); + } +} + +/* Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5 */ + +static void set_transport_mux_controls(uint32_t gcid, uint32_t scom_base, + int index, enum npu2_dev_type type) +{ + /* Step 1 - Set Transport MUX controls to select correct OTL or NTL */ + uint64_t reg; + uint64_t field; + + /* TODO: Rework this to select for NVLink too */ + assert(type == NPU2_DEV_TYPE_OPENCAPI); + + prlog(PR_DEBUG, "OCAPI: %s: Setting transport 
mux controls\n", __func__); + + /* Optical IO Transport Mux Config for Bricks 0-2 and 4-5 */ + reg = npu2_scom_read(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0, + NPU2_MISC_DA_LEN_8B); + switch (index) { + case 0: + case 1: + /* not valid for OpenCAPI */ + assert(false); + break; + case 2: /* OTL1.0 */ + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg); + field &= ~0b100; + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg, + field); + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg); + field |= 0b10; + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg, + field); + break; + case 3: /* OTL1.1 */ + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg); + field &= ~0b010; + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2, reg, + field); + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg); + field |= 0b01; + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1, reg, + field); + break; + case 4: /* OTL2.0 */ + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg); + field |= 0b10; + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg, + field); + break; + case 5: /* OTL2.1 */ + field = GETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg); + field |= 0b01; + reg = SETFIELD(NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5, reg, + field); + break; + default: + assert(false); + } + npu2_scom_write(gcid, scom_base, NPU2_MISC_OPTICAL_IO_CFG0, + NPU2_MISC_DA_LEN_8B, reg); + + /* + * PowerBus Optical Miscellaneous Config Register - select + * OpenCAPI for b4/5 and A-Link for b3 + */ + xscom_read(gcid, PU_IOE_PB_MISC_CFG, &reg); + switch (index) { + case 0: + case 1: + case 2: + case 3: + break; + case 4: + reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB, reg, 1); + break; + case 5: + reg = SETFIELD(PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB, reg, 1); + break; + } + xscom_write(gcid, PU_IOE_PB_MISC_CFG, reg); +} + +static void enable_odl_phy_mux(uint32_t gcid, int index) +{ + uint64_t reg; + 
uint64_t phy_config_scom; + prlog(PR_DEBUG, "OCAPI: %s: Enabling ODL to PHY MUXes\n", __func__); + /* Step 2 - Enable MUXes for ODL to PHY connection */ + switch (index) { + case 2: + case 3: + phy_config_scom = OBUS_LL0_IOOL_PHY_CONFIG; + break; + case 4: + case 5: + phy_config_scom = OBUS_LL3_IOOL_PHY_CONFIG; + break; + default: + assert(false); + } + + /* PowerBus OLL PHY Training Config Register */ + xscom_read(gcid, phy_config_scom, &reg); + + /* Enable ODLs to use shared PHYs */ + reg |= OBUS_IOOL_PHY_CONFIG_ODL0_ENABLED; + reg |= OBUS_IOOL_PHY_CONFIG_ODL1_ENABLED; + + /* + * Swap ODL1 to use brick 2 lanes instead of brick 1 lanes if using a + * 22-pin cable for OpenCAPI connection. + */ + if (platform.ocapi->odl_phy_swap) + reg |= OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP; + else + reg &= ~OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP; + + /* Disable A-Link link layers */ + reg &= ~OBUS_IOOL_PHY_CONFIG_LINK0_OLL_ENABLED; + reg &= ~OBUS_IOOL_PHY_CONFIG_LINK1_OLL_ENABLED; + + /* Disable NV-Link link layers */ + reg &= ~OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED; + reg &= ~OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED; + reg &= ~OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED; + xscom_write(gcid, phy_config_scom, reg); +} + +static void disable_alink_fp(uint32_t gcid) +{ + uint64_t reg = 0; + + prlog(PR_DEBUG, "OCAPI: %s: Disabling A-Link framer/parsers\n", __func__); + /* Step 3 - Disable A-Link framers/parsers */ + /* TODO: Confirm if needed on OPAL system */ + + reg |= PU_IOE_PB_FP_CFG_FP0_FMR_DISABLE; + reg |= PU_IOE_PB_FP_CFG_FP0_PRS_DISABLE; + reg |= PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE; + reg |= PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE; + xscom_write(gcid, PU_IOE_PB_FP01_CFG, reg); + xscom_write(gcid, PU_IOE_PB_FP23_CFG, reg); + xscom_write(gcid, PU_IOE_PB_FP45_CFG, reg); + xscom_write(gcid, PU_IOE_PB_FP67_CFG, reg); +} + +static void set_pb_hp_opencapi(uint32_t gcid, int index) +{ + /* Step 4 - Set PowerBus HotPlug Mode Registers */ + uint64_t reg; + + prlog(PR_DEBUG, "OCAPI: %s: Setting PowerBus Hotplug Mode 
registers\n", __func__); + + xscom_read(gcid, PB_WEST_HP_MODE_CURR, &reg); + switch (index) { + case 2: + case 3: + /* Configure OPT0 as an OpenCAPI link */ + reg = SETFIELD(PPC_BITMASK(32, 33), reg, 0b01); + break; + case 4: + case 5: + /* Configure OPT3 as an OpenCAPI link */ + reg = SETFIELD(PPC_BITMASK(38, 39), reg, 0b01); + break; + default: + assert(false); + } + + p9_adu_set_pb_hp_mode(gcid, reg); +} + +static void enable_xsl_clocks(uint32_t gcid, uint32_t scom_base, int index) +{ + /* Step 5 - Enable Clocks in XSL */ + + prlog(PR_DEBUG, "OCAPI: %s: Enable clocks in XSL\n", __func__); + + npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(index_to_stack(index), + NPU2_BLOCK_XSL, + NPU2_XSL_WRAP_CFG), + NPU2_MISC_DA_LEN_8B, NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE); +} + +#define CQ_CTL_STATUS_TIMEOUT 10 /* milliseconds */ + +static int set_fence_control(uint32_t gcid, uint32_t scom_base, + int index, uint8_t status) +{ + int stack, block; + uint64_t reg, status_field; + uint8_t status_val; + uint64_t fence_control; + uint64_t timeout = mftb() + msecs_to_tb(CQ_CTL_STATUS_TIMEOUT); + + stack = index_to_stack(index); + block = index_to_block(index); + + fence_control = NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, + block == NPU2_BLOCK_OTL0 ? 
+ NPU2_CQ_CTL_FENCE_CONTROL_0 : + NPU2_CQ_CTL_FENCE_CONTROL_1); + + reg = SETFIELD(NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE, 0ull, status); + npu2_scom_write(gcid, scom_base, fence_control, + NPU2_MISC_DA_LEN_8B, reg); + + /* Wait for fence status to update */ + if (index_to_block(index) == NPU2_BLOCK_OTL0) + status_field = NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED; + else + status_field = NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED; + + do { + reg = npu2_scom_read(gcid, scom_base, + NPU2_REG_OFFSET(index_to_stack(index), + NPU2_BLOCK_CTL, + NPU2_CQ_CTL_STATUS), + NPU2_MISC_DA_LEN_8B); + status_val = GETFIELD(status_field, reg); + if (status_val == status) + return OPAL_SUCCESS; + time_wait_ms(1); + } while (tb_compare(mftb(), timeout) == TB_ABEFOREB); + + /** + * @fwts-label OCAPIFenceStatusTimeout + * @fwts-advice The NPU fence status did not update as expected. This + * could be the result of a firmware or hardware bug. OpenCAPI + * functionality could be broken. + */ + prlog(PR_ERR, + "OCAPI: Fence status for brick %d stuck: expected 0x%x, got 0x%x\n", + index, status, status_val); + return OPAL_HARDWARE; +} + +static void set_npcq_config(uint32_t gcid, uint32_t scom_base, int index) +{ + uint64_t reg, stack, block; + + prlog(PR_DEBUG, "OCAPI: %s: Set NPCQ Config\n", __func__); + /* Step 6 - Set NPCQ configuration */ + /* CQ_CTL Misc Config Register #0 */ + stack = index_to_stack(index); + block = index_to_block(index); + + /* Enable OTL */ + npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG0(stack, block), + NPU2_MISC_DA_LEN_8B, NPU2_OTL_CONFIG0_EN); + set_fence_control(gcid, scom_base, index, 0b01); + reg = npu2_scom_read(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, + NPU2_CQ_CTL_MISC_CFG), + NPU2_MISC_DA_LEN_8B); + /* Set OCAPI mode */ + reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE; + if (block == NPU2_BLOCK_OTL0) + reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE; + else + reg |= NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE; + npu2_scom_write(gcid, scom_base, + 
NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, + NPU2_CQ_CTL_MISC_CFG), + NPU2_MISC_DA_LEN_8B, reg); + + /* NPU Fenced */ + set_fence_control(gcid, scom_base, index, 0b11); + + /* NPU Half Fenced */ + set_fence_control(gcid, scom_base, index, 0b10); + + /* CQ_DAT Misc Config Register #1 */ + reg = npu2_scom_read(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_DAT, + NPU2_CQ_DAT_MISC_CFG), + NPU2_MISC_DA_LEN_8B); + /* Set OCAPI mode for bricks 2-5 */ + reg |= NPU2_CQ_DAT_MISC_CFG_CONFIG_OCAPI_MODE; + npu2_scom_write(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_DAT, + NPU2_CQ_DAT_MISC_CFG), + NPU2_MISC_DA_LEN_8B, reg); + + /* CQ_SM Misc Config Register #0 */ + for (uint64_t block = NPU2_BLOCK_SM_0; + block <= NPU2_BLOCK_SM_3; block++) { + reg = npu2_scom_read(gcid, scom_base, + NPU2_REG_OFFSET(stack, block, + NPU2_CQ_SM_MISC_CFG0), + NPU2_MISC_DA_LEN_8B); + /* Set OCAPI mode for bricks 2-5 */ + reg |= NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE; + npu2_scom_write(gcid, scom_base, + NPU2_REG_OFFSET(stack, block, + NPU2_CQ_SM_MISC_CFG0), + NPU2_MISC_DA_LEN_8B, reg); + } +} + +static void enable_xsl_xts_interfaces(uint32_t gcid, uint32_t scom_base, int index) +{ + uint64_t reg; + + prlog(PR_DEBUG, "OCAPI: %s: Enable XSL-XTS Interfaces\n", __func__); + /* Step 7 - Enable XSL-XTS interfaces */ + /* XTS Config Register - Enable XSL-XTS interface */ + reg = npu2_scom_read(gcid, scom_base, NPU2_XTS_CFG, NPU2_MISC_DA_LEN_8B); + reg |= NPU2_XTS_CFG_OPENCAPI; + npu2_scom_write(gcid, scom_base, NPU2_XTS_CFG, NPU2_MISC_DA_LEN_8B, reg); + + /* XTS Config2 Register - Enable XSL1/2 */ + reg = npu2_scom_read(gcid, scom_base, NPU2_XTS_CFG2, NPU2_MISC_DA_LEN_8B); + switch (index_to_stack(index)) { + case NPU2_STACK_STCK_1: + reg |= NPU2_XTS_CFG2_XSL1_ENA; + break; + case NPU2_STACK_STCK_2: + reg |= NPU2_XTS_CFG2_XSL2_ENA; + break; + } + npu2_scom_write(gcid, scom_base, NPU2_XTS_CFG2, NPU2_MISC_DA_LEN_8B, reg); +} + +static void enable_sm_allocation(uint32_t gcid, uint32_t scom_base, 
int index) +{ + uint64_t reg; + int stack = index_to_stack(index); + + prlog(PR_DEBUG, "OCAPI: %s: Enable State Machine Allocation\n", __func__); + /* Step 8 - Enable state-machine allocation */ + /* Low-Water Marks Registers - Enable state machine allocation */ + for (uint64_t block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) { + reg = npu2_scom_read(gcid, scom_base, + NPU2_REG_OFFSET(stack, block, + NPU2_LOW_WATER_MARKS), + NPU2_MISC_DA_LEN_8B); + reg |= NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC; + npu2_scom_write(gcid, scom_base, + NPU2_REG_OFFSET(stack, block, + NPU2_LOW_WATER_MARKS), + NPU2_MISC_DA_LEN_8B, reg); + } +} + +static void enable_pb_snooping(uint32_t gcid, uint32_t scom_base, int index) +{ + uint64_t reg; + int stack = index_to_stack(index); + + prlog(PR_DEBUG, "OCAPI: %s: Enable PowerBus snooping\n", __func__); + /* Step 9 - Enable PowerBus snooping */ + /* CQ_SM Misc Config Register #0 - Enable PowerBus snooping */ + for (uint64_t block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) { + reg = npu2_scom_read(gcid, scom_base, + NPU2_REG_OFFSET(stack, block, + NPU2_CQ_SM_MISC_CFG0), + NPU2_MISC_DA_LEN_8B); + reg |= NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS; + npu2_scom_write(gcid, scom_base, + NPU2_REG_OFFSET(stack, block, + NPU2_CQ_SM_MISC_CFG0), + NPU2_MISC_DA_LEN_8B, reg); + } +} + +static void brick_config(uint32_t gcid, uint32_t scom_base, int index) +{ + set_transport_mux_controls(gcid, scom_base, index, NPU2_DEV_TYPE_OPENCAPI); + enable_odl_phy_mux(gcid, index); + disable_alink_fp(gcid); + set_pb_hp_opencapi(gcid, index); + enable_xsl_clocks(gcid, scom_base, index); + set_npcq_config(gcid, scom_base, index); + enable_xsl_xts_interfaces(gcid, scom_base, index); + enable_sm_allocation(gcid, scom_base, index); + enable_pb_snooping(gcid, scom_base, index); +} + +/* Procedure 13.1.3.5 - TL Configuration */ +static void tl_config(uint32_t gcid, uint32_t scom_base, uint64_t index) +{ + uint64_t reg; + uint64_t stack = 
index_to_stack(index); + uint64_t block = index_to_block(index); + + prlog(PR_DEBUG, "OCAPI: %s: TL Configuration\n", __func__); + /* OTL Config 0 Register */ + reg = 0; + /* OTL Enable */ + reg |= NPU2_OTL_CONFIG0_EN; + /* Block PE Handle from ERAT Index */ + reg |= NPU2_OTL_CONFIG0_BLOCK_PE_HANDLE; + /* OTL Brick ID */ + reg = SETFIELD(NPU2_OTL_CONFIG0_BRICKID, reg, index - 2); + /* ERAT Hash 0 */ + reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_0, reg, 0b011001); + /* ERAT Hash 1 */ + reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_1, reg, 0b000111); + /* ERAT Hash 2 */ + reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_2, reg, 0b101100); + /* ERAT Hash 3 */ + reg = SETFIELD(NPU2_OTL_CONFIG0_ERAT_HASH_3, reg, 0b100110); + npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG0(stack, block), + NPU2_MISC_DA_LEN_8B, reg); + + /* OTL Config 1 Register */ + reg = 0; + /* + * We leave Template 1-3 bits at 0 to force template 0 as required + * for unknown devices. + * + * Template 0 Transmit Rate is set to most conservative setting which + * will always be supported. Other Template Transmit rates are left + * unset and will be set later by OS. 
+ */ + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_TEMP0_RATE, reg, 0b1111); + /* Extra wait cycles TXI-TXO - varied from workbook */ + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_DRDY_WAIT, reg, 0b001); + /* Minimum Frequency to Return TLX Credits to AFU */ + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_CRET_FREQ, reg, 0b001); + /* Frequency to add age to Transmit Requests */ + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_AGE_FREQ, reg, 0b11000); + /* Response High Priority Threshold */ + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RS2_HPWAIT, reg, 0b011011); + /* 4-slot Request High Priority Threshold */ + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RQ4_HPWAIT, reg, 0b011011); + /* 6-slot Request High Priority */ + reg = SETFIELD(NPU2_OTL_CONFIG1_TX_RQ6_HPWAIT, reg, 0b011011); + /* Stop the OCAPI Link on Uncorrectable Error + * TODO: Confirm final value - disabled for debug */ + + npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG1(stack, block), + NPU2_MISC_DA_LEN_8B, reg); + + /* TLX Credit Configuration Register */ + reg = 0; + /* VC0/VC3/DCP0/DCP1 credits to send to AFU */ + reg = SETFIELD(NPU2_OTL_TLX_CREDITS_VC0_CREDITS, reg, 0x40); + reg = SETFIELD(NPU2_OTL_TLX_CREDITS_VC3_CREDITS, reg, 0x40); + reg = SETFIELD(NPU2_OTL_TLX_CREDITS_DCP0_CREDITS, reg, 0x80); + reg = SETFIELD(NPU2_OTL_TLX_CREDITS_DCP1_CREDITS, reg, 0x80); + npu2_scom_write(gcid, scom_base, NPU2_OTL_TLX_CREDITS(stack, block), + NPU2_MISC_DA_LEN_8B, reg); +} + +/* Detect Nimbus DD2.0 and DD2.01 */ +static int get_nimbus_level(void) +{ + struct proc_chip *chip = next_chip(NULL); + + if (chip && chip->type == PROC_CHIP_P9_NIMBUS) + return chip->ec_level & 0xff; + return -1; +} + +/* Procedure 13.1.3.6 - Address Translation Configuration */ +static void address_translation_config(uint32_t gcid, uint32_t scom_base, + uint64_t index) +{ + int chip_level; + uint64_t reg; + uint64_t stack = index_to_stack(index); + + prlog(PR_DEBUG, "OCAPI: %s: Address Translation Configuration\n", __func__); + /* PSL_SCNTL_A0 Register */ + /* + * ERAT shared between 
multiple AFUs + * + * The workbook has this bit around the wrong way from the hardware. + * + * TODO: handle correctly with link ganging + */ + reg = npu2_scom_read(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, + NPU2_XSL_PSL_SCNTL_A0), + NPU2_MISC_DA_LEN_8B); + reg |= NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL; + npu2_scom_write(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, + NPU2_XSL_PSL_SCNTL_A0), + NPU2_MISC_DA_LEN_8B, reg); + + chip_level = get_nimbus_level(); + if (chip_level == 0x20) { + /* + * Errata HW408041 (section 15.1.10 of NPU workbook) + * "RA mismatch when both tlbie and checkout response + * are seen in same cycle" + */ + /* XSL_GP Register - Bloom Filter Disable */ + reg = npu2_scom_read(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP), + NPU2_MISC_DA_LEN_8B); + /* To update XSL_GP, we must first write a magic value to it */ + npu2_scom_write(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP), + NPU2_MISC_DA_LEN_8B, 0x0523790323000000); + reg &= ~NPU2_XSL_GP_BLOOM_FILTER_ENABLE; + npu2_scom_write(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, NPU2_XSL_GP), + NPU2_MISC_DA_LEN_8B, reg); + } + + if (chip_level == 0x20 || chip_level == 0x21) { + /* + * DD2.0/2.1 EOA Bug. 
Fixed in DD2.2 + */ + reg = 0x32F8000000000001; + npu2_scom_write(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_XSL, + NPU2_XSL_DEF), + NPU2_MISC_DA_LEN_8B, reg); + } +} + +/* TODO: Merge this with NVLink implementation - we don't use the npu2_bar + * wrapper for the PHY BARs yet */ +static void write_bar(uint32_t gcid, uint32_t scom_base, uint64_t reg, + uint64_t addr, uint64_t size) +{ + uint64_t val; + int block; + switch (NPU2_REG(reg)) { + case NPU2_PHY_BAR: + val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, addr >> 21); + val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, 1); + break; + case NPU2_NTL0_BAR: + case NPU2_NTL1_BAR: + val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, addr >> 16); + val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(size >> 16)); + val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, 1); + break; + case NPU2_GENID_BAR: + val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, addr >> 16); + val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, 1); + break; + default: + val = 0ul; + } + + for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) { + npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(0, block, reg), + NPU2_MISC_DA_LEN_8B, val); + prlog(PR_DEBUG, "OCAPI: Setting BAR %llx to %llx\n", + NPU2_REG_OFFSET(0, block, reg), val); + } +} + +static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base, + uint64_t reg[]) +{ + uint64_t addr, size; + + prlog(PR_DEBUG, "OCAPI: patching up PHY0 bar, %s\n", __func__); + phys_map_get(gcid, NPU_PHY, 0, &addr, &size); + write_bar(gcid, scom_base, + NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR), + addr, size); + prlog(PR_DEBUG, "OCAPI: patching up PHY1 bar, %s\n", __func__); + phys_map_get(gcid, NPU_PHY, 1, &addr, &size); + write_bar(gcid, scom_base, + NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR), + addr, size); + + prlog(PR_DEBUG, "OCAPI: setup global mmio, %s\n", __func__); + phys_map_get(gcid, NPU_REGS, 0, &addr, &size); + write_bar(gcid, scom_base, + NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR), + addr, size); + reg[0] 
= addr; + reg[1] = size; +} + +static int setup_irq(struct npu2 *p) +{ + uint64_t reg, mmio_addr; + uint32_t base; + + base = xive_alloc_ipi_irqs(p->chip_id, NPU_IRQ_LEVELS, 64); + if (base == XIVE_IRQ_ERROR) { + /** + * @fwts-label OCAPIIRQAllocationFailed + * @fwts-advice OpenCAPI IRQ setup failed. This is probably + * a firmware bug. OpenCAPI functionality will be broken. + */ + prlog(PR_ERR, "OCAPI: Couldn't allocate interrupts for NPU\n"); + return -1; + } + p->irq_base = base; + + xive_register_ipi_source(base, NPU_IRQ_LEVELS, NULL, NULL); + mmio_addr = (uint64_t ) xive_get_trigger_port(base); + prlog(PR_DEBUG, "OCAPI: NPU base irq %d @%llx\n", base, mmio_addr); + reg = (mmio_addr & NPU2_MISC_IRQ_BASE_MASK) << 13; + npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_BASE, + NPU2_MISC_DA_LEN_8B, reg); + /* + * setup page size = 64k + * + * OS type is set to AIX: opal also runs with 2 pages per interrupt, + * so to cover the max offset for 35 levels of interrupt, we need + * bits 41 to 46, which is what the AIX setting does. There's no + * other meaning for that AIX setting. 
+ */ + reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_CFG, + NPU2_MISC_DA_LEN_8B); + reg |= NPU2_MISC_CFG_IPI_PS; + reg &= ~NPU2_MISC_CFG_IPI_OS; + npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_CFG, + NPU2_MISC_DA_LEN_8B, reg); + + /* enable translation interrupts for all bricks */ + reg = npu2_scom_read(p->chip_id, p->xscom_base, + NPU2_MISC_IRQ_ON_ERROR_EN_FIR2, NPU2_MISC_DA_LEN_8B); + reg |= PPC_BIT(0) | PPC_BIT(1) | PPC_BIT(2) | PPC_BIT(3); + npu2_scom_write(p->chip_id, p->xscom_base, + NPU2_MISC_IRQ_ON_ERROR_EN_FIR2, NPU2_MISC_DA_LEN_8B, + reg); + return 0; +} + +static void npu2_opencapi_probe(struct dt_node *dn) +{ + struct dt_node *link; + char *path; + uint32_t gcid, index, links, scom_base; + uint64_t reg[2]; + uint64_t dev_index; + struct npu2 *n; + int rc; + + path = dt_get_path(dn); + gcid = dt_get_chip_id(dn); + index = dt_prop_get_u32(dn, "ibm,npu-index"); + links = dt_prop_get_u32(dn, "ibm,npu-links"); + + /* Don't try to init when we have an NVLink link */ + dt_for_each_compatible(dn, link, "ibm,npu-link") { + prlog(PR_DEBUG, "OCAPI: NPU%d: NVLink link found, skipping\n", + index); + return; + } + + prlog(PR_INFO, "OCAPI: Chip %d Found OpenCAPI NPU%d (%d links) at %s\n", + gcid, index, links, path); + free(path); + + scom_base = dt_get_address(dn, 0, NULL); + prlog(PR_INFO, "OCAPI: SCOM Base: %08x\n", scom_base); + + setup_global_mmio_bar(gcid, scom_base, reg); + + n = zalloc(sizeof(struct npu2) + links * sizeof(struct npu2_dev)); + n->devices = (struct npu2_dev *)(n + 1); + n->chip_id = gcid; + n->xscom_base = scom_base; + n->regs = (void *)reg[0]; + n->dt_node = dn; + + dt_for_each_compatible(dn, link, "ibm,npu-link-opencapi") { + dev_index = dt_prop_get_u32(link, "ibm,npu-link-index"); + prlog(PR_INFO, "OCAPI: Configuring link index %lld\n", + dev_index); + + /* Procedure 13.1.3.1 - Select OCAPI vs NVLink */ + brick_config(gcid, scom_base, dev_index); + + /* Procedure 13.1.3.5 - Transaction Layer Configuration */ + 
tl_config(gcid, scom_base, dev_index); + + /* Procedure 13.1.3.6 - Address Translation Configuration */ + address_translation_config(gcid, scom_base, dev_index); + } + + /* Procedure 13.1.3.10 - Interrupt Configuration */ + rc = setup_irq(n); + if (rc) + goto failed; + + return; +failed: + free(n); +} + +void probe_npu2_opencapi(void) +{ + struct dt_node *np_npu; + + dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu") + npu2_opencapi_probe(np_npu); +} diff --git a/hw/npu2.c b/hw/npu2.c index 12d239a..218ac86 100644 --- a/hw/npu2.c +++ b/hw/npu2.c @@ -1274,7 +1274,7 @@ static void npu2_probe_phb(struct dt_node *dn) { struct proc_chip *proc_chip; struct dt_node *np; - uint32_t gcid, scom, index, phb_index, links; + uint32_t gcid, scom, index, phb_index, links = 0; uint64_t reg[2], mm_win[2]; char *path; @@ -1288,6 +1288,26 @@ static void npu2_probe_phb(struct dt_node *dn) return; } + dt_for_each_compatible(dn, np, "ibm,npu-link") { + links++; + } + + index = dt_prop_get_u32(dn, "ibm,npu-index"); + phb_index = dt_prop_get_u32(dn, "ibm,phb-index"); + + prlog(PR_INFO, "NPU2: Chip %d Found NPU2#%d (%d links) at %s\n", + gcid, index, links, path); + free(path); + + /* Retrieve scom base address */ + scom = dt_get_address(dn, 0, NULL); + prlog(PR_INFO, " SCOM Base: %08x\n", scom); + + if (!links) { + prlog(PR_INFO, " No NVLink links found\n"); + return; + } + if (!is_p9dd1()) { /* TODO: Clean this up with register names, etc. when we get * time. 
This just turns NVLink mode on in each brick and should @@ -1324,17 +1344,6 @@ static void npu2_probe_phb(struct dt_node *dn) xscom_write_mask(gcid, 0x5011530, PPC_BIT(0), PPC_BIT(0)); } - index = dt_prop_get_u32(dn, "ibm,npu-index"); - phb_index = dt_prop_get_u32(dn, "ibm,phb-index"); - links = dt_prop_get_u32(dn, "ibm,npu-links"); - prlog(PR_INFO, "NPU2: Chip %d Found NPU2#%d (%d links) at %s\n", - gcid, index, links, path); - free(path); - - /* Retrieve scom base address */ - scom = dt_get_address(dn, 0, NULL); - prlog(PR_INFO, " SCOM Base: %08x\n", scom); - /* Reassign the BARs */ assign_mmio_bars(gcid, scom, reg, mm_win); diff --git a/include/npu2-regs.h b/include/npu2-regs.h index 27956f2..b2812d5 100644 --- a/include/npu2-regs.h +++ b/include/npu2-regs.h @@ -90,6 +90,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_BLOCK_DAT 0x5 #define NPU2_BLOCK_NTL0 0x9 #define NPU2_BLOCK_NTL1 0xb +#define NPU2_BLOCK_OTL0 0xc +#define NPU2_BLOCK_OTL1 0xd + +/* OpenCAPI blocks */ +#define NPU2_BLOCK_XSL 0xe /* MISC stack blocks */ #define NPU2_BLOCK_ATS 0 @@ -108,6 +113,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, /* SM block registers */ #define NPU2_CQ_SM_MISC_CFG0 0x000 +#define NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38) +#define NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57) #define NPU2_CQ_SM_MISC_CFG1 0x008 #define NPU2_PB_EPSILON 0x010 #define NPU2_TIMER_CFG 0x018 @@ -138,6 +145,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_GENID_BAR_ADDR PPC_BITMASK(3,35) #define NPU2_GENID_BAR_POISON PPC_BIT(39) #define NPU2_LOW_WATER_MARKS 0x040 +#define NPU2_LOW_WATER_MARKS_ENABLE_MACHINE_ALLOC PPC_BIT(51) #define NPU2_HIGH_WATER_MARKS 0x048 #define NPU2_RELAXED_ORDERING_CFG0 0x050 #define NPU2_RELAXED_ORDERING_CFG1 0x058 @@ -176,6 +184,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, /* CTL block registers */ #define NPU2_CQ_CTL_MISC_CFG 0x000 +#define 
NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52) +#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55) +#define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56) #define NPU2_CQ_FUTURE_CFG1 0x008 #define NPU2_CQ_FUTURE_CFG2 0x010 #define NPU2_CQ_FUTURE_CFG3 0x018 @@ -198,6 +209,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_CQ_LPC_THRESHOLD_CFG 0x080 #define NPU2_CQ_INHIBIT_CFG 0x088 #define NPU2_CQ_CTL_STATUS 0x090 +#define NPU2_CQ_CTL_STATUS_BRK0_AM_FENCED PPC_BITMASK(48, 49) +#define NPU2_CQ_CTL_STATUS_BRK1_AM_FENCED PPC_BITMASK(50, 51) #define NPU2_CQ_C_ERR_RPT_MSG0 0x0C0 #define NPU2_CQ_C_ERR_RPT_MSG1 0x0C8 #define NPU2_CQ_C_ERR_RPT_FIRST0 0x0D0 @@ -206,9 +219,13 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_CQ_C_ERR_RPT_MASK1 0x0E8 #define NPU2_CQ_C_ERR_RPT_HOLD0 0x0F0 #define NPU2_CQ_C_ERR_RPT_HOLD1 0x0F8 +#define NPU2_CQ_CTL_FENCE_CONTROL_0 0x140 +#define NPU2_CQ_CTL_FENCE_CONTROL_1 0x148 +#define NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE PPC_BITMASK(0, 1) /* DAT block registers */ #define NPU2_CQ_DAT_MISC_CFG 0x008 +#define NPU2_CQ_DAT_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(40) #define NPU2_CQ_DAT_ECC_CFG 0x010 #define NPU2_CQ_DAT_SCRATCH0 0x018 #define NPU2_CQ_DAT_ECC_STATUS 0x020 @@ -289,6 +306,43 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_NTL_DL_CONFIG(ndev) NPU2_DL_REG_OFFSET(ndev, 0xFFF8) #define NPU2_NTL_DL_CLK_CTRL(ndev) NPU2_DL_REG_OFFSET(ndev, 0x001C) +/* OpenCAPI - XSL registers */ +#define NPU2_XSL_PSL_SCNTL_A0 0x010 +#define NPU2_XSL_PSL_SCNTL_A0_MULTI_AFU_DIAL PPC_BIT(0) +#define NPU2_XSL_DEF 0x040 +#define NPU2_XSL_GP 0x058 +#define NPU2_XSL_GP_BLOOM_FILTER_ENABLE PPC_BIT(16) +#define NPU2_XSL_WRAP_CFG 0x0C0 +#define NPU2_XSL_WRAP_CFG_XSLO_CLOCK_ENABLE PPC_BIT(0) + +/* OpenCAPI - OTL registers */ +#define NPU2_OTL_CONFIG0(stack, block) NPU2_REG_OFFSET(stack, block, 0x000) +#define NPU2_OTL_CONFIG0_EN PPC_BIT(0) +#define NPU2_OTL_CONFIG0_BLOCK_PE_HANDLE 
PPC_BIT(1) +#define NPU2_OTL_CONFIG0_BRICKID PPC_BITMASK(2, 3) +#define NPU2_OTL_CONFIG0_PE_MASK PPC_BITMASK(4, 7) +#define NPU2_OTL_CONFIG0_ERAT_HASH_0 PPC_BITMASK(8, 13) +#define NPU2_OTL_CONFIG0_ERAT_HASH_1 PPC_BITMASK(14, 19) +#define NPU2_OTL_CONFIG0_ERAT_HASH_2 PPC_BITMASK(20, 25) +#define NPU2_OTL_CONFIG0_ERAT_HASH_3 PPC_BITMASK(26, 31) +#define NPU2_OTL_CONFIG0_BLOCK_TID_OVERRIDE PPC_BIT(50) +#define NPU2_OTL_TLX_CREDITS(stack, block) NPU2_REG_OFFSET(stack, block, 0x050) +#define NPU2_OTL_TLX_CREDITS_VC0_CREDITS PPC_BITMASK(0, 7) +#define NPU2_OTL_TLX_CREDITS_VC3_CREDITS PPC_BITMASK(24, 31) +#define NPU2_OTL_TLX_CREDITS_DCP0_CREDITS PPC_BITMASK(32, 39) +#define NPU2_OTL_TLX_CREDITS_DCP1_CREDITS PPC_BITMASK(56, 63) +#define NPU2_OTL_CONFIG1(stack, block) NPU2_REG_OFFSET(stack, block, 0x058) +#define NPU2_OTL_CONFIG1_TX_DRDY_WAIT PPC_BITMASK(5, 7) +#define NPU2_OTL_CONFIG1_TX_TEMP0_RATE PPC_BITMASK(8, 11) +#define NPU2_OTL_CONFIG1_TX_CRET_FREQ PPC_BITMASK(32, 34) +#define NPU2_OTL_CONFIG1_TX_AGE_FREQ PPC_BITMASK(35, 39) +#define NPU2_OTL_CONFIG1_TX_RS2_HPWAIT PPC_BITMASK(40, 45) +#define NPU2_OTL_CONFIG1_TX_RQ4_HPWAIT PPC_BITMASK(46, 51) +#define NPU2_OTL_CONFIG1_TX_RQ6_HPWAIT PPC_BITMASK(52, 57) +#define NPU2_OTL_CONFIG1_TX_CBUF_ECC_DIS PPC_BIT(58) +#define NPU2_OTL_CONFIG1_TX_STOP_LINK PPC_BIT(59) +#define NPU2_OTL_CONFIG1_TX_STOP_ON_UE PPC_BIT(60) + /* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above * there is only a single instance of each of these in the NPU so we * define them as absolute offsets. 
*/ @@ -296,6 +350,9 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_MISC_RELAXTED_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x008) #define NPU2_MISC_LCO_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x010) #define NPU2_MISC_OPTICAL_IO_CFG0 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x018) +#define NPU2_MISC_OPTICAL_IO_CFG0_NDLMUX_BRK0TO2 PPC_BITMASK(0, 2) +#define NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK0TO1 PPC_BITMASK(3, 4) +#define NPU2_MISC_OPTICAL_IO_CFG0_OCMUX_BRK4TO5 PPC_BITMASK(5, 6) #define NPU2_MISC_ERR_RPT_HOLD NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x020) #define NPU2_MISC_ERR_RPT_MASK NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x028) #define NPU2_MISC_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x030) @@ -320,6 +377,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_MISC_ERR_CTL_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x088) #define NPU2_MISC_RING_ACK_ERR NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x090) #define NPU2_MISC_IRQ_BASE NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x098) +#define NPU2_MISC_IRQ_BASE_MASK PPC_BITMASK(13, 51) #define NPU2_MISC_ERR_BRICK_GROUP NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0A0) #define NPU2_MISC_FREEZE_STATE NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0A8) #define NPU2_MISC_FENCE_STATE NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x0B0) @@ -377,6 +435,8 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_MISC_IRQ_LOG13 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x368) #define NPU2_MISC_IRQ_LOG14 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x370) #define NPU2_MISC_IRQ_LOG15 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x378) +#define NPU2_MISC_IRQ_ON_ERROR_EN_FIR2 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x408) + /* ATS block registers */ #define NPU2_ATS_PMU_CTL NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_ATS, 0x000) @@ -421,8 
+481,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_XTS_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x020) #define NPU2_XTS_CFG_MMIOSD PPC_BIT(1) #define NPU2_XTS_CFG_TRY_ATR_RO PPC_BIT(6) +#define NPU2_XTS_CFG_OPENCAPI PPC_BIT(15) #define NPU2_XTS_CFG2 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x028) #define NPU2_XTS_CFG2_NO_FLUSH_ENA PPC_BIT(49) +#define NPU2_XTS_CFG2_XSL1_ENA PPC_BIT(54) +#define NPU2_XTS_CFG2_XSL2_ENA PPC_BIT(55) #define NPU2_XTS_DBG_CFG0 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x030) #define NPU2_XTS_DBG_CFG1 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x038) #define NPU2_XTS_PMU_CNT NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_XTS, 0x040) @@ -478,4 +541,29 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_DD1_MISC_SCOM_IND_SCOM_DATA 0x38f #define NPU2_MISC_SCOM_IND_SCOM_DATA 0x68f + +/* OpenCAPI - PowerBus and OBus configuration SCOM addresses */ +#define PU_IOE_PB_MISC_CFG 0x5013823 +#define PU_IOE_PB_MISC_CFG_SEL_03_NPU_NOT_PB PPC_BIT(13) +#define PU_IOE_PB_MISC_CFG_SEL_04_NPU_NOT_PB PPC_BIT(14) +#define PU_IOE_PB_MISC_CFG_SEL_05_NPU_NOT_PB PPC_BIT(15) +#define OBUS_LL0_IOOL_PHY_CONFIG 0x901080C +#define OBUS_LL3_IOOL_PHY_CONFIG 0xC01080C +#define OBUS_IOOL_PHY_CONFIG_ODL0_ENABLED PPC_BIT(52) +#define OBUS_IOOL_PHY_CONFIG_ODL1_ENABLED PPC_BIT(53) +#define OBUS_IOOL_PHY_CONFIG_ODL_PHY_SWAP PPC_BIT(54) +#define OBUS_IOOL_PHY_CONFIG_LINK0_OLL_ENABLED PPC_BIT(58) +#define OBUS_IOOL_PHY_CONFIG_LINK1_OLL_ENABLED PPC_BIT(59) +#define OBUS_IOOL_PHY_CONFIG_NV0_NPU_ENABLED PPC_BIT(61) +#define OBUS_IOOL_PHY_CONFIG_NV1_NPU_ENABLED PPC_BIT(62) +#define OBUS_IOOL_PHY_CONFIG_NV2_NPU_ENABLED PPC_BIT(63) +#define PU_IOE_PB_FP01_CFG 0x501380A +#define PU_IOE_PB_FP23_CFG 0x501380B +#define PU_IOE_PB_FP45_CFG 0x501380C +#define PU_IOE_PB_FP67_CFG 0x501380D +#define PU_IOE_PB_FP_CFG_FP0_FMR_DISABLE PPC_BIT(20) +#define PU_IOE_PB_FP_CFG_FP0_PRS_DISABLE PPC_BIT(25) +#define 
PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE PPC_BIT(52) +#define PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE PPC_BIT(57) + #endif /* __NPU2_REGS_H */ diff --git a/include/npu2.h b/include/npu2.h index e19fab6..b28f91b 100644 --- a/include/npu2.h +++ b/include/npu2.h @@ -134,6 +134,7 @@ struct npu2_dev { struct npu2 { uint32_t index; + struct dt_node *dt_node; uint32_t flags; uint32_t chip_id; uint64_t xscom_base; @@ -142,6 +143,7 @@ struct npu2 { uint64_t mm_base; uint64_t mm_size; uint32_t base_lsi; + uint32_t irq_base; uint32_t total_devices; struct npu2_dev *devices; enum phys_map_type gpu_map_type; diff --git a/include/skiboot.h b/include/skiboot.h index db91325..6b14c82 100644 --- a/include/skiboot.h +++ b/include/skiboot.h @@ -230,6 +230,7 @@ extern int preload_capp_ucode(void); extern void preload_io_vpd(void); extern void probe_npu(void); extern void probe_npu2(void); +extern void probe_npu2_opencapi(void); extern void uart_init(void); extern void mbox_init(void); extern void early_uart_init(void);
Scan the device tree for NPUs with OpenCAPI links and configure the NPU per
the initialisation sequence in the NPU OpenCAPI workbook.

Training of individual links and setup of per-AFU/link configuration will
be in a later patch.

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
---
 core/init.c         |   2 +-
 hw/Makefile.inc     |   3 +-
 hw/npu2-opencapi.c  | 794 +++++++++++++++++++++++++++++++++++++++++++++-
 hw/npu2.c           |  33 +-
 include/npu2-regs.h |  88 +++++-
 include/npu2.h      |   2 +-
 include/skiboot.h   |   1 +-
 7 files changed, 910 insertions(+), 13 deletions(-)
 create mode 100644 hw/npu2-opencapi.c