Message ID | 41e5168e0229596541d7ae4dbe818e15ebde6014.1517391242.git-series.andrew.donnellan@au1.ibm.com |
---|---|
State | Superseded |
Headers | show |
Series | Initial OpenCAPI 3.0 Support for P9 | expand |
Le 31/01/2018 à 10:34, Andrew Donnellan a écrit : > Scan the OpenCAPI links under the NPU, and for each link, reset the card, > set up a device, train the link and register a PHB. > > Implement the necessary operations for the OpenCAPI PHB type, and adapt the > existing npu2_set_pe() function to let us assign a PE number to an OpenCAPI > device. (For now, the PE numbers we assign to OpenCAPI devices are somewhat > meaningless - later on, we'll implement a mapping between Linux-allocated > PE numbers and a separate numbering space for PE numbers we actually > configure in the NPU.) > > Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com> > Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com> > > --- Reviewed-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com> > > v1->v2: > > - remove all-devices case for device reset (Fred) > - remove i2c 1.8v hack (Fred) > - find the right I2C port ID for resets only once per device rather than > once per reset (Fred) > > v2->v3: > > - add a stub ioda_reset callback so we don't see OPAL_UNSUPPORTED in > Linux > - Remove unused NPU IRQ register macro (Fred) > - Fix up PHB device tree properties so we don't get warnings from DTC > - use NPU phandle rather than link phandle for ibm,npcq property for > consistency with NPU2 > - use correct "ibm,config-space-type" property in device tree > --- > core/pci.c | 3 +- > hw/npu2-opencapi.c | 592 ++++++++++++++++++++++++++++++++++++++++++++- > hw/npu2.c | 60 ++--- > include/npu2-regs.h | 52 +++- > include/npu2.h | 14 +- > 5 files changed, 692 insertions(+), 29 deletions(-) > > diff --git a/core/pci.c b/core/pci.c > index 0809521..c712263 100644 > --- a/core/pci.c > +++ b/core/pci.c > @@ -1549,7 +1549,8 @@ static void pci_add_one_device_node(struct phb *phb, > pd->dn = np = dt_new(parent_node, name); > > /* XXX FIXME: make proper "compatible" properties */ > - if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { > + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false) || > + phb->phb_type 
== phb_type_npu_v2_opencapi) { > snprintf(compat, MAX_NAME, "pciex%x,%x", > vdid & 0xffff, vdid >> 16); > dt_add_property_cells(np, "ibm,pci-config-space-type", 1); > diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c > index a66c8f7..d361b84 100644 > --- a/hw/npu2-opencapi.c > +++ b/hw/npu2-opencapi.c > @@ -53,6 +53,9 @@ > #include <i2c.h> > > #define NPU_IRQ_LEVELS 35 > +#define NPU_IRQ_LEVELS_XSL 23 > + > +static const struct phb_ops npu2_opencapi_ops; > > static inline uint64_t index_to_stack(uint64_t index) { > switch (index) { > @@ -99,6 +102,28 @@ static inline uint64_t index_to_block(uint64_t index) { > } > } > > +static uint64_t get_odl_status(uint32_t gcid, uint64_t index) { > + uint64_t reg, status_xscom; > + switch (index) { > + case 2: > + status_xscom = OB0_ODL0_STATUS; > + break; > + case 3: > + status_xscom = OB0_ODL1_STATUS; > + break; > + case 4: > + status_xscom = OB3_ODL0_STATUS; > + break; > + case 5: > + status_xscom = OB3_ODL1_STATUS; > + break; > + default: > + assert(false); > + } > + xscom_read(gcid, status_xscom, &reg); > + return reg; > +} > + > /* Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5 */ > > static void set_transport_mux_controls(uint32_t gcid, uint32_t scom_base, > @@ -673,6 +698,397 @@ static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base, > reg[1] = size; > } > > +/* Procedure 13.1.3.8 - AFU MMIO Range BARs */ > +static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base, > + struct npu2_dev *dev) > +{ > + uint64_t stack = index_to_stack(dev->index); > + uint64_t offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ? > + NPU2_NTL0_BAR : NPU2_NTL1_BAR; > + uint64_t pa_offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ? 
> + NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG : > + NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG; > + uint64_t addr, size, reg; > + > + prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__); > + phys_map_get(gcid, NPU_OCAPI_MMIO, dev->index, &addr, &size); > + > + prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size); > + write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr, > + size); > + dev->bars[0].npu2_bar.base = addr; > + dev->bars[0].npu2_bar.size = size; > + > + reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, addr >> 16); > + reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(size >> 16)); > + prlog(PR_DEBUG, "OCAPI: PA translation %llx\n", reg); > + npu2_scom_write(gcid, scom_base, > + NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, > + pa_offset), > + NPU2_MISC_DA_LEN_8B, reg); > +} > + > +/* Procedure 13.1.3.9 - AFU Config BARs */ > +static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base, > + struct npu2_dev *dev) > +{ > + uint64_t stack = index_to_stack(dev->index); > + int stack_num = stack - NPU2_STACK_STCK_0; > + uint64_t addr, size; > + > + prlog(PR_DEBUG, "OCAPI: %s: Setup AFU Config BARs\n", __func__); > + phys_map_get(gcid, NPU_GENID, stack_num, &addr, &size); > + prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", addr); > + write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR), > + addr, size); > + dev->bars[1].npu2_bar.base = addr; > + dev->bars[1].npu2_bar.size = size; > +} > + > +static void otl_enabletx(uint32_t gcid, uint32_t scom_base, uint64_t index) > +{ > + uint64_t stack = index_to_stack(index); > + uint64_t block = index_to_block(index); > + uint64_t reg; > + > + /* OTL Config 2 Register */ > + /* Transmit Enable */ > + prlog(PR_DEBUG, "OCAPI: %s: Enabling TX\n", __func__); > + reg = 0; > + reg |= NPU2_OTL_CONFIG2_TX_SEND_EN; > + npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG2(stack, block), > + NPU2_MISC_DA_LEN_8B, reg); > + > + reg = npu2_scom_read(gcid, scom_base, 
NPU2_OTL_VC_CREDITS(stack, block), > + NPU2_MISC_DA_LEN_8B); > + prlog(PR_DEBUG, "OCAPI: credit counter: %llx\n", reg); > + /* TODO: Abort if credits are zero */ > +} > + > +static void reset_ocapi_device(struct npu2_dev *dev) > +{ > + uint8_t data[3]; > + int rc; > + int i; > + > + switch (dev->index) { > + case 2: > + case 4: > + memcpy(data, platform.ocapi->i2c_odl0_data, sizeof(data)); > + break; > + case 3: > + case 5: > + memcpy(data, platform.ocapi->i2c_odl1_data, sizeof(data)); > + break; > + default: > + assert(false); > + } > + > + for (i = 0; i < 3; i++) { > + rc = i2c_request_send(dev->i2c_port_id_ocapi, 0x20, SMBUS_WRITE, > + platform.ocapi->i2c_offset[i], 1, > + &data[i], sizeof(data[i]), 120); > + if (rc) { > + /** > + * @fwts-label OCAPIDeviceResetFailed > + * @fwts-advice There was an error attempting to send > + * a reset signal over I2C to the OpenCAPI device. > + */ > + prlog(PR_ERR, "OCAPI: Error writing I2C reset signal: %d\n", rc); > + break; > + } > + if (i != 0) > + time_wait_ms(5); > + } > +} > + > +static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev) > +{ > + uint64_t reg, config_xscom; > + int timeout = 3000; > + prlog(PR_DEBUG, "OCAPI: %s: Training ODL\n", __func__); > + > + switch (index) { > + case 2: > + config_xscom = OB0_ODL0_CONFIG; > + break; > + case 3: > + config_xscom = OB0_ODL1_CONFIG; > + break; > + case 4: > + config_xscom = OB3_ODL0_CONFIG; > + break; > + case 5: > + config_xscom = OB3_ODL1_CONFIG; > + break; > + default: > + assert(false); > + } > + > + /* Reset ODL */ > + reg = OB_ODL_CONFIG_RESET; > + reg = SETFIELD(OB_ODL_CONFIG_VERSION, reg, 0b000001); > + reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b0110); > + reg = SETFIELD(OB_ODL_CONFIG_SUPPORTED_MODES, reg, 0b0010); > + reg |= OB_ODL_CONFIG_X4_BACKOFF_ENABLE; > + reg = SETFIELD(OB_ODL_CONFIG_PHY_CNTR_LIMIT, reg, 0b1111); > + reg |= OB_ODL_CONFIG_DEBUG_ENABLE; > + reg = SETFIELD(OB_ODL_CONFIG_FWD_PROGRESS_TIMER, reg, 0b0110); > + 
xscom_write(gcid, config_xscom, reg); > + > + reg &= ~OB_ODL_CONFIG_RESET; > + xscom_write(gcid, config_xscom, reg); > + > + reset_ocapi_device(dev); > + > + /* Transmit Pattern A */ > + reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b0001); > + xscom_write(gcid, config_xscom, reg); > + time_wait_ms(5); > + > + /* Bump lanes - this improves training reliability */ > + npu2_opencapi_bump_ui_lane(dev); > + > + /* Start training */ > + reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b1000); > + xscom_write(gcid, config_xscom, reg); > + > + do { > + reg = get_odl_status(gcid, index); > + if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) == 0x7) { > + prlog(PR_NOTICE, > + "OCAPI: Link %d on chip %u trained in %dms\n", > + index, gcid, 3000 - timeout); > + return OPAL_SUCCESS; > + } > + time_wait_ms(1); > + } while (timeout--); > + prlog(PR_INFO, "OCAPI: Link %d on chip %u failed to train, retrying\n", > + index, gcid); > + prlog(PR_INFO, "OCAPI: Link status: %016llx\n", reg); > + return OPAL_HARDWARE; > +} > + > +static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val) > +{ > + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb); > + uint64_t reg; > + int64_t link_width, rc = OPAL_SUCCESS; > + > + reg = get_odl_status(dev->npu->chip_id, dev->index); > + link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg); > + switch (link_width) { > + case 0b0001: > + *val = OPAL_SHPC_LINK_UP_x4; > + break; > + case 0b0010: > + *val = OPAL_SHPC_LINK_UP_x8; > + break; > + default: > + rc = OPAL_HARDWARE; > + } > + return rc; > +} > + > +static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb) > +{ > + struct pci_slot *slot; > + > + slot = pci_slot_alloc(phb, NULL); > + if (!slot) > + return slot; > + > + /* TODO: Figure out other slot functions */ > + slot->ops.get_presence_state = NULL; > + slot->ops.get_link_state = npu2_opencapi_get_link_state; > + slot->ops.get_power_state = NULL; > + slot->ops.get_attention_state = NULL; > + 
slot->ops.get_latch_state = NULL; > + slot->ops.set_power_state = NULL; > + slot->ops.set_attention_state = NULL; > + > + return slot; > +} > + > +static int64_t npu2_opencapi_pcicfg_check(struct npu2_dev *dev, uint32_t offset, > + uint32_t size) > +{ > + if (!dev || offset > 0xfff || (offset & (size - 1))) > + return OPAL_PARAMETER; > + > + return OPAL_SUCCESS; > +} > + > +static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn, > + uint32_t offset, uint32_t size, > + void *data) > +{ > + uint64_t cfg_addr; > + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb); > + uint64_t genid_base; > + int64_t rc; > + > + rc = npu2_opencapi_pcicfg_check(dev, offset, size); > + if (rc) > + return rc; > + > + genid_base = dev->bars[1].npu2_bar.base + > + (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0); > + > + cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE; > + cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER | > + NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER | > + NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER, > + cfg_addr, bdfn); > + cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER, > + cfg_addr, offset & ~3u); > + > + out_be64((uint64_t *)genid_base, cfg_addr); > + sync(); > + > + switch (size) { > + case 1: > + *((uint8_t *)data) = > + in_8((volatile uint8_t *)(genid_base + 128 + (offset & 3))); > + break; > + case 2: > + *((uint16_t *)data) = > + in_le16((volatile uint16_t *)(genid_base + 128 + (offset & 2))); > + break; > + case 4: > + *((uint32_t *)data) = in_le32((volatile uint32_t *)(genid_base + 128)); > + break; > + default: > + return OPAL_PARAMETER; > + } > + > + return OPAL_SUCCESS; > +} > + > +#define NPU2_OPENCAPI_PCI_CFG_READ(size, type) \ > +static int64_t npu2_opencapi_pcicfg_read##size(struct phb *phb, \ > + uint32_t bdfn, \ > + uint32_t offset, \ > + type *data) \ > +{ \ > + /* Initialize data in case of error */ \ > + *data = (type)0xffffffff; \ > + return npu2_opencapi_pcicfg_read(phb, bdfn, offset, \ > + sizeof(type), data); \ > 
+} > + > +static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn, > + uint32_t offset, uint32_t size, > + uint32_t data) > +{ > + uint64_t cfg_addr; > + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb); > + uint64_t genid_base; > + int64_t rc; > + > + rc = npu2_opencapi_pcicfg_check(dev, offset, size); > + if (rc) > + return rc; > + > + genid_base = dev->bars[1].npu2_bar.base + > + (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0); > + > + cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE; > + cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER | > + NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER | > + NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER, > + cfg_addr, bdfn); > + cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER, > + cfg_addr, offset & ~3u); > + > + out_be64((uint64_t *)genid_base, cfg_addr); > + sync(); > + > + switch (size) { > + case 1: > + out_8((volatile uint8_t *)(genid_base + 128 + (offset & 3)), > + data); > + break; > + case 2: > + out_le16((volatile uint16_t *)(genid_base + 128 + (offset & 2)), > + data); > + break; > + case 4: > + out_le32((volatile uint32_t *)(genid_base + 128), data); > + break; > + default: > + return OPAL_PARAMETER; > + } > + > + return OPAL_SUCCESS; > +} > + > +#define NPU2_OPENCAPI_PCI_CFG_WRITE(size, type) \ > +static int64_t npu2_opencapi_pcicfg_write##size(struct phb *phb, \ > + uint32_t bdfn, \ > + uint32_t offset, \ > + type data) \ > +{ \ > + return npu2_opencapi_pcicfg_write(phb, bdfn, offset, \ > + sizeof(type), data); \ > +} > + > +NPU2_OPENCAPI_PCI_CFG_READ(8, u8) > +NPU2_OPENCAPI_PCI_CFG_READ(16, u16) > +NPU2_OPENCAPI_PCI_CFG_READ(32, u32) > +NPU2_OPENCAPI_PCI_CFG_WRITE(8, u8) > +NPU2_OPENCAPI_PCI_CFG_WRITE(16, u16) > +NPU2_OPENCAPI_PCI_CFG_WRITE(32, u32) > + > +static int64_t npu2_opencapi_ioda_reset(struct phb __unused *phb, > + bool __unused purge) > +{ > + /* Not relevant to OpenCAPI - we do this just to silence the error */ > + return OPAL_SUCCESS; > +} > + > +static int 
npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd, > + void *data __unused) > +{ > + uint32_t irq; > + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb); > + uint64_t block = index_to_block(dev->index); > + uint64_t stacku = index_to_stacku(dev->index); > + uint64_t dsisr, dar, tfc, handle; > + > + /* > + * Pass the hw irq number for the translation fault irq > + * irq levels 23 -> 26 are for translation faults, 1 per brick > + */ > + irq = dev->npu->irq_base + NPU_IRQ_LEVELS_XSL; > + if (stacku == NPU2_STACK_STCK_2U) > + irq += 2; > + if (block == NPU2_BLOCK_OTL1) > + irq++; > + > + /* > + * Add the addresses of the registers needed by the OS to handle > + * faults. The OS accesses them by mmio. > + */ > + dsisr = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DSISR(stacku, block); > + dar = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DAR(stacku, block); > + tfc = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_TFC(stacku, block); > + handle = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_PEHANDLE(stacku, > + block); > + dt_add_property_cells(pd->dn, "ibm,opal-xsl-irq", irq); > + dt_add_property_cells(pd->dn, "ibm,opal-xsl-mmio", > + hi32(dsisr), lo32(dsisr), > + hi32(dar), lo32(dar), > + hi32(tfc), lo32(tfc), > + hi32(handle), lo32(handle)); > + return 0; > +} > + > +static void npu2_opencapi_final_fixup(struct phb *phb) > +{ > + pci_walk_dev(phb, NULL, npu2_add_mmio_regs, NULL); > +} > + > static void mask_nvlink_fir(struct npu2 *p) > { > uint64_t reg; > @@ -766,6 +1182,137 @@ static int setup_irq(struct npu2 *p) > return 0; > } > > +static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, > + struct npu2_dev *dev) > +{ > + uint32_t dev_index, npu_index; > + struct dt_node *dn_phb, *dn; > + struct pci_slot *slot; > + char port_name[17]; > + uint64_t mm_win[2]; > + int retries = 20; > + int rc; > + > + dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index"); > + npu_index = dt_prop_get_u32(n->dt_node, "ibm,npu-index"); > + > + /* Populate PHB device 
node */ > + phys_map_get(n->chip_id, NPU_OCAPI_MMIO, dev_index, &mm_win[0], > + &mm_win[1]); > + prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n", > + mm_win[0], mm_win[1]); > + dn_phb = dt_new_addr(dt_root, "pciex", mm_win[0]); > + assert(dn_phb); > + dt_add_property_strings(dn_phb, > + "compatible", > + "ibm,power9-npu-opencapi-pciex", > + "ibm,ioda2-npu2-opencapi-phb"); > + > + dt_add_property_cells(dn_phb, "#address-cells", 3); > + dt_add_property_cells(dn_phb, "#size-cells", 2); > + dt_add_property_cells(dn_phb, "#interrupt-cells", 1); > + dt_add_property_cells(dn_phb, "bus-range", 0, 0xff); > + dt_add_property_cells(dn_phb, "clock-frequency", 0x200, 0); > + dt_add_property_cells(dn_phb, "interrupt-parent", get_ics_phandle()); > + > + dt_add_property_strings(dn_phb, "device_type", "pciex"); > + dt_add_property(dn_phb, "reg", mm_win, sizeof(mm_win)); > + dt_add_property_cells(dn_phb, "ibm,npu-index", npu_index); > + dt_add_property_cells(dn_phb, "ibm,chip-id", n->chip_id); > + dt_add_property_cells(dn_phb, "ibm,xscom-base", n->xscom_base); > + dt_add_property_cells(dn_phb, "ibm,npcq", n->dt_node->phandle); > + dt_add_property_cells(dn_phb, "ibm,links", 1); > + dt_add_property(dn_phb, "ibm,mmio-window", mm_win, sizeof(mm_win)); > + dt_add_property_cells(dn_phb, "ibm,phb-diag-data-size", 0); > + dt_add_property_cells(dn_phb, "ibm,opal-num-pes", NPU2_MAX_PE_NUM); > + > + n->mm_base = mm_win[0]; > + n->mm_size = mm_win[1]; > + > + dt_add_property_cells(dn_phb, "ranges", 0x02000000, > + hi32(n->mm_base), lo32(n->mm_base), > + hi32(n->mm_base), lo32(n->mm_base), > + hi32(n->mm_size), lo32(n->mm_size)); > + > + dev->type = NPU2_DEV_TYPE_OPENCAPI; > + dev->npu = n; > + dev->dt_node = dn_link; > + dev->phb_ocapi.dt_node = dn_phb; > + dev->phb_ocapi.ops = &npu2_opencapi_ops; > + dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi; > + dev->phb_ocapi.scan_map = 1; > + dev->index = dt_prop_get_u32(dn_link, "ibm,npu-link-index"); > + dev->pl_xscom_base = 
dt_prop_get_u64(dn_link, "ibm,npu-phy"); > + dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask"); > + dev->bdfn = 0; > + n->total_devices++; > + > + /* Find I2C port for handling device reset */ > + snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d", > + dev->npu->chip_id, platform.ocapi->i2c_engine, > + platform.ocapi->i2c_port); > + prlog(PR_DEBUG, "OCAPI: Looking for I2C port %s\n", port_name); > + > + dt_for_each_compatible(dt_root, dn, "ibm,power9-i2c-port") { > + if (streq(port_name, dt_prop_get(dn, "ibm,port-name"))) { > + dev->i2c_port_id_ocapi = dt_prop_get_u32(dn, "ibm,opal-id"); > + break; > + } > + } > + > + if (!dev->i2c_port_id_ocapi) { > + prlog(PR_ERR, "OCAPI: Couldn't find I2C port %s\n", port_name); > + goto failed; > + } > + > + /* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */ > + /* Procedure 13.1.3.8 - AFU MMIO Range BARs */ > + setup_afu_mmio_bars(n->chip_id, n->xscom_base, dev); > + /* Procedure 13.1.3.9 - AFU Config BARs */ > + setup_afu_config_bars(n->chip_id, n->xscom_base, dev); > + > + set_fence_control(n->chip_id, n->xscom_base, dev->index, 0b00); > + > + npu2_opencapi_phy_setup(dev); > + > + do { > + rc = odl_train(n->chip_id, dev->index, dev); > + } while (rc != OPAL_SUCCESS && --retries); > + > + if (rc != OPAL_SUCCESS && retries == 0) { > + /** > + * @fwts-label OCAPILinkTrainingFailed > + * @fwts-advice The OpenCAPI link training procedure failed. > + * This indicates a hardware or firmware bug. OpenCAPI > + * functionality will not be available on this link. 
> + */ > + prlog(PR_ERR, "OCAPI: Link %d on chip %u failed to train\n", > + dev->index, n->chip_id); > + prlog(PR_ERR, "OCAPI: Final link status: %016llx\n", > + get_odl_status(n->chip_id, dev->index)); > + goto failed; > + } > + > + otl_enabletx(n->chip_id, n->xscom_base, dev->index); > + > + slot = npu2_opencapi_slot_create(&dev->phb_ocapi); > + if (!slot) > + { > + /** > + * @fwts-label OCAPICannotCreatePHBSlot > + * @fwts-advice Firmware probably ran out of memory creating > + * NPU slot. OpenCAPI functionality could be broken. > + */ > + prlog(PR_ERR, "OCAPI: Cannot create PHB slot\n"); > + } > + > + pci_register_phb(&dev->phb_ocapi, OPAL_DYNAMIC_PHB_ID); > + return; > +failed: > + dt_add_property_string(dn_phb, "status", "error"); > + return; > +} > + > static void npu2_opencapi_probe(struct dt_node *dn) > { > struct dt_node *link; > @@ -774,7 +1321,7 @@ static void npu2_opencapi_probe(struct dt_node *dn) > uint64_t reg[2]; > uint64_t dev_index; > struct npu2 *n; > - int rc; > + int rc, i = 0; > > path = dt_get_path(dn); > gcid = dt_get_chip_id(dn); > @@ -792,6 +1339,8 @@ static void npu2_opencapi_probe(struct dt_node *dn) > gcid, index, links, path); > free(path); > > + assert(platform.ocapi); > + > scom_base = dt_get_address(dn, 0, NULL); > prlog(PR_INFO, "OCAPI: SCOM Base: %08x\n", scom_base); > > @@ -824,6 +1373,11 @@ static void npu2_opencapi_probe(struct dt_node *dn) > if (rc) > goto failed; > > + dt_for_each_compatible(dn, link, "ibm,npu-link-opencapi") { > + npu2_opencapi_setup_device(link, n, &n->devices[i]); > + i++; > + } > + > return; > failed: > free(n); > @@ -836,3 +1390,39 @@ void probe_npu2_opencapi(void) > dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu") > npu2_opencapi_probe(np_npu); > } > + > +static const struct phb_ops npu2_opencapi_ops = { > + .cfg_read8 = npu2_opencapi_pcicfg_read8, > + .cfg_read16 = npu2_opencapi_pcicfg_read16, > + .cfg_read32 = npu2_opencapi_pcicfg_read32, > + .cfg_write8 = npu2_opencapi_pcicfg_write8, > + 
.cfg_write16 = npu2_opencapi_pcicfg_write16, > + .cfg_write32 = npu2_opencapi_pcicfg_write32, > + .choose_bus = NULL, > + .device_init = NULL, > + .phb_final_fixup = npu2_opencapi_final_fixup, > + .ioda_reset = npu2_opencapi_ioda_reset, > + .papr_errinjct_reset = NULL, > + .pci_reinit = NULL, > + .set_phb_mem_window = NULL, > + .phb_mmio_enable = NULL, > + .map_pe_mmio_window = NULL, > + .map_pe_dma_window = NULL, > + .map_pe_dma_window_real = NULL, > + .pci_msi_eoi = NULL, > + .set_xive_pe = NULL, > + .get_msi_32 = NULL, > + .get_msi_64 = NULL, > + .set_pe = npu2_set_pe, > + .set_peltv = NULL, > + .eeh_freeze_status = npu2_freeze_status, /* TODO */ > + .eeh_freeze_clear = NULL, > + .eeh_freeze_set = NULL, > + .next_error = NULL, > + .err_inject = NULL, > + .get_diag_data = NULL, > + .get_diag_data2 = NULL, > + .set_capi_mode = NULL, > + .set_capp_recovery = NULL, > + .tce_kill = NULL, > +}; > diff --git a/hw/npu2.c b/hw/npu2.c > index 3e30f19..7f209de 100644 > --- a/hw/npu2.c > +++ b/hw/npu2.c > @@ -1001,17 +1001,17 @@ static int64_t npu2_map_pe_dma_window(struct phb *phb, > return OPAL_SUCCESS; > } > > -static int64_t npu2_set_pe(struct phb *phb, > - uint64_t pe_num, > - uint64_t bdfn, > - uint8_t bcompare, > - uint8_t dcompare, > - uint8_t fcompare, > - uint8_t action) > +int64_t npu2_set_pe(struct phb *phb, > + uint64_t pe_num, > + uint64_t bdfn, > + uint8_t bcompare, > + uint8_t dcompare, > + uint8_t fcompare, > + uint8_t action) > { > struct npu2 *p; > struct npu2_dev *dev; > - uint64_t reg, val; > + uint64_t reg, val, pe_bdfn; > > /* Sanity check */ > if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE) > @@ -1036,21 +1036,31 @@ static int64_t npu2_set_pe(struct phb *phb, > if (!dev) > return OPAL_PARAMETER; > > - val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE; > - val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num); > - val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn); > - > - if (!NPU2DEV_BRICK(dev)) > - reg = 
NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, > - NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0); > + if (dev->type == NPU2_DEV_TYPE_OPENCAPI) > + pe_bdfn = dev->bdfn; > else > - reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, > - NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0); > + pe_bdfn = dev->nvlink.gpu_bdfn; > + > + if (dev->type == NPU2_DEV_TYPE_NVLINK) { > + val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE; > + val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num); > + val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn); > + > + if (!NPU2DEV_BRICK(dev)) > + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, > + NPU2_BLOCK_CTL, > + NPU2_CQ_BRICK0_BDF2PE_MAP0); > + else > + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, > + NPU2_BLOCK_CTL, > + NPU2_CQ_BRICK1_BDF2PE_MAP0); > + > + npu2_write(p, reg, val); > + } > > - npu2_write(p, reg, val); > val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE; > val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num); > - val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn); > + val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn); > reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, > NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18)); > p->bdf2pe_cache[dev->index] = val; > @@ -1111,12 +1121,12 @@ static struct pci_slot *npu2_slot_create(struct phb *phb) > return slot; > } > > -static int64_t npu2_freeze_status(struct phb *phb __unused, > - uint64_t pe_number __unused, > - uint8_t *freeze_state, > - uint16_t *pci_error_type __unused, > - uint16_t *severity __unused, > - uint64_t *phb_status __unused) > +int64_t npu2_freeze_status(struct phb *phb __unused, > + uint64_t pe_number __unused, > + uint8_t *freeze_state, > + uint16_t *pci_error_type __unused, > + uint16_t *severity __unused, > + uint64_t *phb_status __unused) > { > /* > * FIXME: When it's called by skiboot PCI config accessor, > diff --git a/include/npu2-regs.h b/include/npu2-regs.h > index b219ad7..71c8d8f 100644 > --- 
a/include/npu2-regs.h > +++ b/include/npu2-regs.h > @@ -116,6 +116,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38) > #define NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57) > #define NPU2_CQ_SM_MISC_CFG1 0x008 > +#define NPU2_CQ_SM_MISC_CFG2 0x148 > #define NPU2_PB_EPSILON 0x010 > #define NPU2_TIMER_CFG 0x018 > #define NPU2_GPU0_MEM_BAR 0x020 > @@ -187,7 +188,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52) > #define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55) > #define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56) > -#define NPU2_CQ_FUTURE_CFG1 0x008 > +#define NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG 0x0B0 > +#define NPU2_CQ_CTL_MISC_MMIOPA_ADDR PPC_BITMASK(1,35) > +#define NPU2_CQ_CTL_MISC_MMIOPA_SIZE PPC_BITMASK(39,43) > +#define NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG 0x0B8 > +#define NPU2_CQ_CTL_MISC_CFG1 0x008 > #define NPU2_CQ_FUTURE_CFG2 0x010 > #define NPU2_CQ_FUTURE_CFG3 0x018 > #define NPU2_CQ_PERF_MATCH 0x020 > @@ -219,6 +224,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_CQ_C_ERR_RPT_MASK1 0x0E8 > #define NPU2_CQ_C_ERR_RPT_HOLD0 0x0F0 > #define NPU2_CQ_C_ERR_RPT_HOLD1 0x0F8 > +#define NPU2_CQ_CTL_CONFIG_ADDR0 0x120 > +#define NPU2_CQ_CTL_CONFIG_ADDR1 0x128 > +#define NPU2_CQ_CTL_CONFIG_ADDR_ENABLE PPC_BIT(0) > +#define NPU2_CQ_CTL_CONFIG_ADDR_STATUS PPC_BITMASK(1, 3) > +#define NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER PPC_BITMASK(4, 11) > +#define NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER PPC_BITMASK(12, 16) > +#define NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER PPC_BITMASK(17, 19) > +#define NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER PPC_BITMASK(20, 31) > +#define NPU2_CQ_CTL_CONFIG_ADDR_TYPE PPC_BIT(32) > #define NPU2_CQ_CTL_FENCE_CONTROL_0 0x140 > #define NPU2_CQ_CTL_FENCE_CONTROL_1 0x148 > #define NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE PPC_BITMASK(0, 1) > @@ -331,9 +345,16 @@ void 
npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_OTL_TLX_CREDITS_VC3_CREDITS PPC_BITMASK(24, 31) > #define NPU2_OTL_TLX_CREDITS_DCP0_CREDITS PPC_BITMASK(32, 39) > #define NPU2_OTL_TLX_CREDITS_DCP1_CREDITS PPC_BITMASK(56, 63) > +#define NPU2_OTL_VC_CREDITS(stack, block) NPU2_REG_OFFSET(stack, block, 0x090) > #define NPU2_OTL_CONFIG1(stack, block) NPU2_REG_OFFSET(stack, block, 0x058) > +#define NPU2_OTL_CONFIG1_TX_TEMP1_EN PPC_BIT(1) > +#define NPU2_OTL_CONFIG1_TX_TEMP2_EN PPC_BIT(2) > +#define NPU2_OTL_CONFIG1_TX_TEMP3_EN PPC_BIT(3) > #define NPU2_OTL_CONFIG1_TX_DRDY_WAIT PPC_BITMASK(5, 7) > #define NPU2_OTL_CONFIG1_TX_TEMP0_RATE PPC_BITMASK(8, 11) > +#define NPU2_OTL_CONFIG1_TX_TEMP1_RATE PPC_BITMASK(12, 15) > +#define NPU2_OTL_CONFIG1_TX_TEMP2_RATE PPC_BITMASK(16, 19) > +#define NPU2_OTL_CONFIG1_TX_TEMP3_RATE PPC_BITMASK(20, 23) > #define NPU2_OTL_CONFIG1_TX_CRET_FREQ PPC_BITMASK(32, 34) > #define NPU2_OTL_CONFIG1_TX_AGE_FREQ PPC_BITMASK(35, 39) > #define NPU2_OTL_CONFIG1_TX_RS2_HPWAIT PPC_BITMASK(40, 45) > @@ -342,6 +363,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define NPU2_OTL_CONFIG1_TX_CBUF_ECC_DIS PPC_BIT(58) > #define NPU2_OTL_CONFIG1_TX_STOP_LINK PPC_BIT(59) > #define NPU2_OTL_CONFIG1_TX_STOP_ON_UE PPC_BIT(60) > +#define NPU2_OTL_CONFIG1_TX_T0_MASK_CRTN0 PPC_BIT(61) > +#define NPU2_OTL_CONFIG1_TX_T123_MASK_CRTN0 PPC_BIT(62) > +#define NPU2_OTL_CONFIG2(stack, block) NPU2_REG_OFFSET(stack, block, 0x0C0) > +#define NPU2_OTL_CONFIG2_TX_SEND_EN PPC_BIT(0) > + > +#define NPU2_OTL_OSL_DSISR(stack, block) NPU2_REG_OFFSET(stack, block, 0x000) > +#define NPU2_OTL_OSL_DAR(stack, block) NPU2_REG_OFFSET(stack, block, 0x008) > +#define NPU2_OTL_OSL_TFC(stack, block) NPU2_REG_OFFSET(stack, block, 0x010) > +#define NPU2_OTL_OSL_PEHANDLE(stack, block) NPU2_REG_OFFSET(stack, block, 0x018) > > /* Misc block registers. 
Unlike the SM/CTL/DAT/NTL registers above > * there is only a single instance of each of these in the NPU so we > @@ -553,6 +583,26 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, > #define PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE PPC_BIT(52) > #define PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE PPC_BIT(57) > > +#define OB0_ODL0_CONFIG 0x901082A > +#define OB0_ODL1_CONFIG 0x901082B > +#define OB3_ODL0_CONFIG 0xC01082A > +#define OB3_ODL1_CONFIG 0xC01082B > +#define OB_ODL_CONFIG_RESET PPC_BIT(0) > +#define OB_ODL_CONFIG_VERSION PPC_BITMASK(2, 7) > +#define OB_ODL_CONFIG_TRAIN_MODE PPC_BITMASK(8, 11) > +#define OB_ODL_CONFIG_SUPPORTED_MODES PPC_BITMASK(12, 15) > +#define OB_ODL_CONFIG_X4_BACKOFF_ENABLE PPC_BIT(16) > +#define OB_ODL_CONFIG_PHY_CNTR_LIMIT PPC_BITMASK(20, 23) > +#define OB_ODL_CONFIG_DEBUG_ENABLE PPC_BIT(33) > +#define OB_ODL_CONFIG_FWD_PROGRESS_TIMER PPC_BITMASK(40, 43) > + > +#define OB0_ODL0_STATUS 0x901082C > +#define OB0_ODL1_STATUS 0x901082D > +#define OB3_ODL0_STATUS 0xC01082C > +#define OB3_ODL1_STATUS 0xC01082D > +#define OB_ODL_STATUS_TRAINED_MODE PPC_BITMASK(0,3) > +#define OB_ODL_STATUS_TRAINING_STATE_MACHINE PPC_BITMASK(49, 51) > + > #define OB0_ODL0_TRAINING_STATUS 0x901082E > #define OB0_ODL1_TRAINING_STATUS 0x901082F > #define OB3_ODL0_TRAINING_STATUS 0xC01082E > diff --git a/include/npu2.h b/include/npu2.h > index 2922d21..bd5bced 100644 > --- a/include/npu2.h > +++ b/include/npu2.h > @@ -207,5 +207,17 @@ extern int nv_zcal_nominal; > bool is_p9dd1(void); > void npu2_opencapi_phy_setup(struct npu2_dev *dev); > void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev); > - > +int64_t npu2_set_pe(struct phb *phb, > + uint64_t pe_num, > + uint64_t bdfn, > + uint8_t bcompare, > + uint8_t dcompare, > + uint8_t fcompare, > + uint8_t action); > +int64_t npu2_freeze_status(struct phb *phb __unused, > + uint64_t pe_number __unused, > + uint8_t *freeze_state, > + uint16_t *pci_error_type __unused, > + uint16_t *severity __unused, > + uint64_t *phb_status 
__unused); > #endif /* __NPU2_H */ >
diff --git a/core/pci.c b/core/pci.c index 0809521..c712263 100644 --- a/core/pci.c +++ b/core/pci.c @@ -1549,7 +1549,8 @@ static void pci_add_one_device_node(struct phb *phb, pd->dn = np = dt_new(parent_node, name); /* XXX FIXME: make proper "compatible" properties */ - if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false) || + phb->phb_type == phb_type_npu_v2_opencapi) { snprintf(compat, MAX_NAME, "pciex%x,%x", vdid & 0xffff, vdid >> 16); dt_add_property_cells(np, "ibm,pci-config-space-type", 1); diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c index a66c8f7..d361b84 100644 --- a/hw/npu2-opencapi.c +++ b/hw/npu2-opencapi.c @@ -53,6 +53,9 @@ #include <i2c.h> #define NPU_IRQ_LEVELS 35 +#define NPU_IRQ_LEVELS_XSL 23 + +static const struct phb_ops npu2_opencapi_ops; static inline uint64_t index_to_stack(uint64_t index) { switch (index) { @@ -99,6 +102,28 @@ static inline uint64_t index_to_block(uint64_t index) { } } +static uint64_t get_odl_status(uint32_t gcid, uint64_t index) { + uint64_t reg, status_xscom; + switch (index) { + case 2: + status_xscom = OB0_ODL0_STATUS; + break; + case 3: + status_xscom = OB0_ODL1_STATUS; + break; + case 4: + status_xscom = OB3_ODL0_STATUS; + break; + case 5: + status_xscom = OB3_ODL1_STATUS; + break; + default: + assert(false); + } + xscom_read(gcid, status_xscom, &reg); + return reg; +} + /* Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5 */ static void set_transport_mux_controls(uint32_t gcid, uint32_t scom_base, @@ -673,6 +698,397 @@ static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base, reg[1] = size; } +/* Procedure 13.1.3.8 - AFU MMIO Range BARs */ +static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base, + struct npu2_dev *dev) +{ + uint64_t stack = index_to_stack(dev->index); + uint64_t offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ? 
+ NPU2_NTL0_BAR : NPU2_NTL1_BAR; + uint64_t pa_offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ? + NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG : + NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG; + uint64_t addr, size, reg; + + prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__); + phys_map_get(gcid, NPU_OCAPI_MMIO, dev->index, &addr, &size); + + prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size); + write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr, + size); + dev->bars[0].npu2_bar.base = addr; + dev->bars[0].npu2_bar.size = size; + + reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, addr >> 16); + reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(size >> 16)); + prlog(PR_DEBUG, "OCAPI: PA translation %llx\n", reg); + npu2_scom_write(gcid, scom_base, + NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, + pa_offset), + NPU2_MISC_DA_LEN_8B, reg); +} + +/* Procedure 13.1.3.9 - AFU Config BARs */ +static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base, + struct npu2_dev *dev) +{ + uint64_t stack = index_to_stack(dev->index); + int stack_num = stack - NPU2_STACK_STCK_0; + uint64_t addr, size; + + prlog(PR_DEBUG, "OCAPI: %s: Setup AFU Config BARs\n", __func__); + phys_map_get(gcid, NPU_GENID, stack_num, &addr, &size); + prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", addr); + write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR), + addr, size); + dev->bars[1].npu2_bar.base = addr; + dev->bars[1].npu2_bar.size = size; +} + +static void otl_enabletx(uint32_t gcid, uint32_t scom_base, uint64_t index) +{ + uint64_t stack = index_to_stack(index); + uint64_t block = index_to_block(index); + uint64_t reg; + + /* OTL Config 2 Register */ + /* Transmit Enable */ + prlog(PR_DEBUG, "OCAPI: %s: Enabling TX\n", __func__); + reg = 0; + reg |= NPU2_OTL_CONFIG2_TX_SEND_EN; + npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG2(stack, block), + NPU2_MISC_DA_LEN_8B, reg); + + reg = npu2_scom_read(gcid, scom_base, 
NPU2_OTL_VC_CREDITS(stack, block), + NPU2_MISC_DA_LEN_8B); + prlog(PR_DEBUG, "OCAPI: credit counter: %llx\n", reg); + /* TODO: Abort if credits are zero */ +} + +static void reset_ocapi_device(struct npu2_dev *dev) +{ + uint8_t data[3]; + int rc; + int i; + + switch (dev->index) { + case 2: + case 4: + memcpy(data, platform.ocapi->i2c_odl0_data, sizeof(data)); + break; + case 3: + case 5: + memcpy(data, platform.ocapi->i2c_odl1_data, sizeof(data)); + break; + default: + assert(false); + } + + for (i = 0; i < 3; i++) { + rc = i2c_request_send(dev->i2c_port_id_ocapi, 0x20, SMBUS_WRITE, + platform.ocapi->i2c_offset[i], 1, + &data[i], sizeof(data[i]), 120); + if (rc) { + /** + * @fwts-label OCAPIDeviceResetFailed + * @fwts-advice There was an error attempting to send + * a reset signal over I2C to the OpenCAPI device. + */ + prlog(PR_ERR, "OCAPI: Error writing I2C reset signal: %d\n", rc); + break; + } + if (i != 0) + time_wait_ms(5); + } +} + +static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev) +{ + uint64_t reg, config_xscom; + int timeout = 3000; + prlog(PR_DEBUG, "OCAPI: %s: Training ODL\n", __func__); + + switch (index) { + case 2: + config_xscom = OB0_ODL0_CONFIG; + break; + case 3: + config_xscom = OB0_ODL1_CONFIG; + break; + case 4: + config_xscom = OB3_ODL0_CONFIG; + break; + case 5: + config_xscom = OB3_ODL1_CONFIG; + break; + default: + assert(false); + } + + /* Reset ODL */ + reg = OB_ODL_CONFIG_RESET; + reg = SETFIELD(OB_ODL_CONFIG_VERSION, reg, 0b000001); + reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b0110); + reg = SETFIELD(OB_ODL_CONFIG_SUPPORTED_MODES, reg, 0b0010); + reg |= OB_ODL_CONFIG_X4_BACKOFF_ENABLE; + reg = SETFIELD(OB_ODL_CONFIG_PHY_CNTR_LIMIT, reg, 0b1111); + reg |= OB_ODL_CONFIG_DEBUG_ENABLE; + reg = SETFIELD(OB_ODL_CONFIG_FWD_PROGRESS_TIMER, reg, 0b0110); + xscom_write(gcid, config_xscom, reg); + + reg &= ~OB_ODL_CONFIG_RESET; + xscom_write(gcid, config_xscom, reg); + + reset_ocapi_device(dev); + + /* Transmit 
Pattern A */ + reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b0001); + xscom_write(gcid, config_xscom, reg); + time_wait_ms(5); + + /* Bump lanes - this improves training reliability */ + npu2_opencapi_bump_ui_lane(dev); + + /* Start training */ + reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b1000); + xscom_write(gcid, config_xscom, reg); + + do { + reg = get_odl_status(gcid, index); + if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) == 0x7) { + prlog(PR_NOTICE, + "OCAPI: Link %d on chip %u trained in %dms\n", + index, gcid, 3000 - timeout); + return OPAL_SUCCESS; + } + time_wait_ms(1); + } while (timeout--); + prlog(PR_INFO, "OCAPI: Link %d on chip %u failed to train, retrying\n", + index, gcid); + prlog(PR_INFO, "OCAPI: Link status: %016llx\n", reg); + return OPAL_HARDWARE; +} + +static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val) +{ + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb); + uint64_t reg; + int64_t link_width, rc = OPAL_SUCCESS; + + reg = get_odl_status(dev->npu->chip_id, dev->index); + link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg); + switch (link_width) { + case 0b0001: + *val = OPAL_SHPC_LINK_UP_x4; + break; + case 0b0010: + *val = OPAL_SHPC_LINK_UP_x8; + break; + default: + rc = OPAL_HARDWARE; + } + return rc; +} + +static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb) +{ + struct pci_slot *slot; + + slot = pci_slot_alloc(phb, NULL); + if (!slot) + return slot; + + /* TODO: Figure out other slot functions */ + slot->ops.get_presence_state = NULL; + slot->ops.get_link_state = npu2_opencapi_get_link_state; + slot->ops.get_power_state = NULL; + slot->ops.get_attention_state = NULL; + slot->ops.get_latch_state = NULL; + slot->ops.set_power_state = NULL; + slot->ops.set_attention_state = NULL; + + return slot; +} + +static int64_t npu2_opencapi_pcicfg_check(struct npu2_dev *dev, uint32_t offset, + uint32_t size) +{ + if (!dev || offset > 0xfff || (offset & (size - 1))) + return 
OPAL_PARAMETER; + + return OPAL_SUCCESS; +} + +static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn, + uint32_t offset, uint32_t size, + void *data) +{ + uint64_t cfg_addr; + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb); + uint64_t genid_base; + int64_t rc; + + rc = npu2_opencapi_pcicfg_check(dev, offset, size); + if (rc) + return rc; + + genid_base = dev->bars[1].npu2_bar.base + + (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0); + + cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE; + cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER | + NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER | + NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER, + cfg_addr, bdfn); + cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER, + cfg_addr, offset & ~3u); + + out_be64((uint64_t *)genid_base, cfg_addr); + sync(); + + switch (size) { + case 1: + *((uint8_t *)data) = + in_8((volatile uint8_t *)(genid_base + 128 + (offset & 3))); + break; + case 2: + *((uint16_t *)data) = + in_le16((volatile uint16_t *)(genid_base + 128 + (offset & 2))); + break; + case 4: + *((uint32_t *)data) = in_le32((volatile uint32_t *)(genid_base + 128)); + break; + default: + return OPAL_PARAMETER; + } + + return OPAL_SUCCESS; +} + +#define NPU2_OPENCAPI_PCI_CFG_READ(size, type) \ +static int64_t npu2_opencapi_pcicfg_read##size(struct phb *phb, \ + uint32_t bdfn, \ + uint32_t offset, \ + type *data) \ +{ \ + /* Initialize data in case of error */ \ + *data = (type)0xffffffff; \ + return npu2_opencapi_pcicfg_read(phb, bdfn, offset, \ + sizeof(type), data); \ +} + +static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn, + uint32_t offset, uint32_t size, + uint32_t data) +{ + uint64_t cfg_addr; + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb); + uint64_t genid_base; + int64_t rc; + + rc = npu2_opencapi_pcicfg_check(dev, offset, size); + if (rc) + return rc; + + genid_base = dev->bars[1].npu2_bar.base + + (index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 
256 : 0); + + cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE; + cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER | + NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER | + NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER, + cfg_addr, bdfn); + cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER, + cfg_addr, offset & ~3u); + + out_be64((uint64_t *)genid_base, cfg_addr); + sync(); + + switch (size) { + case 1: + out_8((volatile uint8_t *)(genid_base + 128 + (offset & 3)), + data); + break; + case 2: + out_le16((volatile uint16_t *)(genid_base + 128 + (offset & 2)), + data); + break; + case 4: + out_le32((volatile uint32_t *)(genid_base + 128), data); + break; + default: + return OPAL_PARAMETER; + } + + return OPAL_SUCCESS; +} + +#define NPU2_OPENCAPI_PCI_CFG_WRITE(size, type) \ +static int64_t npu2_opencapi_pcicfg_write##size(struct phb *phb, \ + uint32_t bdfn, \ + uint32_t offset, \ + type data) \ +{ \ + return npu2_opencapi_pcicfg_write(phb, bdfn, offset, \ + sizeof(type), data); \ +} + +NPU2_OPENCAPI_PCI_CFG_READ(8, u8) +NPU2_OPENCAPI_PCI_CFG_READ(16, u16) +NPU2_OPENCAPI_PCI_CFG_READ(32, u32) +NPU2_OPENCAPI_PCI_CFG_WRITE(8, u8) +NPU2_OPENCAPI_PCI_CFG_WRITE(16, u16) +NPU2_OPENCAPI_PCI_CFG_WRITE(32, u32) + +static int64_t npu2_opencapi_ioda_reset(struct phb __unused *phb, + bool __unused purge) +{ + /* Not relevant to OpenCAPI - we do this just to silence the error */ + return OPAL_SUCCESS; +} + +static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd, + void *data __unused) +{ + uint32_t irq; + struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb); + uint64_t block = index_to_block(dev->index); + uint64_t stacku = index_to_stacku(dev->index); + uint64_t dsisr, dar, tfc, handle; + + /* + * Pass the hw irq number for the translation fault irq + * irq levels 23 -> 26 are for translation faults, 1 per brick + */ + irq = dev->npu->irq_base + NPU_IRQ_LEVELS_XSL; + if (stacku == NPU2_STACK_STCK_2U) + irq += 2; + if (block == NPU2_BLOCK_OTL1) + irq++; + + /* + * Add the 
addresses of the registers needed by the OS to handle + * faults. The OS accesses them by mmio. + */ + dsisr = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DSISR(stacku, block); + dar = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DAR(stacku, block); + tfc = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_TFC(stacku, block); + handle = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_PEHANDLE(stacku, + block); + dt_add_property_cells(pd->dn, "ibm,opal-xsl-irq", irq); + dt_add_property_cells(pd->dn, "ibm,opal-xsl-mmio", + hi32(dsisr), lo32(dsisr), + hi32(dar), lo32(dar), + hi32(tfc), lo32(tfc), + hi32(handle), lo32(handle)); + return 0; +} + +static void npu2_opencapi_final_fixup(struct phb *phb) +{ + pci_walk_dev(phb, NULL, npu2_add_mmio_regs, NULL); +} + static void mask_nvlink_fir(struct npu2 *p) { uint64_t reg; @@ -766,6 +1182,137 @@ static int setup_irq(struct npu2 *p) return 0; } +static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n, + struct npu2_dev *dev) +{ + uint32_t dev_index, npu_index; + struct dt_node *dn_phb, *dn; + struct pci_slot *slot; + char port_name[17]; + uint64_t mm_win[2]; + int retries = 20; + int rc; + + dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index"); + npu_index = dt_prop_get_u32(n->dt_node, "ibm,npu-index"); + + /* Populate PHB device node */ + phys_map_get(n->chip_id, NPU_OCAPI_MMIO, dev_index, &mm_win[0], + &mm_win[1]); + prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n", + mm_win[0], mm_win[1]); + dn_phb = dt_new_addr(dt_root, "pciex", mm_win[0]); + assert(dn_phb); + dt_add_property_strings(dn_phb, + "compatible", + "ibm,power9-npu-opencapi-pciex", + "ibm,ioda2-npu2-opencapi-phb"); + + dt_add_property_cells(dn_phb, "#address-cells", 3); + dt_add_property_cells(dn_phb, "#size-cells", 2); + dt_add_property_cells(dn_phb, "#interrupt-cells", 1); + dt_add_property_cells(dn_phb, "bus-range", 0, 0xff); + dt_add_property_cells(dn_phb, "clock-frequency", 0x200, 0); + dt_add_property_cells(dn_phb, 
"interrupt-parent", get_ics_phandle()); + + dt_add_property_strings(dn_phb, "device_type", "pciex"); + dt_add_property(dn_phb, "reg", mm_win, sizeof(mm_win)); + dt_add_property_cells(dn_phb, "ibm,npu-index", npu_index); + dt_add_property_cells(dn_phb, "ibm,chip-id", n->chip_id); + dt_add_property_cells(dn_phb, "ibm,xscom-base", n->xscom_base); + dt_add_property_cells(dn_phb, "ibm,npcq", n->dt_node->phandle); + dt_add_property_cells(dn_phb, "ibm,links", 1); + dt_add_property(dn_phb, "ibm,mmio-window", mm_win, sizeof(mm_win)); + dt_add_property_cells(dn_phb, "ibm,phb-diag-data-size", 0); + dt_add_property_cells(dn_phb, "ibm,opal-num-pes", NPU2_MAX_PE_NUM); + + n->mm_base = mm_win[0]; + n->mm_size = mm_win[1]; + + dt_add_property_cells(dn_phb, "ranges", 0x02000000, + hi32(n->mm_base), lo32(n->mm_base), + hi32(n->mm_base), lo32(n->mm_base), + hi32(n->mm_size), lo32(n->mm_size)); + + dev->type = NPU2_DEV_TYPE_OPENCAPI; + dev->npu = n; + dev->dt_node = dn_link; + dev->phb_ocapi.dt_node = dn_phb; + dev->phb_ocapi.ops = &npu2_opencapi_ops; + dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi; + dev->phb_ocapi.scan_map = 1; + dev->index = dt_prop_get_u32(dn_link, "ibm,npu-link-index"); + dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy"); + dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask"); + dev->bdfn = 0; + n->total_devices++; + + /* Find I2C port for handling device reset */ + snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d", + dev->npu->chip_id, platform.ocapi->i2c_engine, + platform.ocapi->i2c_port); + prlog(PR_DEBUG, "OCAPI: Looking for I2C port %s\n", port_name); + + dt_for_each_compatible(dt_root, dn, "ibm,power9-i2c-port") { + if (streq(port_name, dt_prop_get(dn, "ibm,port-name"))) { + dev->i2c_port_id_ocapi = dt_prop_get_u32(dn, "ibm,opal-id"); + break; + } + } + + if (!dev->i2c_port_id_ocapi) { + prlog(PR_ERR, "OCAPI: Couldn't find I2C port %s\n", port_name); + goto failed; + } + + /* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs 
*/ + /* Procedure 13.1.3.8 - AFU MMIO Range BARs */ + setup_afu_mmio_bars(n->chip_id, n->xscom_base, dev); + /* Procedure 13.1.3.9 - AFU Config BARs */ + setup_afu_config_bars(n->chip_id, n->xscom_base, dev); + + set_fence_control(n->chip_id, n->xscom_base, dev->index, 0b00); + + npu2_opencapi_phy_setup(dev); + + do { + rc = odl_train(n->chip_id, dev->index, dev); + } while (rc != OPAL_SUCCESS && --retries); + + if (rc != OPAL_SUCCESS && retries == 0) { + /** + * @fwts-label OCAPILinkTrainingFailed + * @fwts-advice The OpenCAPI link training procedure failed. + * This indicates a hardware or firmware bug. OpenCAPI + * functionality will not be available on this link. + */ + prlog(PR_ERR, "OCAPI: Link %d on chip %u failed to train\n", + dev->index, n->chip_id); + prlog(PR_ERR, "OCAPI: Final link status: %016llx\n", + get_odl_status(n->chip_id, dev->index)); + goto failed; + } + + otl_enabletx(n->chip_id, n->xscom_base, dev->index); + + slot = npu2_opencapi_slot_create(&dev->phb_ocapi); + if (!slot) + { + /** + * @fwts-label OCAPICannotCreatePHBSlot + * @fwts-advice Firmware probably ran out of memory creating + * NPU slot. OpenCAPI functionality could be broken. 
+ */ + prlog(PR_ERR, "OCAPI: Cannot create PHB slot\n"); + } + + pci_register_phb(&dev->phb_ocapi, OPAL_DYNAMIC_PHB_ID); + return; +failed: + dt_add_property_string(dn_phb, "status", "error"); + return; +} + static void npu2_opencapi_probe(struct dt_node *dn) { struct dt_node *link; @@ -774,7 +1321,7 @@ static void npu2_opencapi_probe(struct dt_node *dn) uint64_t reg[2]; uint64_t dev_index; struct npu2 *n; - int rc; + int rc, i = 0; path = dt_get_path(dn); gcid = dt_get_chip_id(dn); @@ -792,6 +1339,8 @@ static void npu2_opencapi_probe(struct dt_node *dn) gcid, index, links, path); free(path); + assert(platform.ocapi); + scom_base = dt_get_address(dn, 0, NULL); prlog(PR_INFO, "OCAPI: SCOM Base: %08x\n", scom_base); @@ -824,6 +1373,11 @@ static void npu2_opencapi_probe(struct dt_node *dn) if (rc) goto failed; + dt_for_each_compatible(dn, link, "ibm,npu-link-opencapi") { + npu2_opencapi_setup_device(link, n, &n->devices[i]); + i++; + } + return; failed: free(n); @@ -836,3 +1390,39 @@ void probe_npu2_opencapi(void) dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu") npu2_opencapi_probe(np_npu); } + +static const struct phb_ops npu2_opencapi_ops = { + .cfg_read8 = npu2_opencapi_pcicfg_read8, + .cfg_read16 = npu2_opencapi_pcicfg_read16, + .cfg_read32 = npu2_opencapi_pcicfg_read32, + .cfg_write8 = npu2_opencapi_pcicfg_write8, + .cfg_write16 = npu2_opencapi_pcicfg_write16, + .cfg_write32 = npu2_opencapi_pcicfg_write32, + .choose_bus = NULL, + .device_init = NULL, + .phb_final_fixup = npu2_opencapi_final_fixup, + .ioda_reset = npu2_opencapi_ioda_reset, + .papr_errinjct_reset = NULL, + .pci_reinit = NULL, + .set_phb_mem_window = NULL, + .phb_mmio_enable = NULL, + .map_pe_mmio_window = NULL, + .map_pe_dma_window = NULL, + .map_pe_dma_window_real = NULL, + .pci_msi_eoi = NULL, + .set_xive_pe = NULL, + .get_msi_32 = NULL, + .get_msi_64 = NULL, + .set_pe = npu2_set_pe, + .set_peltv = NULL, + .eeh_freeze_status = npu2_freeze_status, /* TODO */ + .eeh_freeze_clear = NULL, + 
.eeh_freeze_set = NULL, + .next_error = NULL, + .err_inject = NULL, + .get_diag_data = NULL, + .get_diag_data2 = NULL, + .set_capi_mode = NULL, + .set_capp_recovery = NULL, + .tce_kill = NULL, +}; diff --git a/hw/npu2.c b/hw/npu2.c index 3e30f19..7f209de 100644 --- a/hw/npu2.c +++ b/hw/npu2.c @@ -1001,17 +1001,17 @@ static int64_t npu2_map_pe_dma_window(struct phb *phb, return OPAL_SUCCESS; } -static int64_t npu2_set_pe(struct phb *phb, - uint64_t pe_num, - uint64_t bdfn, - uint8_t bcompare, - uint8_t dcompare, - uint8_t fcompare, - uint8_t action) +int64_t npu2_set_pe(struct phb *phb, + uint64_t pe_num, + uint64_t bdfn, + uint8_t bcompare, + uint8_t dcompare, + uint8_t fcompare, + uint8_t action) { struct npu2 *p; struct npu2_dev *dev; - uint64_t reg, val; + uint64_t reg, val, pe_bdfn; /* Sanity check */ if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE) @@ -1036,21 +1036,31 @@ static int64_t npu2_set_pe(struct phb *phb, if (!dev) return OPAL_PARAMETER; - val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE; - val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num); - val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn); - - if (!NPU2DEV_BRICK(dev)) - reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, - NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0); + if (dev->type == NPU2_DEV_TYPE_OPENCAPI) + pe_bdfn = dev->bdfn; else - reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, - NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0); + pe_bdfn = dev->nvlink.gpu_bdfn; + + if (dev->type == NPU2_DEV_TYPE_NVLINK) { + val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE; + val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num); + val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn); + + if (!NPU2DEV_BRICK(dev)) + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, + NPU2_BLOCK_CTL, + NPU2_CQ_BRICK0_BDF2PE_MAP0); + else + reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2, + NPU2_BLOCK_CTL, + NPU2_CQ_BRICK1_BDF2PE_MAP0); + + npu2_write(p, reg, val); + } - npu2_write(p, 
reg, val); val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE; val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num); - val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn); + val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn); reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18)); p->bdf2pe_cache[dev->index] = val; @@ -1111,12 +1121,12 @@ static struct pci_slot *npu2_slot_create(struct phb *phb) return slot; } -static int64_t npu2_freeze_status(struct phb *phb __unused, - uint64_t pe_number __unused, - uint8_t *freeze_state, - uint16_t *pci_error_type __unused, - uint16_t *severity __unused, - uint64_t *phb_status __unused) +int64_t npu2_freeze_status(struct phb *phb __unused, + uint64_t pe_number __unused, + uint8_t *freeze_state, + uint16_t *pci_error_type __unused, + uint16_t *severity __unused, + uint64_t *phb_status __unused) { /* * FIXME: When it's called by skiboot PCI config accessor, diff --git a/include/npu2-regs.h b/include/npu2-regs.h index b219ad7..71c8d8f 100644 --- a/include/npu2-regs.h +++ b/include/npu2-regs.h @@ -116,6 +116,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38) #define NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57) #define NPU2_CQ_SM_MISC_CFG1 0x008 +#define NPU2_CQ_SM_MISC_CFG2 0x148 #define NPU2_PB_EPSILON 0x010 #define NPU2_TIMER_CFG 0x018 #define NPU2_GPU0_MEM_BAR 0x020 @@ -187,7 +188,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52) #define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55) #define NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56) -#define NPU2_CQ_FUTURE_CFG1 0x008 +#define NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG 0x0B0 +#define NPU2_CQ_CTL_MISC_MMIOPA_ADDR PPC_BITMASK(1,35) +#define NPU2_CQ_CTL_MISC_MMIOPA_SIZE PPC_BITMASK(39,43) +#define NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG 0x0B8 +#define 
NPU2_CQ_CTL_MISC_CFG1 0x008 #define NPU2_CQ_FUTURE_CFG2 0x010 #define NPU2_CQ_FUTURE_CFG3 0x018 #define NPU2_CQ_PERF_MATCH 0x020 @@ -219,6 +224,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_CQ_C_ERR_RPT_MASK1 0x0E8 #define NPU2_CQ_C_ERR_RPT_HOLD0 0x0F0 #define NPU2_CQ_C_ERR_RPT_HOLD1 0x0F8 +#define NPU2_CQ_CTL_CONFIG_ADDR0 0x120 +#define NPU2_CQ_CTL_CONFIG_ADDR1 0x128 +#define NPU2_CQ_CTL_CONFIG_ADDR_ENABLE PPC_BIT(0) +#define NPU2_CQ_CTL_CONFIG_ADDR_STATUS PPC_BITMASK(1, 3) +#define NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER PPC_BITMASK(4, 11) +#define NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER PPC_BITMASK(12, 16) +#define NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER PPC_BITMASK(17, 19) +#define NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER PPC_BITMASK(20, 31) +#define NPU2_CQ_CTL_CONFIG_ADDR_TYPE PPC_BIT(32) #define NPU2_CQ_CTL_FENCE_CONTROL_0 0x140 #define NPU2_CQ_CTL_FENCE_CONTROL_1 0x148 #define NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE PPC_BITMASK(0, 1) @@ -331,9 +345,16 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_OTL_TLX_CREDITS_VC3_CREDITS PPC_BITMASK(24, 31) #define NPU2_OTL_TLX_CREDITS_DCP0_CREDITS PPC_BITMASK(32, 39) #define NPU2_OTL_TLX_CREDITS_DCP1_CREDITS PPC_BITMASK(56, 63) +#define NPU2_OTL_VC_CREDITS(stack, block) NPU2_REG_OFFSET(stack, block, 0x090) #define NPU2_OTL_CONFIG1(stack, block) NPU2_REG_OFFSET(stack, block, 0x058) +#define NPU2_OTL_CONFIG1_TX_TEMP1_EN PPC_BIT(1) +#define NPU2_OTL_CONFIG1_TX_TEMP2_EN PPC_BIT(2) +#define NPU2_OTL_CONFIG1_TX_TEMP3_EN PPC_BIT(3) #define NPU2_OTL_CONFIG1_TX_DRDY_WAIT PPC_BITMASK(5, 7) #define NPU2_OTL_CONFIG1_TX_TEMP0_RATE PPC_BITMASK(8, 11) +#define NPU2_OTL_CONFIG1_TX_TEMP1_RATE PPC_BITMASK(12, 15) +#define NPU2_OTL_CONFIG1_TX_TEMP2_RATE PPC_BITMASK(16, 19) +#define NPU2_OTL_CONFIG1_TX_TEMP3_RATE PPC_BITMASK(20, 23) #define NPU2_OTL_CONFIG1_TX_CRET_FREQ PPC_BITMASK(32, 34) #define NPU2_OTL_CONFIG1_TX_AGE_FREQ PPC_BITMASK(35, 39) #define NPU2_OTL_CONFIG1_TX_RS2_HPWAIT 
PPC_BITMASK(40, 45) @@ -342,6 +363,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define NPU2_OTL_CONFIG1_TX_CBUF_ECC_DIS PPC_BIT(58) #define NPU2_OTL_CONFIG1_TX_STOP_LINK PPC_BIT(59) #define NPU2_OTL_CONFIG1_TX_STOP_ON_UE PPC_BIT(60) +#define NPU2_OTL_CONFIG1_TX_T0_MASK_CRTN0 PPC_BIT(61) +#define NPU2_OTL_CONFIG1_TX_T123_MASK_CRTN0 PPC_BIT(62) +#define NPU2_OTL_CONFIG2(stack, block) NPU2_REG_OFFSET(stack, block, 0x0C0) +#define NPU2_OTL_CONFIG2_TX_SEND_EN PPC_BIT(0) + +#define NPU2_OTL_OSL_DSISR(stack, block) NPU2_REG_OFFSET(stack, block, 0x000) +#define NPU2_OTL_OSL_DAR(stack, block) NPU2_REG_OFFSET(stack, block, 0x008) +#define NPU2_OTL_OSL_TFC(stack, block) NPU2_REG_OFFSET(stack, block, 0x010) +#define NPU2_OTL_OSL_PEHANDLE(stack, block) NPU2_REG_OFFSET(stack, block, 0x018) /* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above * there is only a single instance of each of these in the NPU so we @@ -553,6 +583,26 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base, #define PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE PPC_BIT(52) #define PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE PPC_BIT(57) +#define OB0_ODL0_CONFIG 0x901082A +#define OB0_ODL1_CONFIG 0x901082B +#define OB3_ODL0_CONFIG 0xC01082A +#define OB3_ODL1_CONFIG 0xC01082B +#define OB_ODL_CONFIG_RESET PPC_BIT(0) +#define OB_ODL_CONFIG_VERSION PPC_BITMASK(2, 7) +#define OB_ODL_CONFIG_TRAIN_MODE PPC_BITMASK(8, 11) +#define OB_ODL_CONFIG_SUPPORTED_MODES PPC_BITMASK(12, 15) +#define OB_ODL_CONFIG_X4_BACKOFF_ENABLE PPC_BIT(16) +#define OB_ODL_CONFIG_PHY_CNTR_LIMIT PPC_BITMASK(20, 23) +#define OB_ODL_CONFIG_DEBUG_ENABLE PPC_BIT(33) +#define OB_ODL_CONFIG_FWD_PROGRESS_TIMER PPC_BITMASK(40, 43) + +#define OB0_ODL0_STATUS 0x901082C +#define OB0_ODL1_STATUS 0x901082D +#define OB3_ODL0_STATUS 0xC01082C +#define OB3_ODL1_STATUS 0xC01082D +#define OB_ODL_STATUS_TRAINED_MODE PPC_BITMASK(0,3) +#define OB_ODL_STATUS_TRAINING_STATE_MACHINE PPC_BITMASK(49, 51) + #define OB0_ODL0_TRAINING_STATUS 0x901082E #define 
OB0_ODL1_TRAINING_STATUS 0x901082F #define OB3_ODL0_TRAINING_STATUS 0xC01082E diff --git a/include/npu2.h b/include/npu2.h index 2922d21..bd5bced 100644 --- a/include/npu2.h +++ b/include/npu2.h @@ -207,5 +207,17 @@ extern int nv_zcal_nominal; bool is_p9dd1(void); void npu2_opencapi_phy_setup(struct npu2_dev *dev); void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev); - +int64_t npu2_set_pe(struct phb *phb, + uint64_t pe_num, + uint64_t bdfn, + uint8_t bcompare, + uint8_t dcompare, + uint8_t fcompare, + uint8_t action); +int64_t npu2_freeze_status(struct phb *phb __unused, + uint64_t pe_number __unused, + uint8_t *freeze_state, + uint16_t *pci_error_type __unused, + uint16_t *severity __unused, + uint64_t *phb_status __unused); #endif /* __NPU2_H */