[7/9] npu2-opencapi: Train OpenCAPI links and setup devices

Message ID c57f38cdc48a386e503bdc8e98b3d3a87926c5cf.1513579137.git-series.andrew.donnellan@au1.ibm.com
State Changes Requested
Headers show
Series
  • Initial OpenCAPI 3.0 Support for P9
Related show

Commit Message

Andrew Donnellan Dec. 18, 2017, 7:07 a.m.
Scan the OpenCAPI links under the NPU, and for each link, reset the card,
set up a device, train the link and register a PHB.

Implement the necessary operations for the OpenCAPI PHB type, and adapt the
existing npu2_set_pe() function to let us assign a PE number to an OpenCAPI
device. (For now, the PE numbers we assign to OpenCAPI devices are somewhat
meaningless - later on, we'll implement a mapping between Linux-allocated
PE numbers and a separate numbering space for PE numbers we actually
configure in the NPU.)

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
---
 hw/npu2-opencapi.c  | 587 ++++++++++++++++++++++++++++++++++++++++++++-
 hw/npu2.c           |  60 ++---
 include/npu2-regs.h |  56 +++-
 include/npu2.h      |  14 +-
 4 files changed, 689 insertions(+), 28 deletions(-)

Comments

Frederic Barrat Jan. 11, 2018, 2:42 p.m. | #1
On 18/12/2017 at 08:07, Andrew Donnellan wrote:
> Scan the OpenCAPI links under the NPU, and for each link, reset the card,
> set up a device, train the link and register a PHB.
> 
> Implement the necessary operations for the OpenCAPI PHB type, and adapt the
> existing npu2_set_pe() function to let us assign a PE number to an OpenCAPI
> device. (For now, the PE numbers we assign to OpenCAPI devices are somewhat
> meaningless - later on, we'll implement a mapping between Linux-allocated
> PE numbers and a separate numbering space for PE numbers we actually
> configure in the NPU.)
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> Signed-off-by: Frederic Barrat <fbarrat@linux.vnet.ibm.com>
> ---
>   hw/npu2-opencapi.c  | 587 ++++++++++++++++++++++++++++++++++++++++++++-
>   hw/npu2.c           |  60 ++---
>   include/npu2-regs.h |  56 +++-
>   include/npu2.h      |  14 +-
>   4 files changed, 689 insertions(+), 28 deletions(-)
> 
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 55d5cd9..ea55dad 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -53,6 +53,9 @@
>   #include <i2c.h>
> 
>   #define NPU_IRQ_LEVELS		35
> +#define NPU_IRQ_LEVELS_XSL	23
> +
> +static const struct phb_ops npu2_opencapi_ops;
> 
>   static inline uint64_t index_to_stack(uint64_t index) {
>   	switch (index) {
> @@ -99,6 +102,28 @@ static inline uint64_t index_to_block(uint64_t index) {
>   	}
>   }
> 
> +static uint64_t get_odl_status(uint32_t gcid, uint64_t index) {
> +	uint64_t reg, status_xscom;
> +	switch (index) {
> +	case 2:
> +		status_xscom = OB0_ODL0_STATUS;
> +		break;
> +	case 3:
> +		status_xscom = OB0_ODL1_STATUS;
> +		break;
> +	case 4:
> +		status_xscom = OB3_ODL0_STATUS;
> +		break;
> +	case 5:
> +		status_xscom = OB3_ODL1_STATUS;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +	xscom_read(gcid, status_xscom, &reg);
> +	return reg;
> +}
> +
>   /* Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5 */
> 
>   static void set_transport_mux_controls(uint32_t gcid, uint32_t scom_base,
> @@ -674,6 +699,420 @@ static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
>   	reg[1] = size;
>   }
> 
> +/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
> +static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
> +				struct npu2_dev *dev)
> +{
> +	uint64_t stack = index_to_stack(dev->index);
> +	uint64_t offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ?
> +		NPU2_NTL0_BAR : NPU2_NTL1_BAR;
> +	uint64_t pa_offset = index_to_block(dev->index) == NPU2_BLOCK_OTL0 ?
> +		NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG :
> +		NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
> +	uint64_t addr, size, reg;
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
> +	phys_map_get(gcid, NPU_OCAPI_MMIO, dev->index, &addr, &size);
> +
> +	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size);
> +	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr,
> +		size);
> +	dev->bars[0].npu2_bar.base = addr;
> +	dev->bars[0].npu2_bar.size = size;
> +
> +	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, addr >> 16);
> +	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(size >> 16));
> +	prlog(PR_DEBUG, "OCAPI: PA translation %llx\n", reg);
> +	npu2_scom_write(gcid, scom_base,
> +			NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
> +					pa_offset),
> +			NPU2_MISC_DA_LEN_8B, reg);
> +}
> +
> +/* Procedure 13.1.3.9 - AFU Config BARs */
> +static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
> +				  struct npu2_dev *dev)
> +{
> +	uint64_t stack = index_to_stack(dev->index);
> +	int stack_num = stack - NPU2_STACK_STCK_0;
> +	uint64_t addr, size;
> +
> +	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU Config BARs\n", __func__);
> +	phys_map_get(gcid, NPU_GENID, stack_num, &addr, &size);
> +	prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", addr);
> +	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR),
> +		addr, size);
> +	dev->bars[1].npu2_bar.base = addr;
> +	dev->bars[1].npu2_bar.size = size;
> +}
> +
> +static void otl_enabletx(uint32_t gcid, uint32_t scom_base, uint64_t index)
> +{
> +	uint64_t stack = index_to_stack(index);
> +	uint64_t block = index_to_block(index);
> +	uint64_t reg;
> +
> +	/* OTL Config 2 Register */
> +	/* Transmit Enable */
> +	prlog(PR_DEBUG, "OCAPI: %s: Enabling TX\n", __func__);
> +	reg = 0;
> +	reg |= NPU2_OTL_CONFIG2_TX_SEND_EN;
> +	npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG2(stack, block),
> +			NPU2_MISC_DA_LEN_8B, reg);
> +
> +	reg = npu2_scom_read(gcid, scom_base, NPU2_OTL_VC_CREDITS(stack, block),
> +			     NPU2_MISC_DA_LEN_8B);
> +	prlog(PR_DEBUG, "OCAPI: credit counter: %llx\n", reg);
> +	/* TODO: Abort if credits are zero */
> +}
> +
> +static void reset_ocapi_device(uint32_t gcid, int index)
> +{
> +	struct dt_node *dn;
> +	char port_name[17];
> +	uint32_t opal_id = 0;
> +	uint8_t data[3];
> +	int rc;
> +	int i;
> +
> +	assert(platform.ocapi);
> +
> +	if (platform.ocapi->i2c_voltage_18) {
> +		xscom_write_mask(gcid, PERV_ROOT_CTRL2,
> +				 PERV_ROOT_CTRL2_TP_IO_VSB_OP0A_V1P8_EN,
> +				 PERV_ROOT_CTRL2_TP_IO_VSB_OP0A_V1P8_EN);
> +	}
> +
> +	switch (index) {
> +	case 2:
> +	case 4:
> +		memcpy(data, platform.ocapi->i2c_odl0_data, sizeof(data));
> +		break;
> +	case 3:
> +	case 5:
> +		memcpy(data, platform.ocapi->i2c_odl1_data, sizeof(data));
> +		break;
> +	case -1:
> +		memcpy(data, platform.ocapi->i2c_odl01_data, sizeof(data));
> +		break;
> +	default:
> +		assert(false);
> +	}
> +
> +	snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d", gcid,
> +		 platform.ocapi->i2c_engine, platform.ocapi->i2c_port);
> +	prlog(PR_DEBUG, "OCAPI: Looking for I2C port %s\n", port_name);
> +
> +	dt_for_each_compatible(dt_root, dn, "ibm,power9-i2c-port") {
> +		if (streq(port_name, dt_prop_get(dn, "ibm,port-name"))) {
> +			opal_id = dt_prop_get_u32(dn, "ibm,opal-id");
> +			break;
> +		}
> +	}


I'm just realizing now that it's a bit silly to always parse the device 
tree on each reset. The opal_id value is per-chip, I believe, so we 
don't really have a proper place to store it, but maybe we could store 
it per-device?

   Fred


> +
> +	if (!opal_id) {
> +		prlog(PR_ERR, "OCAPI: Couldn't find I2C port %s\n", port_name);
> +		return;
> +	}
> +
> +	for (i = 0; i < 3; i++) {
> +		rc = i2c_request_send(opal_id, 0x20, SMBUS_WRITE,
> +				      platform.ocapi->i2c_offset[i], 1,
> +				      &data[i], sizeof(data[i]), 120);
> +		if (rc) {
> +			/**
> +			 * @fwts-label OCAPIDeviceResetFailed
> +			 * @fwts-advice There was an error attempting to send
> +			 * a reset signal over I2C to the OpenCAPI device.
> +			 */
> +			prlog(PR_ERR, "OCAPI: Error writing I2C reset signal: %d\n", rc);
> +			break;
> +		}
> +		if (i != 0)
> +			time_wait_ms(5);
> +	}
> +}
> +
> +static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
> +{
> +	uint64_t reg, config_xscom;
> +	int timeout = 3000;
> +	prlog(PR_DEBUG, "OCAPI: %s: Training ODL\n", __func__);
> +
> +	switch (index) {
> +	case 2:
> +		config_xscom = OB0_ODL0_CONFIG;
> +		break;
> +	case 3:
> +		config_xscom = OB0_ODL1_CONFIG;
> +		break;
> +	case 4:
> +		config_xscom = OB3_ODL0_CONFIG;
> +		break;
> +	case 5:
> +		config_xscom = OB3_ODL1_CONFIG;
> +		break;
> +	default:
> +		assert(false);
> +	}
> +
> +	/* Reset ODL */
> +	reg = OB_ODL_CONFIG_RESET;
> +	reg = SETFIELD(OB_ODL_CONFIG_VERSION, reg, 0b000001);
> +	reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b0110);
> +	reg = SETFIELD(OB_ODL_CONFIG_SUPPORTED_MODES, reg, 0b0010);
> +	reg |= OB_ODL_CONFIG_X4_BACKOFF_ENABLE;
> +	reg = SETFIELD(OB_ODL_CONFIG_PHY_CNTR_LIMIT, reg, 0b1111);
> +	reg |= OB_ODL_CONFIG_DEBUG_ENABLE;
> +	reg = SETFIELD(OB_ODL_CONFIG_FWD_PROGRESS_TIMER, reg, 0b0110);
> +	xscom_write(gcid, config_xscom, reg);
> +
> +	reg &= ~OB_ODL_CONFIG_RESET;
> +	xscom_write(gcid, config_xscom, reg);
> +
> +	reset_ocapi_device(gcid, index);
> +
> +	/* Transmit Pattern A */
> +	reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b0001);
> +	xscom_write(gcid, config_xscom, reg);
> +	time_wait_ms(5);
> +
> +	/* Bump lanes - this improves training reliability */
> +	npu2_opencapi_bump_ui_lane(dev);
> +
> +	/* Start training */
> +	reg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, reg, 0b1000);
> +	xscom_write(gcid, config_xscom, reg);
> +
> +	do {
> +		reg = get_odl_status(gcid, index);
> +		if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, reg) == 0x7) {
> +			prlog(PR_NOTICE,
> +			      "OCAPI: Link %d on chip %u trained in %dms\n",
> +			      index, gcid, 3000 - timeout);
> +			return OPAL_SUCCESS;
> +		}
> +		time_wait_ms(1);
> +	} while (timeout--);
> +	prlog(PR_INFO, "OCAPI: Link %d on chip %u failed to train, retrying\n",
> +	      index, gcid);
> +	prlog(PR_INFO, "OCAPI: Link status: %016llx\n", reg);
> +	return OPAL_HARDWARE;
> +}
> +
> +static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
> +{
> +	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
> +	uint64_t reg;
> +	int64_t link_width, rc = OPAL_SUCCESS;
> +
> +	reg = get_odl_status(dev->npu->chip_id, dev->index);
> +	link_width = GETFIELD(OB_ODL_STATUS_TRAINED_MODE, reg);
> +	switch (link_width) {
> +	case 0b0001:
> +		*val = OPAL_SHPC_LINK_UP_x4;
> +		break;
> +	case 0b0010:
> +		*val = OPAL_SHPC_LINK_UP_x8;
> +		break;
> +	default:
> +		rc = OPAL_HARDWARE;
> +	}
> +	return rc;
> +}
> +
> +static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb)
> +{
> +	struct pci_slot *slot;
> +
> +	slot = pci_slot_alloc(phb, NULL);
> +	if (!slot)
> +		return slot;
> +
> +	/* TODO: Figure out other slot functions */
> +	slot->ops.get_presence_state = NULL;
> +	slot->ops.get_link_state = npu2_opencapi_get_link_state;
> +	slot->ops.get_power_state = NULL;
> +	slot->ops.get_attention_state = NULL;
> +	slot->ops.get_latch_state     = NULL;
> +	slot->ops.set_power_state     = NULL;
> +	slot->ops.set_attention_state = NULL;
> +
> +	return slot;
> +}
> +
> +static int64_t npu2_opencapi_pcicfg_check(struct npu2_dev *dev, uint32_t offset,
> +					  uint32_t size)
> +{
> +	if (!dev || offset > 0xfff || (offset & (size - 1)))
> +		return OPAL_PARAMETER;
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn,
> +					 uint32_t offset, uint32_t size,
> +					 void *data)
> +{
> +	uint64_t cfg_addr;
> +	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
> +	uint64_t genid_base;
> +	int64_t rc;
> +
> +	rc = npu2_opencapi_pcicfg_check(dev, offset, size);
> +	if (rc)
> +		return rc;
> +
> +	genid_base = dev->bars[1].npu2_bar.base +
> +		(index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
> +
> +	cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
> +	cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER |
> +			    NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER |
> +			    NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER,
> +			    cfg_addr, bdfn);
> +	cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER,
> +			    cfg_addr, offset & ~3u);
> +
> +	out_be64((uint64_t *)genid_base, cfg_addr);
> +	sync();
> +
> +	switch (size) {
> +	case 1:
> +		*((uint8_t *)data) =
> +			in_8((volatile uint8_t *)(genid_base + 128 + (offset & 3)));
> +		break;
> +	case 2:
> +		*((uint16_t *)data) =
> +			in_le16((volatile uint16_t *)(genid_base + 128 + (offset & 2)));
> +		break;
> +	case 4:
> +		*((uint32_t *)data) = in_le32((volatile uint32_t *)(genid_base + 128));
> +		break;
> +	default:
> +		return OPAL_PARAMETER;
> +	}
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +#define NPU2_OPENCAPI_PCI_CFG_READ(size, type)				\
> +static int64_t npu2_opencapi_pcicfg_read##size(struct phb *phb,		\
> +					       uint32_t bdfn,		\
> +					       uint32_t offset,		\
> +					       type *data)		\
> +{									\
> +	/* Initialize data in case of error */				\
> +	*data = (type)0xffffffff;					\
> +	return npu2_opencapi_pcicfg_read(phb, bdfn, offset,		\
> +					 sizeof(type), data);		\
> +}
> +
> +static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn,
> +					  uint32_t offset, uint32_t size,
> +					  uint32_t data)
> +{
> +	uint64_t cfg_addr;
> +	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
> +	uint64_t genid_base;
> +	int64_t rc;
> +
> +	rc = npu2_opencapi_pcicfg_check(dev, offset, size);
> +	if (rc)
> +		return rc;
> +
> +	genid_base = dev->bars[1].npu2_bar.base +
> +		(index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
> +
> +	cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
> +	cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER |
> +			    NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER |
> +			    NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER,
> +			    cfg_addr, bdfn);
> +	cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER,
> +			    cfg_addr, offset & ~3u);
> +
> +	out_be64((uint64_t *)genid_base, cfg_addr);
> +	sync();
> +
> +	switch (size) {
> +	case 1:
> +		out_8((volatile uint8_t *)(genid_base + 128 + (offset & 3)),
> +		      data);
> +		break;
> +	case 2:
> +		out_le16((volatile uint16_t *)(genid_base + 128 + (offset & 2)),
> +					       data);
> +		break;
> +	case 4:
> +		out_le32((volatile uint32_t *)(genid_base + 128), data);
> +		break;
> +	default:
> +		return OPAL_PARAMETER;
> +	}
> +
> +	return OPAL_SUCCESS;
> +}
> +
> +#define NPU2_OPENCAPI_PCI_CFG_WRITE(size, type)				\
> +static int64_t npu2_opencapi_pcicfg_write##size(struct phb *phb,	\
> +						uint32_t bdfn,		\
> +						uint32_t offset,	\
> +						type data)		\
> +{									\
> +	return npu2_opencapi_pcicfg_write(phb, bdfn, offset,		\
> +					  sizeof(type), data);		\
> +}
> +
> +NPU2_OPENCAPI_PCI_CFG_READ(8, u8)
> +NPU2_OPENCAPI_PCI_CFG_READ(16, u16)
> +NPU2_OPENCAPI_PCI_CFG_READ(32, u32)
> +NPU2_OPENCAPI_PCI_CFG_WRITE(8, u8)
> +NPU2_OPENCAPI_PCI_CFG_WRITE(16, u16)
> +NPU2_OPENCAPI_PCI_CFG_WRITE(32, u32)
> +
> +static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
> +			      void *data __unused)
> +{
> +	uint32_t irq;
> +	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
> +	uint64_t block = index_to_block(dev->index);
> +	uint64_t stacku = index_to_stacku(dev->index);
> +	uint64_t dsisr, dar, tfc, handle;
> +
> +	/*
> +	 * Pass the hw irq number for the translation fault irq
> +	 * irq levels 23 -> 26 are for translation faults, 1 per brick
> +	 */
> +	irq = dev->npu->irq_base + NPU_IRQ_LEVELS_XSL;
> +	if (stacku == NPU2_STACK_STCK_2U)
> +		irq += 2;
> +	if (block == NPU2_BLOCK_OTL1)
> +		irq++;
> +
> +	/*
> +	 * Add the addresses of the registers needed by the OS to handle
> +	 * faults. The OS accesses them by mmio.
> +	 */
> +	dsisr  = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DSISR(stacku, block);
> +	dar    = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_DAR(stacku, block);
> +	tfc    = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_TFC(stacku, block);
> +	handle = (uint64_t) dev->npu->regs + NPU2_OTL_OSL_PEHANDLE(stacku,
> +								block);
> +	dt_add_property_cells(pd->dn, "ibm,opal-xsl-irq", irq);
> +	dt_add_property_cells(pd->dn, "ibm,opal-xsl-mmio",
> +			hi32(dsisr), lo32(dsisr),
> +			hi32(dar), lo32(dar),
> +			hi32(tfc), lo32(tfc),
> +			hi32(handle), lo32(handle));
> +	return 0;
> +}
> +
> +static void npu2_opencapi_final_fixup(struct phb *phb)
> +{
> +	pci_walk_dev(phb, NULL, npu2_add_mmio_regs, NULL);
> +}
> +
>   static int setup_irq(struct npu2 *p)
>   {
>   	uint64_t reg, mmio_addr;
> @@ -722,6 +1161,111 @@ static int setup_irq(struct npu2 *p)
>   	return 0;
>   }
> 
> +static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
> +				       struct npu2_dev *dev)
> +{
> +	struct dt_node *dn_phb;
> +	struct pci_slot *slot;
> +	int retries = 20;
> +	int rc;
> +	uint32_t dev_index, npu_index;
> +	uint64_t mm_win[2];
> +
> +	dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> +	npu_index = dt_prop_get_u32(n->dt_node, "ibm,npu-index");
> +
> +	/* Populate PHB device node */
> +	phys_map_get(n->chip_id, NPU_OCAPI_MMIO, dev_index, &mm_win[0],
> +		     &mm_win[1]);
> +	prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n",
> +	      mm_win[0], mm_win[1]);
> +	dn_phb = dt_new_addr(dt_root, "pciex", mm_win[0]);
> +	assert(dn_phb);
> +	dt_add_property_strings(dn_phb,
> +				"compatible",
> +				"ibm,power9-npu-opencapi-pciex",
> +				"ibm,ioda2-npu2-opencapi-phb");
> +	dt_add_property_strings(dn_phb, "device_type", "pciex");
> +	dt_add_property(dn_phb, "reg", &n->regs, sizeof(n->regs));
> +	dt_add_property_cells(dn_phb, "ibm,npu-index", npu_index);
> +	dt_add_property_cells(dn_phb, "ibm,chip-id", n->chip_id);
> +	dt_add_property_cells(dn_phb, "ibm,xscom-base", n->xscom_base);
> +	dt_add_property_cells(dn_phb, "ibm,npcq", dn_link->phandle);
> +	dt_add_property_cells(dn_phb, "ibm,links", 1);
> +	dt_add_property(dn_phb, "ibm,mmio-window", mm_win, sizeof(mm_win));
> +	dt_add_property_cells(dn_phb, "ibm,phb-diag-data-size", 0);
> +	dt_add_property_cells(dn_phb, "bus-range", 0, 0xff);
> +	dt_add_property_cells(dn_phb, "ibm,opal-num-pes", NPU2_MAX_PE_NUM);
> +
> +	n->mm_base = mm_win[0];
> +	n->mm_size = mm_win[1];
> +
> +	dt_add_property_cells(dn_phb, "ranges", 0x02000000,
> +			      hi32(n->mm_base), lo32(n->mm_base),
> +			      hi32(n->mm_base), lo32(n->mm_base),
> +			      hi32(n->mm_size), lo32(n->mm_size));
> +
> +	dev->type = NPU2_DEV_TYPE_OPENCAPI;
> +	dev->npu = n;
> +	dev->dt_node = dn_link;
> +	dev->phb_ocapi.dt_node = dn_phb;
> +	dev->phb_ocapi.ops = &npu2_opencapi_ops;
> +	dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi;
> +	dev->phb_ocapi.scan_map = 1;
> +	dev->index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
> +	dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy");
> +	dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask");
> +	dev->bdfn = 0;
> +	n->total_devices++;
> +
> +	/* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
> +	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
> +	setup_afu_mmio_bars(n->chip_id, n->xscom_base, dev);
> +	/* Procedure 13.1.3.9 - AFU Config BARs */
> +	setup_afu_config_bars(n->chip_id, n->xscom_base, dev);
> +
> +	set_fence_control(n->chip_id, n->xscom_base, dev->index, 0b00);
> +
> +	npu2_opencapi_phy_setup(dev);
> +
> +	do {
> +		rc = odl_train(n->chip_id, dev->index, dev);
> +	} while (rc != OPAL_SUCCESS && --retries);
> +
> +	if (rc != OPAL_SUCCESS && retries == 0) {
> +		/**
> +		 * @fwts-label OCAPILinkTrainingFailed
> +		 * @fwts-advice The OpenCAPI link training procedure failed.
> +		 * This indicates a hardware or firmware bug. OpenCAPI
> +		 * functionality will not be available on this link.
> +		 */
> +		prlog(PR_ERR, "OCAPI: Link %d on chip %u failed to train\n",
> +		      dev->index, n->chip_id);
> +		prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
> +		      get_odl_status(n->chip_id, dev->index));
> +		goto failed;
> +	}
> +
> +	otl_enabletx(n->chip_id, n->xscom_base, dev->index);
> +
> +	slot = npu2_opencapi_slot_create(&dev->phb_ocapi);
> +	if (!slot)
> +	{
> +		/**
> +		 * @fwts-label OCAPICannotCreatePHBSlot
> +		 * @fwts-advice Firmware probably ran out of memory creating
> +		 * NPU slot. OpenCAPI functionality could be broken.
> +		 */
> +		prlog(PR_ERR, "OCAPI: Cannot create PHB slot\n");
> +	}
> +
> +	pci_register_phb(&dev->phb_ocapi, OPAL_DYNAMIC_PHB_ID);
> +	return;
> +failed:
> +	dt_add_property_string(dn_phb, "status", "error");
> +	return;
> +}
> +
>   static void npu2_opencapi_probe(struct dt_node *dn)
>   {
>   	struct dt_node *link;
> @@ -730,7 +1274,7 @@ static void npu2_opencapi_probe(struct dt_node *dn)
>   	uint64_t reg[2];
>   	uint64_t dev_index;
>   	struct npu2 *n;
> -	int rc;
> +	int rc, i = 0;
> 
>   	path = dt_get_path(dn);
>   	gcid = dt_get_chip_id(dn);
> @@ -780,6 +1324,11 @@ static void npu2_opencapi_probe(struct dt_node *dn)
>   	if (rc)
>   		goto failed;
> 
> +	dt_for_each_compatible(dn, link, "ibm,npu-link-opencapi") {
> +		npu2_opencapi_setup_device(link, n, &n->devices[i]);
> +		i++;
> +	}
> +
>   	return;
>   failed:
>   	free(n);
> @@ -792,3 +1341,39 @@ void probe_npu2_opencapi(void)
>   	dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu")
>   		npu2_opencapi_probe(np_npu);
>   }
> +
> +static const struct phb_ops npu2_opencapi_ops = {
> +	.cfg_read8		= npu2_opencapi_pcicfg_read8,
> +	.cfg_read16		= npu2_opencapi_pcicfg_read16,
> +	.cfg_read32		= npu2_opencapi_pcicfg_read32,
> +	.cfg_write8		= npu2_opencapi_pcicfg_write8,
> +	.cfg_write16		= npu2_opencapi_pcicfg_write16,
> +	.cfg_write32		= npu2_opencapi_pcicfg_write32,
> +	.choose_bus		= NULL,
> +	.device_init		= NULL,
> +	.phb_final_fixup	= npu2_opencapi_final_fixup,
> +	.ioda_reset		= NULL,
> +	.papr_errinjct_reset	= NULL,
> +	.pci_reinit		= NULL,
> +	.set_phb_mem_window	= NULL,
> +	.phb_mmio_enable	= NULL,
> +	.map_pe_mmio_window	= NULL,
> +	.map_pe_dma_window	= NULL,
> +	.map_pe_dma_window_real	= NULL,
> +	.pci_msi_eoi		= NULL,
> +	.set_xive_pe		= NULL,
> +	.get_msi_32		= NULL,
> +	.get_msi_64		= NULL,
> +	.set_pe			= npu2_set_pe,
> +	.set_peltv		= NULL,
> +	.eeh_freeze_status	= npu2_freeze_status,  /* TODO */
> +	.eeh_freeze_clear	= NULL,
> +	.eeh_freeze_set		= NULL,
> +	.next_error		= NULL,
> +	.err_inject		= NULL,
> +	.get_diag_data		= NULL,
> +	.get_diag_data2		= NULL,
> +	.set_capi_mode		= NULL,
> +	.set_capp_recovery	= NULL,
> +	.tce_kill		= NULL,
> +};
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 218ac86..8c76cb1 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -1001,17 +1001,17 @@ static int64_t npu2_map_pe_dma_window(struct phb *phb,
>   	return OPAL_SUCCESS;
>   }
> 
> -static int64_t npu2_set_pe(struct phb *phb,
> -			   uint64_t pe_num,
> -			   uint64_t bdfn,
> -			   uint8_t bcompare,
> -			   uint8_t dcompare,
> -			   uint8_t fcompare,
> -			   uint8_t action)
> +int64_t npu2_set_pe(struct phb *phb,
> +		    uint64_t pe_num,
> +		    uint64_t bdfn,
> +		    uint8_t bcompare,
> +		    uint8_t dcompare,
> +		    uint8_t fcompare,
> +		    uint8_t action)
>   {
>   	struct npu2 *p;
>   	struct npu2_dev *dev;
> -	uint64_t reg, val;
> +	uint64_t reg, val, pe_bdfn;
> 
>   	/* Sanity check */
>   	if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE)
> @@ -1036,21 +1036,31 @@ static int64_t npu2_set_pe(struct phb *phb,
>   	if (!dev)
>   		return OPAL_PARAMETER;
> 
> -	val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE;
> -	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num);
> -	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
> -
> -	if (!NPU2DEV_BRICK(dev))
> -		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
> -				      NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0);
> +	if (dev->type == NPU2_DEV_TYPE_OPENCAPI)
> +		pe_bdfn = dev->bdfn;
>   	else
> -		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
> -				      NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0);
> +		pe_bdfn = dev->nvlink.gpu_bdfn;
> +
> +	if (dev->type == NPU2_DEV_TYPE_NVLINK) {
> +		val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE;
> +		val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num);
> +		val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn);
> +
> +		if (!NPU2DEV_BRICK(dev))
> +			reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
> +					      NPU2_BLOCK_CTL,
> +					      NPU2_CQ_BRICK0_BDF2PE_MAP0);
> +		else
> +			reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
> +					      NPU2_BLOCK_CTL,
> +					      NPU2_CQ_BRICK1_BDF2PE_MAP0);
> +
> +		npu2_write(p, reg, val);
> +	}
> 
> -	npu2_write(p, reg, val);
>   	val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE;
>   	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
> -	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
> +	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn);
>   	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
>   			      NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18));
>   	p->bdf2pe_cache[dev->index] = val;
> @@ -1111,12 +1121,12 @@ static struct pci_slot *npu2_slot_create(struct phb *phb)
>   	return slot;
>   }
> 
> -static int64_t npu2_freeze_status(struct phb *phb __unused,
> -				  uint64_t pe_number __unused,
> -				  uint8_t *freeze_state,
> -				  uint16_t *pci_error_type __unused,
> -				  uint16_t *severity __unused,
> -				  uint64_t *phb_status __unused)
> +int64_t npu2_freeze_status(struct phb *phb __unused,
> +			   uint64_t pe_number __unused,
> +			   uint8_t *freeze_state,
> +			   uint16_t *pci_error_type __unused,
> +			   uint16_t *severity __unused,
> +			   uint64_t *phb_status __unused)
>   {
>   	/*
>   	 * FIXME: When it's called by skiboot PCI config accessor,
> diff --git a/include/npu2-regs.h b/include/npu2-regs.h
> index 77b1529..876d7f0 100644
> --- a/include/npu2-regs.h
> +++ b/include/npu2-regs.h
> @@ -116,6 +116,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define   NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38)
>   #define   NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57)
>   #define NPU2_CQ_SM_MISC_CFG1			0x008
> +#define NPU2_CQ_SM_MISC_CFG2			0x148
>   #define NPU2_PB_EPSILON				0x010
>   #define NPU2_TIMER_CFG				0x018
>   #define NPU2_GPU0_MEM_BAR			0x020
> @@ -187,7 +188,11 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52)
>   #define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55)
>   #define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56)
> -#define NPU2_CQ_FUTURE_CFG1			0x008
> +#define NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG		0x0B0
> +#define   NPU2_CQ_CTL_MISC_MMIOPA_ADDR		PPC_BITMASK(1,35)
> +#define   NPU2_CQ_CTL_MISC_MMIOPA_SIZE		PPC_BITMASK(39,43)
> +#define NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG		0x0B8
> +#define NPU2_CQ_CTL_MISC_CFG1			0x008
>   #define NPU2_CQ_FUTURE_CFG2			0x010
>   #define NPU2_CQ_FUTURE_CFG3			0x018
>   #define NPU2_CQ_PERF_MATCH			0x020
> @@ -219,6 +224,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_CQ_C_ERR_RPT_MASK1			0x0E8
>   #define NPU2_CQ_C_ERR_RPT_HOLD0			0x0F0
>   #define NPU2_CQ_C_ERR_RPT_HOLD1			0x0F8
> +#define NPU2_CQ_CTL_CONFIG_ADDR0		0x120
> +#define NPU2_CQ_CTL_CONFIG_ADDR1		0x128
> +#define   NPU2_CQ_CTL_CONFIG_ADDR_ENABLE	PPC_BIT(0)
> +#define   NPU2_CQ_CTL_CONFIG_ADDR_STATUS	PPC_BITMASK(1, 3)
> +#define   NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER	PPC_BITMASK(4, 11)
> +#define   NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER PPC_BITMASK(12, 16)
> +#define   NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER PPC_BITMASK(17, 19)
> +#define   NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER PPC_BITMASK(20, 31)
> +#define   NPU2_CQ_CTL_CONFIG_ADDR_TYPE		PPC_BIT(32)
>   #define NPU2_CQ_CTL_FENCE_CONTROL_0		0x140
>   #define NPU2_CQ_CTL_FENCE_CONTROL_1		0x148
>   #define   NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE PPC_BITMASK(0, 1)
> @@ -331,9 +345,16 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define   NPU2_OTL_TLX_CREDITS_VC3_CREDITS	PPC_BITMASK(24, 31)
>   #define   NPU2_OTL_TLX_CREDITS_DCP0_CREDITS	PPC_BITMASK(32, 39)
>   #define   NPU2_OTL_TLX_CREDITS_DCP1_CREDITS	PPC_BITMASK(56, 63)
> +#define NPU2_OTL_VC_CREDITS(stack, block)	NPU2_REG_OFFSET(stack, block, 0x090)
>   #define NPU2_OTL_CONFIG1(stack, block)		NPU2_REG_OFFSET(stack, block, 0x058)
> +#define   NPU2_OTL_CONFIG1_TX_TEMP1_EN		PPC_BIT(1)
> +#define   NPU2_OTL_CONFIG1_TX_TEMP2_EN		PPC_BIT(2)
> +#define   NPU2_OTL_CONFIG1_TX_TEMP3_EN		PPC_BIT(3)
>   #define   NPU2_OTL_CONFIG1_TX_DRDY_WAIT		PPC_BITMASK(5, 7)
>   #define   NPU2_OTL_CONFIG1_TX_TEMP0_RATE	PPC_BITMASK(8, 11)
> +#define   NPU2_OTL_CONFIG1_TX_TEMP1_RATE	PPC_BITMASK(12, 15)
> +#define   NPU2_OTL_CONFIG1_TX_TEMP2_RATE	PPC_BITMASK(16, 19)
> +#define   NPU2_OTL_CONFIG1_TX_TEMP3_RATE	PPC_BITMASK(20, 23)
>   #define   NPU2_OTL_CONFIG1_TX_CRET_FREQ 	PPC_BITMASK(32, 34)
>   #define   NPU2_OTL_CONFIG1_TX_AGE_FREQ		PPC_BITMASK(35, 39)
>   #define   NPU2_OTL_CONFIG1_TX_RS2_HPWAIT	PPC_BITMASK(40, 45)
> @@ -342,6 +363,15 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define   NPU2_OTL_CONFIG1_TX_CBUF_ECC_DIS	PPC_BIT(58)
>   #define   NPU2_OTL_CONFIG1_TX_STOP_LINK		PPC_BIT(59)
>   #define   NPU2_OTL_CONFIG1_TX_STOP_ON_UE	PPC_BIT(60)
> +#define   NPU2_OTL_CONFIG1_TX_T0_MASK_CRTN0	PPC_BIT(61)
> +#define   NPU2_OTL_CONFIG1_TX_T123_MASK_CRTN0	PPC_BIT(62)
> +#define NPU2_OTL_CONFIG2(stack, block)		NPU2_REG_OFFSET(stack, block, 0x0C0)
> +#define   NPU2_OTL_CONFIG2_TX_SEND_EN		PPC_BIT(0)
> +
> +#define NPU2_OTL_OSL_DSISR(stack, block)	NPU2_REG_OFFSET(stack, block, 0x000)
> +#define NPU2_OTL_OSL_DAR(stack, block)		NPU2_REG_OFFSET(stack, block, 0x008)
> +#define NPU2_OTL_OSL_TFC(stack, block)		NPU2_REG_OFFSET(stack, block, 0x010)
> +#define NPU2_OTL_OSL_PEHANDLE(stack, block)	NPU2_REG_OFFSET(stack, block, 0x018)
> 
>   /* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above
>    * there is only a single instance of each of these in the NPU so we
> @@ -435,6 +465,7 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define NPU2_MISC_IRQ_LOG13			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x368)
>   #define NPU2_MISC_IRQ_LOG14			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x370)
>   #define NPU2_MISC_IRQ_LOG15			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x378)
> +#define NPU2_MISC_FENCE_ON_ERROR_EN_FIR2	NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x400)
>   #define NPU2_MISC_IRQ_ON_ERROR_EN_FIR2		NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x408)
> 
> 
> @@ -566,10 +597,33 @@ void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
>   #define    PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE	PPC_BIT(52)
>   #define    PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE	PPC_BIT(57)
> 
> +#define OB0_ODL0_CONFIG				0x901082A
> +#define OB0_ODL1_CONFIG				0x901082B
> +#define OB3_ODL0_CONFIG				0xC01082A
> +#define OB3_ODL1_CONFIG				0xC01082B
> +#define   OB_ODL_CONFIG_RESET			PPC_BIT(0)
> +#define   OB_ODL_CONFIG_VERSION			PPC_BITMASK(2, 7)
> +#define   OB_ODL_CONFIG_TRAIN_MODE		PPC_BITMASK(8, 11)
> +#define   OB_ODL_CONFIG_SUPPORTED_MODES		PPC_BITMASK(12, 15)
> +#define   OB_ODL_CONFIG_X4_BACKOFF_ENABLE	PPC_BIT(16)
> +#define   OB_ODL_CONFIG_PHY_CNTR_LIMIT		PPC_BITMASK(20, 23)
> +#define   OB_ODL_CONFIG_DEBUG_ENABLE		PPC_BIT(33)
> +#define   OB_ODL_CONFIG_FWD_PROGRESS_TIMER	PPC_BITMASK(40, 43)
> +
> +#define OB0_ODL0_STATUS				0x901082C
> +#define OB0_ODL1_STATUS				0x901082D
> +#define OB3_ODL0_STATUS				0xC01082C
> +#define OB3_ODL1_STATUS				0xC01082D
> +#define   OB_ODL_STATUS_TRAINED_MODE		PPC_BITMASK(0,3)
> +#define   OB_ODL_STATUS_TRAINING_STATE_MACHINE	PPC_BITMASK(49, 51)
> +
>   #define OB0_ODL0_TRAINING_STATUS		0x901082E
>   #define OB0_ODL1_TRAINING_STATUS		0x901082F
>   #define OB3_ODL0_TRAINING_STATUS		0xC01082E
>   #define OB3_ODL1_TRAINING_STATUS		0xC01082F
>   #define   OB_ODL_TRAINING_STATUS_STS_RX_PATTERN_B PPC_BITMASK(8, 15)
> 
> +#define PERV_ROOT_CTRL2				0x0050012
> +#define   PERV_ROOT_CTRL2_TP_IO_VSB_OP0A_V1P8_EN PPC_BIT(24)
> +
>   #endif /* __NPU2_REGS_H */
> diff --git a/include/npu2.h b/include/npu2.h
> index 1f46bca..e0554d7 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -202,5 +202,17 @@ extern int nv_zcal_nominal;
>   bool is_p9dd1(void);
>   void npu2_opencapi_phy_setup(struct npu2_dev *dev);
>   void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev);
> -
> +int64_t npu2_set_pe(struct phb *phb,
> +		    uint64_t pe_num,
> +		    uint64_t bdfn,
> +		    uint8_t bcompare,
> +		    uint8_t dcompare,
> +		    uint8_t fcompare,
> +		    uint8_t action);
> +int64_t npu2_freeze_status(struct phb *phb __unused,
> +			   uint64_t pe_number __unused,
> +			   uint8_t *freeze_state,
> +			   uint16_t *pci_error_type __unused,
> +			   uint16_t *severity __unused,
> +			   uint64_t *phb_status __unused);
>   #endif /* __NPU2_H */
>
Andrew Donnellan Jan. 12, 2018, 2:53 a.m. | #2
On 12/01/18 01:42, Frederic Barrat wrote:
>> +    dt_for_each_compatible(dt_root, dn, "ibm,power9-i2c-port") {
>> +        if (streq(port_name, dt_prop_get(dn, "ibm,port-name"))) {
>> +            opal_id = dt_prop_get_u32(dn, "ibm,opal-id");
>> +            break;
>> +        }
>> +    }
> 
> 
> I'm just realizing now that it's a bit silly to always parse the device 
> tree on each reset. The opal_id value is per-chip, I believe, so we 
> don't really have a proper place to store it, but maybe we could store 
> it per-device?

Good point.

It occurs to me that we didn't test this on a machine with an OBUS3 - 
presumably there's going to be a second port for that OBUS...

Patch

diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 55d5cd9..ea55dad 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -53,6 +53,9 @@ 
 #include <i2c.h>
 
 #define NPU_IRQ_LEVELS		35
+#define NPU_IRQ_LEVELS_XSL	23
+
+static const struct phb_ops npu2_opencapi_ops;
 
 static inline uint64_t index_to_stack(uint64_t index) {
 	switch (index) {
@@ -99,6 +102,28 @@  static inline uint64_t index_to_block(uint64_t index) {
 	}
 }
 
+/*
+ * Read the ODL status register of an OpenCAPI link.
+ *
+ * @gcid:  chip ID handed to xscom_read()
+ * @index: link index; 2 and 3 select the OB0 ODL0/ODL1 status registers,
+ *         4 and 5 select the OB3 ODL0/ODL1 status registers. Any other
+ *         value trips the assertion.
+ *
+ * Returns the raw 64-bit register value. If xscom_read() reports an error
+ * (non-zero return), 0 is returned instead of whatever happens to be in the
+ * local buffer, so callers never decode an indeterminate value; a zero
+ * status matches neither the "trained" state (0x7) nor any trained mode
+ * tested by the callers in this file.
+ */
+static uint64_t get_odl_status(uint32_t gcid, uint64_t index) {
+	uint64_t reg = 0, status_xscom;
+
+	switch (index) {
+	case 2:
+		status_xscom = OB0_ODL0_STATUS;
+		break;
+	case 3:
+		status_xscom = OB0_ODL1_STATUS;
+		break;
+	case 4:
+		status_xscom = OB3_ODL0_STATUS;
+		break;
+	case 5:
+		status_xscom = OB3_ODL1_STATUS;
+		break;
+	default:
+		assert(false);
+	}
+
+	/* Do not hand back a half-written or stale value on a failed read */
+	if (xscom_read(gcid, status_xscom, &reg))
+		return 0;
+
+	return reg;
+}
+
 /* Procedure 13.1.3.1 - select OCAPI vs NVLink for bricks 2-3/4-5 */
 
 static void set_transport_mux_controls(uint32_t gcid, uint32_t scom_base,
@@ -674,6 +699,420 @@  static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
 	reg[1] = size;
 }
 
+/*
+ * Procedure 13.1.3.8 - AFU MMIO Range BARs
+ *
+ * Fetches the NPU_OCAPI_MMIO range for this link from the physical map,
+ * writes it to the NTL0 or NTL1 BAR of the link's stack (NTL0 when the link
+ * maps to OTL block 0, NTL1 otherwise), caches it in dev->bars[0], and
+ * programs the matching MMIOPA config register with the address and the
+ * log2 of the size, both taken after shifting right by 16 bits.
+ */
+static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
+				struct npu2_dev *dev)
+{
+	uint64_t stack = index_to_stack(dev->index);
+	uint64_t bar_offset, pa_offset;
+	uint64_t addr, size, pa_cfg;
+
+	/* Both the BAR and the PA translation register are per OTL block */
+	if (index_to_block(dev->index) == NPU2_BLOCK_OTL0) {
+		bar_offset = NPU2_NTL0_BAR;
+		pa_offset = NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG;
+	} else {
+		bar_offset = NPU2_NTL1_BAR;
+		pa_offset = NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
+	}
+
+	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
+	phys_map_get(gcid, NPU_OCAPI_MMIO, dev->index, &addr, &size);
+
+	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size);
+	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, bar_offset),
+		  addr, size);
+
+	/* Remember the range in the device's first BAR slot */
+	dev->bars[0].npu2_bar.base = addr;
+	dev->bars[0].npu2_bar.size = size;
+
+	pa_cfg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, addr >> 16);
+	pa_cfg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, pa_cfg,
+			  ilog2(size >> 16));
+	prlog(PR_DEBUG, "OCAPI: PA translation %llx\n", pa_cfg);
+	npu2_scom_write(gcid, scom_base,
+			NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL, pa_offset),
+			NPU2_MISC_DA_LEN_8B, pa_cfg);
+}
+
+/*
+ * Procedure 13.1.3.9 - AFU Config BARs
+ *
+ * Looks up the NPU_GENID range for the stack this link belongs to, writes
+ * it to that stack's GENID BAR and caches it in dev->bars[1].
+ */
+static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
+				  struct npu2_dev *dev)
+{
+	uint64_t stack = index_to_stack(dev->index);
+	/* phys_map_get() is indexed by stack number relative to stack 0 */
+	int stack_idx = stack - NPU2_STACK_STCK_0;
+	uint64_t genid_addr, genid_size;
+
+	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU Config BARs\n", __func__);
+	phys_map_get(gcid, NPU_GENID, stack_idx, &genid_addr, &genid_size);
+
+	prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", genid_addr);
+	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR),
+		  genid_addr, genid_size);
+
+	dev->bars[1].npu2_bar.base = genid_addr;
+	dev->bars[1].npu2_bar.size = genid_size;
+}
+
+/*
+ * Enable transmission on the OTL serving link @index.
+ *
+ * Writes OTL Config 2 with only the TX send-enable bit set, then reads the
+ * VC credits register back and logs it at debug level.
+ */
+static void otl_enabletx(uint32_t gcid, uint32_t scom_base, uint64_t index)
+{
+	uint64_t stack = index_to_stack(index);
+	uint64_t block = index_to_block(index);
+	uint64_t credits;
+
+	/* OTL Config 2 Register: Transmit Enable is the only bit written */
+	prlog(PR_DEBUG, "OCAPI: %s: Enabling TX\n", __func__);
+	npu2_scom_write(gcid, scom_base, NPU2_OTL_CONFIG2(stack, block),
+			NPU2_MISC_DA_LEN_8B, NPU2_OTL_CONFIG2_TX_SEND_EN);
+
+	credits = npu2_scom_read(gcid, scom_base,
+				 NPU2_OTL_VC_CREDITS(stack, block),
+				 NPU2_MISC_DA_LEN_8B);
+	prlog(PR_DEBUG, "OCAPI: credit counter: %llx\n", credits);
+	/* TODO: Abort if credits are zero */
+}
+
+/*
+ * Send the platform-defined reset sequence to an OpenCAPI device over I2C.
+ *
+ * @gcid:  chip ID, used for the optional 1.8V enable write and to build the
+ *         name of the I2C port to look up
+ * @index: 2 or 4 selects the platform's ODL0 data, 3 or 5 the ODL1 data and
+ *         -1 the combined ODL0+1 data; anything else trips the assertion
+ *
+ * The I2C port is located by matching "ibm,port-name" among the
+ * "ibm,power9-i2c-port" nodes. If no port yields a non-zero "ibm,opal-id"
+ * an error is logged and nothing is sent. Otherwise three one-byte SMBus
+ * writes go to device address 0x20, one per platform-supplied offset, with
+ * a 5ms pause after the second and third write. The first failing write is
+ * logged and ends the sequence.
+ */
+static void reset_ocapi_device(uint32_t gcid, int index)
+{
+	struct dt_node *port;
+	const void *reset_bytes;
+	char port_name[17];
+	uint32_t opal_id = 0;
+	uint8_t data[3];
+	int rc;
+	int step;
+
+	assert(platform.ocapi);
+
+	if (platform.ocapi->i2c_voltage_18) {
+		xscom_write_mask(gcid, PERV_ROOT_CTRL2,
+				 PERV_ROOT_CTRL2_TP_IO_VSB_OP0A_V1P8_EN,
+				 PERV_ROOT_CTRL2_TP_IO_VSB_OP0A_V1P8_EN);
+	}
+
+	/* Pick the byte sequence matching the ODL(s) being reset */
+	switch (index) {
+	case 2:
+	case 4:
+		reset_bytes = platform.ocapi->i2c_odl0_data;
+		break;
+	case 3:
+	case 5:
+		reset_bytes = platform.ocapi->i2c_odl1_data;
+		break;
+	case -1:
+		reset_bytes = platform.ocapi->i2c_odl01_data;
+		break;
+	default:
+		assert(false);
+	}
+	memcpy(data, reset_bytes, sizeof(data));
+
+	snprintf(port_name, sizeof(port_name), "p8_%08x_e%dp%d", gcid,
+		 platform.ocapi->i2c_engine, platform.ocapi->i2c_port);
+	prlog(PR_DEBUG, "OCAPI: Looking for I2C port %s\n", port_name);
+
+	dt_for_each_compatible(dt_root, port, "ibm,power9-i2c-port") {
+		if (!streq(port_name, dt_prop_get(port, "ibm,port-name")))
+			continue;
+		opal_id = dt_prop_get_u32(port, "ibm,opal-id");
+		break;
+	}
+
+	/* An ID of zero doubles as "port not found" */
+	if (!opal_id) {
+		prlog(PR_ERR, "OCAPI: Couldn't find I2C port %s\n", port_name);
+		return;
+	}
+
+	for (step = 0; step < 3; step++) {
+		rc = i2c_request_send(opal_id, 0x20, SMBUS_WRITE,
+				      platform.ocapi->i2c_offset[step], 1,
+				      &data[step], sizeof(data[step]), 120);
+		if (rc) {
+			/**
+			 * @fwts-label OCAPIDeviceResetFailed
+			 * @fwts-advice There was an error attempting to send
+			 * a reset signal over I2C to the OpenCAPI device.
+			 */
+			prlog(PR_ERR, "OCAPI: Error writing I2C reset signal: %d\n", rc);
+			break;
+		}
+		/* No settling delay after the very first write */
+		if (step > 0)
+			time_wait_ms(5);
+	}
+}
+
+/*
+ * Run a single training attempt on the ODL behind link @index.
+ *
+ * Sequence: write the ODL configuration with the reset bit set, write it
+ * again with the reset bit cleared, reset the attached device over I2C,
+ * switch the train mode to 0b0001 (pattern A) and wait 5ms, bump the lanes,
+ * switch the train mode to 0b1000 to start training, then poll the status
+ * register once per millisecond until the training state machine reads 0x7.
+ *
+ * Returns OPAL_SUCCESS once the state machine reports 0x7, or OPAL_HARDWARE
+ * if that has not happened after the final poll (the status register is
+ * sampled 3001 times in total). @index values other than 2..5 trip the
+ * assertion.
+ */
+static int odl_train(uint32_t gcid, uint32_t index, struct npu2_dev *dev)
+{
+	const int timeout_ms = 3000;
+	uint64_t cfg, config_xscom;
+	uint64_t status = 0;
+	int elapsed_ms;
+
+	prlog(PR_DEBUG, "OCAPI: %s: Training ODL\n", __func__);
+
+	switch (index) {
+	case 2:
+		config_xscom = OB0_ODL0_CONFIG;
+		break;
+	case 3:
+		config_xscom = OB0_ODL1_CONFIG;
+		break;
+	case 4:
+		config_xscom = OB3_ODL0_CONFIG;
+		break;
+	case 5:
+		config_xscom = OB3_ODL1_CONFIG;
+		break;
+	default:
+		assert(false);
+	}
+
+	/* Reset ODL: single-bit flags first, then the multi-bit fields */
+	cfg = OB_ODL_CONFIG_RESET |
+	      OB_ODL_CONFIG_X4_BACKOFF_ENABLE |
+	      OB_ODL_CONFIG_DEBUG_ENABLE;
+	cfg = SETFIELD(OB_ODL_CONFIG_VERSION, cfg, 0b000001);
+	cfg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, cfg, 0b0110);
+	cfg = SETFIELD(OB_ODL_CONFIG_SUPPORTED_MODES, cfg, 0b0010);
+	cfg = SETFIELD(OB_ODL_CONFIG_PHY_CNTR_LIMIT, cfg, 0b1111);
+	cfg = SETFIELD(OB_ODL_CONFIG_FWD_PROGRESS_TIMER, cfg, 0b0110);
+	xscom_write(gcid, config_xscom, cfg);
+
+	/* Same configuration again, this time with reset released */
+	cfg &= ~OB_ODL_CONFIG_RESET;
+	xscom_write(gcid, config_xscom, cfg);
+
+	reset_ocapi_device(gcid, index);
+
+	/* Transmit Pattern A */
+	cfg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, cfg, 0b0001);
+	xscom_write(gcid, config_xscom, cfg);
+	time_wait_ms(5);
+
+	/* Bump lanes - this improves training reliability */
+	npu2_opencapi_bump_ui_lane(dev);
+
+	/* Start training */
+	cfg = SETFIELD(OB_ODL_CONFIG_TRAIN_MODE, cfg, 0b1000);
+	xscom_write(gcid, config_xscom, cfg);
+
+	/* Poll at 0ms, 1ms, ... up to and including timeout_ms */
+	for (elapsed_ms = 0; elapsed_ms <= timeout_ms; elapsed_ms++) {
+		status = get_odl_status(gcid, index);
+		if (GETFIELD(OB_ODL_STATUS_TRAINING_STATE_MACHINE, status) == 0x7) {
+			prlog(PR_NOTICE,
+			      "OCAPI: Link %d on chip %u trained in %dms\n",
+			      index, gcid, elapsed_ms);
+			return OPAL_SUCCESS;
+		}
+		time_wait_ms(1);
+	}
+
+	prlog(PR_INFO, "OCAPI: Link %d on chip %u failed to train, retrying\n",
+	      index, gcid);
+	prlog(PR_INFO, "OCAPI: Link status: %016llx\n", status);
+	return OPAL_HARDWARE;
+}
+
+/*
+ * Slot callback reporting the trained width of the OpenCAPI link.
+ *
+ * Decodes the trained-mode field of the ODL status register: 0b0001 is
+ * reported as OPAL_SHPC_LINK_UP_x4 and 0b0010 as OPAL_SHPC_LINK_UP_x8, both
+ * with OPAL_SUCCESS. For any other field value OPAL_HARDWARE is returned
+ * and *val is left untouched.
+ */
+static int64_t npu2_opencapi_get_link_state(struct pci_slot *slot, uint8_t *val)
+{
+	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(slot->phb);
+	uint64_t status = get_odl_status(dev->npu->chip_id, dev->index);
+
+	switch (GETFIELD(OB_ODL_STATUS_TRAINED_MODE, status)) {
+	case 0b0001:
+		*val = OPAL_SHPC_LINK_UP_x4;
+		return OPAL_SUCCESS;
+	case 0b0010:
+		*val = OPAL_SHPC_LINK_UP_x8;
+		return OPAL_SUCCESS;
+	default:
+		return OPAL_HARDWARE;
+	}
+}
+
+/*
+ * Allocate the PCI slot for an OpenCAPI PHB and install its callbacks.
+ *
+ * Only get_link_state is implemented; the other callbacks assigned here
+ * are explicitly set to NULL. Returns the new slot, or NULL when
+ * pci_slot_alloc() fails.
+ */
+static struct pci_slot *npu2_opencapi_slot_create(struct phb *phb)
+{
+	struct pci_slot *slot = pci_slot_alloc(phb, NULL);
+
+	if (slot) {
+		/* TODO: Figure out other slot functions */
+		slot->ops.get_link_state      = npu2_opencapi_get_link_state;
+
+		slot->ops.get_presence_state  = NULL;
+		slot->ops.get_power_state     = NULL;
+		slot->ops.get_attention_state = NULL;
+		slot->ops.get_latch_state     = NULL;
+		slot->ops.set_power_state     = NULL;
+		slot->ops.set_attention_state = NULL;
+	}
+
+	return slot;
+}
+
+/*
+ * Validate a config space access before any hardware register is touched.
+ *
+ * @dev:    device the access is aimed at; must not be NULL
+ * @offset: byte offset into config space; values above 0xfff are rejected,
+ *          as are offsets not aligned to @size
+ * @size:   access width in bytes; only 1, 2 and 4 are accepted, matching
+ *          the widths the read/write accessors implement
+ *
+ * Returns OPAL_SUCCESS when the access is acceptable, OPAL_PARAMETER
+ * otherwise.
+ */
+static int64_t npu2_opencapi_pcicfg_check(struct npu2_dev *dev, uint32_t offset,
+					  uint32_t size)
+{
+	if (!dev)
+		return OPAL_PARAMETER;
+
+	/*
+	 * Reject unsupported widths up front: a size of 0 would wrap the
+	 * alignment mask below, and any other width would otherwise only be
+	 * caught after the config address register has been programmed.
+	 */
+	if (size != 1 && size != 2 && size != 4)
+		return OPAL_PARAMETER;
+
+	if (offset > 0xfff || (offset & (size - 1)))
+		return OPAL_PARAMETER;
+
+	return OPAL_SUCCESS;
+}
+
+/*
+ * Point the device's config address register at (@bdfn, @offset) and return
+ * the address of the matching config data window.
+ *
+ * The address register sits at the start of the range cached in
+ * dev->bars[1], or 256 bytes in for links on OTL block 1. It is written
+ * big-endian with the enable bit, the bus/device/function taken from @bdfn
+ * and the register number (@offset with the low two bits cleared), followed
+ * by a sync(). The data window is 128 bytes above the address register.
+ *
+ * Shared by the read and write accessors so the sequence exists only once.
+ */
+static uint64_t npu2_opencapi_pcicfg_select(struct npu2_dev *dev,
+					    uint32_t bdfn, uint32_t offset)
+{
+	uint64_t genid_base, cfg_addr;
+
+	genid_base = dev->bars[1].npu2_bar.base +
+		(index_to_block(dev->index) == NPU2_BLOCK_OTL1 ? 256 : 0);
+
+	cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
+	cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER |
+			    NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER |
+			    NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER,
+			    cfg_addr, bdfn);
+	cfg_addr = SETFIELD(NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER,
+			    cfg_addr, offset & ~3u);
+
+	out_be64((uint64_t *)genid_base, cfg_addr);
+	sync();
+
+	return genid_base + 128;
+}
+
+/*
+ * Read @size bytes (1, 2 or 4) of config space at @offset into @data.
+ *
+ * @data must point to storage of at least @size bytes; 16- and 32-bit
+ * values are read little-endian. Returns OPAL_SUCCESS, or OPAL_PARAMETER
+ * when the access fails validation or @size is not a supported width.
+ */
+static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn,
+					 uint32_t offset, uint32_t size,
+					 void *data)
+{
+	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
+	uint64_t cfg_data;
+	int64_t rc;
+
+	rc = npu2_opencapi_pcicfg_check(dev, offset, size);
+	if (rc)
+		return rc;
+
+	cfg_data = npu2_opencapi_pcicfg_select(dev, bdfn, offset);
+
+	/* Sub-word accesses are steered by the low bits of the offset */
+	switch (size) {
+	case 1:
+		*((uint8_t *)data) =
+			in_8((volatile uint8_t *)(cfg_data + (offset & 3)));
+		break;
+	case 2:
+		*((uint16_t *)data) =
+			in_le16((volatile uint16_t *)(cfg_data + (offset & 2)));
+		break;
+	case 4:
+		*((uint32_t *)data) = in_le32((volatile uint32_t *)cfg_data);
+		break;
+	default:
+		return OPAL_PARAMETER;
+	}
+
+	return OPAL_SUCCESS;
+}
+
+/*
+ * Generate the fixed-width read accessor npu2_opencapi_pcicfg_read<size>()
+ * operating on <type>. The output is preset to all-ones so a failed read
+ * leaves a recognisable value behind.
+ */
+#define NPU2_OPENCAPI_PCI_CFG_READ(size, type)				\
+static int64_t npu2_opencapi_pcicfg_read##size(struct phb *phb,		\
+					       uint32_t bdfn,		\
+					       uint32_t offset,		\
+					       type *data)		\
+{									\
+	/* Initialize data in case of error */				\
+	*data = (type)0xffffffff;					\
+	return npu2_opencapi_pcicfg_read(phb, bdfn, offset,		\
+					 sizeof(type), data);		\
+}
+
+/*
+ * Write the low @size bytes (1, 2 or 4) of @data to config space at
+ * @offset; 16- and 32-bit values are written little-endian. Returns
+ * OPAL_SUCCESS, or OPAL_PARAMETER when the access fails validation or
+ * @size is not a supported width.
+ */
+static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn,
+					  uint32_t offset, uint32_t size,
+					  uint32_t data)
+{
+	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
+	uint64_t cfg_data;
+	int64_t rc;
+
+	rc = npu2_opencapi_pcicfg_check(dev, offset, size);
+	if (rc)
+		return rc;
+
+	cfg_data = npu2_opencapi_pcicfg_select(dev, bdfn, offset);
+
+	switch (size) {
+	case 1:
+		out_8((volatile uint8_t *)(cfg_data + (offset & 3)), data);
+		break;
+	case 2:
+		out_le16((volatile uint16_t *)(cfg_data + (offset & 2)), data);
+		break;
+	case 4:
+		out_le32((volatile uint32_t *)cfg_data, data);
+		break;
+	default:
+		return OPAL_PARAMETER;
+	}
+
+	return OPAL_SUCCESS;
+}
+
+/*
+ * Generate the fixed-width write accessor npu2_opencapi_pcicfg_write<bits>()
+ * taking a value of type <ctype>; it forwards to the generic write routine
+ * with sizeof(<ctype>) as the access width.
+ */
+#define NPU2_OPENCAPI_PCI_CFG_WRITE(bits, ctype)			\
+static int64_t npu2_opencapi_pcicfg_write##bits(struct phb *phb,	\
+						uint32_t bdfn,		\
+						uint32_t offset,	\
+						ctype data)		\
+{									\
+	return npu2_opencapi_pcicfg_write(phb, bdfn, offset,		\
+					  sizeof(ctype), data);		\
+}
+
+/* Instantiate the 8/16/32-bit accessors referenced by the PHB ops table */
+NPU2_OPENCAPI_PCI_CFG_READ(8, u8)
+NPU2_OPENCAPI_PCI_CFG_READ(16, u16)
+NPU2_OPENCAPI_PCI_CFG_READ(32, u32)
+NPU2_OPENCAPI_PCI_CFG_WRITE(8, u8)
+NPU2_OPENCAPI_PCI_CFG_WRITE(16, u16)
+NPU2_OPENCAPI_PCI_CFG_WRITE(32, u32)
+
+/*
+ * pci_walk_dev() callback: publish the translation-fault interrupt and the
+ * fault-handling register addresses in the device's device-tree node.
+ *
+ * Adds "ibm,opal-xsl-irq" (one cell) and "ibm,opal-xsl-mmio" (DSISR, DAR,
+ * TFC and PE handle register addresses, each as a hi/lo pair of cells) to
+ * pd->dn. Always returns 0.
+ */
+static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
+			      void *data __unused)
+{
+	struct npu2_dev *dev = phb_to_npu2_dev_ocapi(phb);
+	uint64_t block = index_to_block(dev->index);
+	uint64_t stacku = index_to_stacku(dev->index);
+	uint64_t regs_base = (uint64_t) dev->npu->regs;
+	uint64_t dsisr, dar, tfc, handle;
+	uint32_t irq;
+
+	/*
+	 * Pass the hw irq number for the translation fault irq
+	 * irq levels 23 -> 26 are for translation faults, 1 per brick
+	 */
+	irq = dev->npu->irq_base + NPU_IRQ_LEVELS_XSL;
+	irq += (stacku == NPU2_STACK_STCK_2U) ? 2 : 0;
+	irq += (block == NPU2_BLOCK_OTL1) ? 1 : 0;
+
+	/*
+	 * Add the addresses of the registers needed by the OS to handle
+	 * faults. The OS accesses them by mmio.
+	 */
+	dsisr  = regs_base + NPU2_OTL_OSL_DSISR(stacku, block);
+	dar    = regs_base + NPU2_OTL_OSL_DAR(stacku, block);
+	tfc    = regs_base + NPU2_OTL_OSL_TFC(stacku, block);
+	handle = regs_base + NPU2_OTL_OSL_PEHANDLE(stacku, block);
+
+	dt_add_property_cells(pd->dn, "ibm,opal-xsl-irq", irq);
+	dt_add_property_cells(pd->dn, "ibm,opal-xsl-mmio",
+			      hi32(dsisr), lo32(dsisr),
+			      hi32(dar), lo32(dar),
+			      hi32(tfc), lo32(tfc),
+			      hi32(handle), lo32(handle));
+	return 0;
+}
+
+/*
+ * PHB final-fixup hook: walk every device under @phb and let
+ * npu2_add_mmio_regs() add its properties to each device node.
+ */
+static void npu2_opencapi_final_fixup(struct phb *phb)
+{
+	/* No parent filter and no private data are needed for the walk */
+	pci_walk_dev(phb, NULL, npu2_add_mmio_regs, NULL);
+}
+
 static int setup_irq(struct npu2 *p)
 {
 	uint64_t reg, mmio_addr;
@@ -722,6 +1161,111 @@  static int setup_irq(struct npu2 *p)
 	return 0;
 }
 
+/*
+ * Bring up one OpenCAPI link and expose it as a PHB.
+ *
+ * @dn_link: device-tree node of the link
+ * @n:       NPU the link belongs to
+ * @dev:     device slot in @n to fill in for this link
+ *
+ * Creates a "pciex" node under the device-tree root describing the PHB,
+ * fills in @dev, programs the AFU MMIO and config BARs, clears the fence,
+ * sets up the PHY and then tries to train the link, up to 20 attempts.
+ * When training succeeds, TX is enabled, a slot is created (a failure to
+ * do so is only logged) and the PHB is registered with a dynamic ID.
+ * When every attempt fails, the PHB node gets status "error" and no PHB
+ * is registered.
+ */
+static void npu2_opencapi_setup_device(struct dt_node *dn_link, struct npu2 *n,
+				       struct npu2_dev *dev)
+{
+	struct dt_node *phb_node;
+	uint32_t dev_index, npu_index;
+	uint64_t mm_win[2];
+	int attempt;
+	int rc;
+
+	dev_index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
+	npu_index = dt_prop_get_u32(n->dt_node, "ibm,npu-index");
+
+	/* Populate PHB device node */
+	phys_map_get(n->chip_id, NPU_OCAPI_MMIO, dev_index, &mm_win[0],
+		     &mm_win[1]);
+	prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n",
+	      mm_win[0], mm_win[1]);
+	phb_node = dt_new_addr(dt_root, "pciex", mm_win[0]);
+	assert(phb_node);
+	dt_add_property_strings(phb_node,
+				"compatible",
+				"ibm,power9-npu-opencapi-pciex",
+				"ibm,ioda2-npu2-opencapi-phb");
+	dt_add_property_strings(phb_node, "device_type", "pciex");
+	dt_add_property(phb_node, "reg", &n->regs, sizeof(n->regs));
+	dt_add_property_cells(phb_node, "ibm,npu-index", npu_index);
+	dt_add_property_cells(phb_node, "ibm,chip-id", n->chip_id);
+	dt_add_property_cells(phb_node, "ibm,xscom-base", n->xscom_base);
+	dt_add_property_cells(phb_node, "ibm,npcq", dn_link->phandle);
+	dt_add_property_cells(phb_node, "ibm,links", 1);
+	dt_add_property(phb_node, "ibm,mmio-window", mm_win, sizeof(mm_win));
+	dt_add_property_cells(phb_node, "ibm,phb-diag-data-size", 0);
+	dt_add_property_cells(phb_node, "bus-range", 0, 0xff);
+	dt_add_property_cells(phb_node, "ibm,opal-num-pes", NPU2_MAX_PE_NUM);
+
+	/* The MMIO window is mapped 1:1 in the "ranges" property */
+	n->mm_base = mm_win[0];
+	n->mm_size = mm_win[1];
+	dt_add_property_cells(phb_node, "ranges", 0x02000000,
+			      hi32(n->mm_base), lo32(n->mm_base),
+			      hi32(n->mm_base), lo32(n->mm_base),
+			      hi32(n->mm_size), lo32(n->mm_size));
+
+	/* Fill in the device and its embedded PHB */
+	dev->type = NPU2_DEV_TYPE_OPENCAPI;
+	dev->npu = n;
+	dev->dt_node = dn_link;
+	dev->phb_ocapi.dt_node = phb_node;
+	dev->phb_ocapi.ops = &npu2_opencapi_ops;
+	dev->phb_ocapi.phb_type = phb_type_npu_v2_opencapi;
+	dev->phb_ocapi.scan_map = 1;
+	dev->index = dt_prop_get_u32(dn_link, "ibm,npu-link-index");
+	dev->pl_xscom_base = dt_prop_get_u64(dn_link, "ibm,npu-phy");
+	dev->lane_mask = dt_prop_get_u32(dn_link, "ibm,npu-lane-mask");
+	dev->bdfn = 0;
+	n->total_devices++;
+
+	/* TODO: Procedure 13.1.3.7 - AFU Memory Range BARs */
+	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
+	setup_afu_mmio_bars(n->chip_id, n->xscom_base, dev);
+	/* Procedure 13.1.3.9 - AFU Config BARs */
+	setup_afu_config_bars(n->chip_id, n->xscom_base, dev);
+
+	set_fence_control(n->chip_id, n->xscom_base, dev->index, 0b00);
+
+	npu2_opencapi_phy_setup(dev);
+
+	/* Training is retried; stop at the first successful attempt */
+	for (attempt = 0; attempt < 20; attempt++) {
+		rc = odl_train(n->chip_id, dev->index, dev);
+		if (rc == OPAL_SUCCESS)
+			break;
+	}
+
+	if (rc != OPAL_SUCCESS) {
+		/**
+		 * @fwts-label OCAPILinkTrainingFailed
+		 * @fwts-advice The OpenCAPI link training procedure failed.
+		 * This indicates a hardware or firmware bug. OpenCAPI
+		 * functionality will not be available on this link.
+		 */
+		prlog(PR_ERR, "OCAPI: Link %d on chip %u failed to train\n",
+		      dev->index, n->chip_id);
+		prlog(PR_ERR, "OCAPI: Final link status: %016llx\n",
+		      get_odl_status(n->chip_id, dev->index));
+		dt_add_property_string(phb_node, "status", "error");
+		return;
+	}
+
+	otl_enabletx(n->chip_id, n->xscom_base, dev->index);
+
+	if (!npu2_opencapi_slot_create(&dev->phb_ocapi)) {
+		/**
+		 * @fwts-label OCAPICannotCreatePHBSlot
+		 * @fwts-advice Firmware probably ran out of memory creating
+		 * NPU slot. OpenCAPI functionality could be broken.
+		 */
+		prlog(PR_ERR, "OCAPI: Cannot create PHB slot\n");
+	}
+
+	pci_register_phb(&dev->phb_ocapi, OPAL_DYNAMIC_PHB_ID);
+}
+
 static void npu2_opencapi_probe(struct dt_node *dn)
 {
 	struct dt_node *link;
@@ -730,7 +1274,7 @@  static void npu2_opencapi_probe(struct dt_node *dn)
 	uint64_t reg[2];
 	uint64_t dev_index;
 	struct npu2 *n;
-	int rc;
+	int rc, i = 0;
 
 	path = dt_get_path(dn);
 	gcid = dt_get_chip_id(dn);
@@ -780,6 +1324,11 @@  static void npu2_opencapi_probe(struct dt_node *dn)
 	if (rc)
 		goto failed;
 
+	dt_for_each_compatible(dn, link, "ibm,npu-link-opencapi") {
+		npu2_opencapi_setup_device(link, n, &n->devices[i]);
+		i++;
+	}
+
 	return;
 failed:
 	free(n);
@@ -792,3 +1341,39 @@  void probe_npu2_opencapi(void)
 	dt_for_each_compatible(dt_root, np_npu, "ibm,power9-npu")
 		npu2_opencapi_probe(np_npu);
 }
+
+/*
+ * PHB operations for OpenCAPI PHBs. Config space accessors, the final
+ * fixup hook, PE assignment and freeze status are provided; every other
+ * operation listed here is explicitly left NULL.
+ */
+static const struct phb_ops npu2_opencapi_ops = {
+	/* Config space access */
+	.cfg_read8		= npu2_opencapi_pcicfg_read8,
+	.cfg_read16		= npu2_opencapi_pcicfg_read16,
+	.cfg_read32		= npu2_opencapi_pcicfg_read32,
+	.cfg_write8		= npu2_opencapi_pcicfg_write8,
+	.cfg_write16		= npu2_opencapi_pcicfg_write16,
+	.cfg_write32		= npu2_opencapi_pcicfg_write32,
+
+	/* Other implemented operations */
+	.phb_final_fixup	= npu2_opencapi_final_fixup,
+	.set_pe			= npu2_set_pe,
+	.eeh_freeze_status	= npu2_freeze_status,  /* TODO */
+
+	/* Operations without an implementation */
+	.choose_bus		= NULL,
+	.device_init		= NULL,
+	.ioda_reset		= NULL,
+	.papr_errinjct_reset	= NULL,
+	.pci_reinit		= NULL,
+	.set_phb_mem_window	= NULL,
+	.phb_mmio_enable	= NULL,
+	.map_pe_mmio_window	= NULL,
+	.map_pe_dma_window	= NULL,
+	.map_pe_dma_window_real	= NULL,
+	.pci_msi_eoi		= NULL,
+	.set_xive_pe		= NULL,
+	.get_msi_32		= NULL,
+	.get_msi_64		= NULL,
+	.set_peltv		= NULL,
+	.eeh_freeze_clear	= NULL,
+	.eeh_freeze_set		= NULL,
+	.next_error		= NULL,
+	.err_inject		= NULL,
+	.get_diag_data		= NULL,
+	.get_diag_data2		= NULL,
+	.set_capi_mode		= NULL,
+	.set_capp_recovery	= NULL,
+	.tce_kill		= NULL,
+};
diff --git a/hw/npu2.c b/hw/npu2.c
index 218ac86..8c76cb1 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -1001,17 +1001,17 @@  static int64_t npu2_map_pe_dma_window(struct phb *phb,
 	return OPAL_SUCCESS;
 }
 
-static int64_t npu2_set_pe(struct phb *phb,
-			   uint64_t pe_num,
-			   uint64_t bdfn,
-			   uint8_t bcompare,
-			   uint8_t dcompare,
-			   uint8_t fcompare,
-			   uint8_t action)
+int64_t npu2_set_pe(struct phb *phb,
+		    uint64_t pe_num,
+		    uint64_t bdfn,
+		    uint8_t bcompare,
+		    uint8_t dcompare,
+		    uint8_t fcompare,
+		    uint8_t action)
 {
 	struct npu2 *p;
 	struct npu2_dev *dev;
-	uint64_t reg, val;
+	uint64_t reg, val, pe_bdfn;
 
 	/* Sanity check */
 	if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE)
@@ -1036,21 +1036,31 @@  static int64_t npu2_set_pe(struct phb *phb,
 	if (!dev)
 		return OPAL_PARAMETER;
 
-	val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE;
-	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num);
-	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
-
-	if (!NPU2DEV_BRICK(dev))
-		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
-				      NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0);
+	if (dev->type == NPU2_DEV_TYPE_OPENCAPI)
+		pe_bdfn = dev->bdfn;
 	else
-		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
-				      NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0);
+		pe_bdfn = dev->nvlink.gpu_bdfn;
+
+	if (dev->type == NPU2_DEV_TYPE_NVLINK) {
+		val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE;
+		val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num);
+		val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn);
+
+		if (!NPU2DEV_BRICK(dev))
+			reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
+					      NPU2_BLOCK_CTL,
+					      NPU2_CQ_BRICK0_BDF2PE_MAP0);
+		else
+			reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->index/2,
+					      NPU2_BLOCK_CTL,
+					      NPU2_CQ_BRICK1_BDF2PE_MAP0);
+
+		npu2_write(p, reg, val);
+	}
 
-	npu2_write(p, reg, val);
 	val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE;
 	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
-	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
+	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, pe_bdfn);
 	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
 			      NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->index * 0x18));
 	p->bdf2pe_cache[dev->index] = val;
@@ -1111,12 +1121,12 @@  static struct pci_slot *npu2_slot_create(struct phb *phb)
 	return slot;
 }
 
-static int64_t npu2_freeze_status(struct phb *phb __unused,
-				  uint64_t pe_number __unused,
-				  uint8_t *freeze_state,
-				  uint16_t *pci_error_type __unused,
-				  uint16_t *severity __unused,
-				  uint64_t *phb_status __unused)
+int64_t npu2_freeze_status(struct phb *phb __unused,
+			   uint64_t pe_number __unused,
+			   uint8_t *freeze_state,
+			   uint16_t *pci_error_type __unused,
+			   uint16_t *severity __unused,
+			   uint64_t *phb_status __unused)
 {
 	/*
 	 * FIXME: When it's called by skiboot PCI config accessor,
diff --git a/include/npu2-regs.h b/include/npu2-regs.h
index 77b1529..876d7f0 100644
--- a/include/npu2-regs.h
+++ b/include/npu2-regs.h
@@ -116,6 +116,7 @@  void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define   NPU2_CQ_SM_MISC_CFG0_CONFIG_ENABLE_PBUS PPC_BIT(38)
 #define   NPU2_CQ_SM_MISC_CFG0_CONFIG_OCAPI_MODE PPC_BIT(57)
 #define NPU2_CQ_SM_MISC_CFG1			0x008
+#define NPU2_CQ_SM_MISC_CFG2			0x148
 #define NPU2_PB_EPSILON				0x010
 #define NPU2_TIMER_CFG				0x018
 #define NPU2_GPU0_MEM_BAR			0x020
@@ -187,7 +188,11 @@  void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OCAPI_MODE PPC_BIT(52)
 #define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL0_ENABLE PPC_BIT(55)
 #define   NPU2_CQ_CTL_MISC_CFG_CONFIG_OTL1_ENABLE PPC_BIT(56)
-#define NPU2_CQ_FUTURE_CFG1			0x008
+#define NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG		0x0B0
+#define   NPU2_CQ_CTL_MISC_MMIOPA_ADDR		PPC_BITMASK(1,35)
+#define   NPU2_CQ_CTL_MISC_MMIOPA_SIZE		PPC_BITMASK(39,43)
+#define NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG		0x0B8
+#define NPU2_CQ_CTL_MISC_CFG1			0x008
 #define NPU2_CQ_FUTURE_CFG2			0x010
 #define NPU2_CQ_FUTURE_CFG3			0x018
 #define NPU2_CQ_PERF_MATCH			0x020
@@ -219,6 +224,15 @@  void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define NPU2_CQ_C_ERR_RPT_MASK1			0x0E8
 #define NPU2_CQ_C_ERR_RPT_HOLD0			0x0F0
 #define NPU2_CQ_C_ERR_RPT_HOLD1			0x0F8
+#define NPU2_CQ_CTL_CONFIG_ADDR0		0x120
+#define NPU2_CQ_CTL_CONFIG_ADDR1		0x128
+#define   NPU2_CQ_CTL_CONFIG_ADDR_ENABLE	PPC_BIT(0)
+#define   NPU2_CQ_CTL_CONFIG_ADDR_STATUS	PPC_BITMASK(1, 3)
+#define   NPU2_CQ_CTL_CONFIG_ADDR_BUS_NUMBER	PPC_BITMASK(4, 11)
+#define   NPU2_CQ_CTL_CONFIG_ADDR_DEVICE_NUMBER PPC_BITMASK(12, 16)
+#define   NPU2_CQ_CTL_CONFIG_ADDR_FUNCTION_NUMBER PPC_BITMASK(17, 19)
+#define   NPU2_CQ_CTL_CONFIG_ADDR_REGISTER_NUMBER PPC_BITMASK(20, 31)
+#define   NPU2_CQ_CTL_CONFIG_ADDR_TYPE		PPC_BIT(32)
 #define NPU2_CQ_CTL_FENCE_CONTROL_0		0x140
 #define NPU2_CQ_CTL_FENCE_CONTROL_1		0x148
 #define   NPU2_CQ_CTL_FENCE_CONTROL_REQUEST_FENCE PPC_BITMASK(0, 1)
@@ -331,9 +345,16 @@  void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define   NPU2_OTL_TLX_CREDITS_VC3_CREDITS	PPC_BITMASK(24, 31)
 #define   NPU2_OTL_TLX_CREDITS_DCP0_CREDITS	PPC_BITMASK(32, 39)
 #define   NPU2_OTL_TLX_CREDITS_DCP1_CREDITS	PPC_BITMASK(56, 63)
+#define NPU2_OTL_VC_CREDITS(stack, block)	NPU2_REG_OFFSET(stack, block, 0x090)
 #define NPU2_OTL_CONFIG1(stack, block)		NPU2_REG_OFFSET(stack, block, 0x058)
+#define   NPU2_OTL_CONFIG1_TX_TEMP1_EN		PPC_BIT(1)
+#define   NPU2_OTL_CONFIG1_TX_TEMP2_EN		PPC_BIT(2)
+#define   NPU2_OTL_CONFIG1_TX_TEMP3_EN		PPC_BIT(3)
 #define   NPU2_OTL_CONFIG1_TX_DRDY_WAIT		PPC_BITMASK(5, 7)
 #define   NPU2_OTL_CONFIG1_TX_TEMP0_RATE	PPC_BITMASK(8, 11)
+#define   NPU2_OTL_CONFIG1_TX_TEMP1_RATE	PPC_BITMASK(12, 15)
+#define   NPU2_OTL_CONFIG1_TX_TEMP2_RATE	PPC_BITMASK(16, 19)
+#define   NPU2_OTL_CONFIG1_TX_TEMP3_RATE	PPC_BITMASK(20, 23)
 #define   NPU2_OTL_CONFIG1_TX_CRET_FREQ 	PPC_BITMASK(32, 34)
 #define   NPU2_OTL_CONFIG1_TX_AGE_FREQ		PPC_BITMASK(35, 39)
 #define   NPU2_OTL_CONFIG1_TX_RS2_HPWAIT	PPC_BITMASK(40, 45)
@@ -342,6 +363,15 @@  void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define   NPU2_OTL_CONFIG1_TX_CBUF_ECC_DIS	PPC_BIT(58)
 #define   NPU2_OTL_CONFIG1_TX_STOP_LINK		PPC_BIT(59)
 #define   NPU2_OTL_CONFIG1_TX_STOP_ON_UE	PPC_BIT(60)
+#define   NPU2_OTL_CONFIG1_TX_T0_MASK_CRTN0	PPC_BIT(61)
+#define   NPU2_OTL_CONFIG1_TX_T123_MASK_CRTN0	PPC_BIT(62)
+#define NPU2_OTL_CONFIG2(stack, block)		NPU2_REG_OFFSET(stack, block, 0x0C0)
+#define   NPU2_OTL_CONFIG2_TX_SEND_EN		PPC_BIT(0)
+
+#define NPU2_OTL_OSL_DSISR(stack, block)	NPU2_REG_OFFSET(stack, block, 0x000)
+#define NPU2_OTL_OSL_DAR(stack, block)		NPU2_REG_OFFSET(stack, block, 0x008)
+#define NPU2_OTL_OSL_TFC(stack, block)		NPU2_REG_OFFSET(stack, block, 0x010)
+#define NPU2_OTL_OSL_PEHANDLE(stack, block)	NPU2_REG_OFFSET(stack, block, 0x018)
 
 /* Misc block registers. Unlike the SM/CTL/DAT/NTL registers above
  * there is only a single instance of each of these in the NPU so we
@@ -435,6 +465,7 @@  void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define NPU2_MISC_IRQ_LOG13			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x368)
 #define NPU2_MISC_IRQ_LOG14			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x370)
 #define NPU2_MISC_IRQ_LOG15			NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x378)
+#define NPU2_MISC_FENCE_ON_ERROR_EN_FIR2	NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x400)
 #define NPU2_MISC_IRQ_ON_ERROR_EN_FIR2		NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x408)
 
 
@@ -566,10 +597,33 @@  void npu2_scom_write(uint64_t gcid, uint64_t scom_base,
 #define    PU_IOE_PB_FP_CFG_FP1_FMR_DISABLE	PPC_BIT(52)
 #define    PU_IOE_PB_FP_CFG_FP1_PRS_DISABLE	PPC_BIT(57)
 
+#define OB0_ODL0_CONFIG				0x901082A
+#define OB0_ODL1_CONFIG				0x901082B
+#define OB3_ODL0_CONFIG				0xC01082A
+#define OB3_ODL1_CONFIG				0xC01082B
+#define   OB_ODL_CONFIG_RESET			PPC_BIT(0)
+#define   OB_ODL_CONFIG_VERSION			PPC_BITMASK(2, 7)
+#define   OB_ODL_CONFIG_TRAIN_MODE		PPC_BITMASK(8, 11)
+#define   OB_ODL_CONFIG_SUPPORTED_MODES		PPC_BITMASK(12, 15)
+#define   OB_ODL_CONFIG_X4_BACKOFF_ENABLE	PPC_BIT(16)
+#define   OB_ODL_CONFIG_PHY_CNTR_LIMIT		PPC_BITMASK(20, 23)
+#define   OB_ODL_CONFIG_DEBUG_ENABLE		PPC_BIT(33)
+#define   OB_ODL_CONFIG_FWD_PROGRESS_TIMER	PPC_BITMASK(40, 43)
+
+#define OB0_ODL0_STATUS				0x901082C
+#define OB0_ODL1_STATUS				0x901082D
+#define OB3_ODL0_STATUS				0xC01082C
+#define OB3_ODL1_STATUS				0xC01082D
+#define   OB_ODL_STATUS_TRAINED_MODE		PPC_BITMASK(0,3)
+#define   OB_ODL_STATUS_TRAINING_STATE_MACHINE	PPC_BITMASK(49, 51)
+
 #define OB0_ODL0_TRAINING_STATUS		0x901082E
 #define OB0_ODL1_TRAINING_STATUS		0x901082F
 #define OB3_ODL0_TRAINING_STATUS		0xC01082E
 #define OB3_ODL1_TRAINING_STATUS		0xC01082F
 #define   OB_ODL_TRAINING_STATUS_STS_RX_PATTERN_B PPC_BITMASK(8, 15)
 
+#define PERV_ROOT_CTRL2				0x0050012
+#define   PERV_ROOT_CTRL2_TP_IO_VSB_OP0A_V1P8_EN PPC_BIT(24)
+
 #endif /* __NPU2_REGS_H */
diff --git a/include/npu2.h b/include/npu2.h
index 1f46bca..e0554d7 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -202,5 +202,17 @@  extern int nv_zcal_nominal;
 bool is_p9dd1(void);
 void npu2_opencapi_phy_setup(struct npu2_dev *dev);
 void npu2_opencapi_bump_ui_lane(struct npu2_dev *dev);
-
+int64_t npu2_set_pe(struct phb *phb,
+		    uint64_t pe_num,
+		    uint64_t bdfn,
+		    uint8_t bcompare,
+		    uint8_t dcompare,
+		    uint8_t fcompare,
+		    uint8_t action);
+int64_t npu2_freeze_status(struct phb *phb __unused,
+			   uint64_t pe_number __unused,
+			   uint8_t *freeze_state,
+			   uint16_t *pci_error_type __unused,
+			   uint16_t *severity __unused,
+			   uint64_t *phb_status __unused);
 #endif /* __NPU2_H */