Message ID | b8f8ff5b70b78d2a3a1f5cf6f4e26af9bce2df36.1502851015.git.sam.bobroff@au1.ibm.com |
---|---|
State | RFC |
Headers | show |
Hi Sam, This looks pretty good except for one minor simplification below. I also can't help but think it would be better if we split the patch up into three - one to enable the IRQs, one to add the vendor space misc_irq_request stuff and another with NPU->NPU2 name changes to keep the history clean. Thanks! Regards, Alistair On Wed, 16 Aug 2017 12:37:40 PM Sam Bobroff wrote: > Allow the NPU2 to trigger "recoverable data link" interrupts > and provide a way to test them via a new vendor capability. > > Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com> > --- > > +static void npu2_add_interrupt_map(struct npu2 *p, > + struct dt_node *dn) > +{ > + struct dt_node *npu2_dn, *link, *phb_dn; > + uint32_t npu2_phandle, index = 0, i; > + uint32_t icsp = get_ics_phandle(); > + uint32_t *map; > + size_t map_size; > + uint32_t mask[] = {0xff00, 0x0, 0x0, 0x7}; > + int nlinks = 0; > + > + npu2_phandle = dt_prop_get_u32(dn, "ibm,npcq"); > + assert((npu2_dn = dt_find_by_phandle(dt_root, npu2_phandle))); > + assert((phb_dn = p->phb.dt_node)); > + dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") { > + nlinks++; > + } You could just use p->total_devices here. > + map_size = 7 * sizeof(*map) * nlinks; > + map = malloc(map_size); > + index = 0; > + dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") { > + i = index * 7; > + map[i + 0] = (p->devices[index].bdfn << 8); > + map[i + 1] = 0; > + map[i + 2] = 0; > + > + map[i + 3] = 1; /* INT A */ > + map[i + 4] = icsp; /* interrupt-parent */ > + map[i + 5] = p->base_lsi + (index * 2) + 1; /* NDL No-Stall Event */ > + map[i + 6] = 0; /* 0 = EDGE, 1 = LEVEL. 
*/ > + index++; > + } > + dt_add_property(phb_dn, "interrupt-map", map, map_size); > + free(map); > + dt_add_property(phb_dn, "interrupt-map-mask", mask, sizeof(mask)); > +} > + > static void npu2_add_phb_properties(struct npu2 *p) > { > struct dt_node *np = p->phb.dt_node; > @@ -1617,7 +1682,7 @@ static void npu2_add_phb_properties(struct npu2 *p) > dt_add_property_cells(np, "clock-frequency", 0x200, 0); > dt_add_property_cells(np, "interrupt-parent", icsp); > > - /* NPU PHB properties */ > + /* NPU2 PHB properties */ > dt_add_property_cells(np, "ibm,opal-num-pes", > NPU2_MAX_PE_NUM); > dt_add_property_cells(np, "ibm,opal-reserved-pe", > @@ -1641,6 +1706,77 @@ static void npu2_add_phb_properties(struct npu2 *p) > hi32(mm_size), lo32(mm_size)); > } > > +static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused) > +{ > + return IRQ_ATTR_TARGET_LINUX; > +} > + > +static char *npu2_ipi_name(struct irq_source *is, uint32_t isn) > +{ > + struct npu2 *p = is->data; > + uint32_t idx = isn - p->base_lsi; > + const char *name; > + > + switch (idx) { > + case 0: name = "NDL 0 Stall Event (brick 0)"; break; > + case 1: name = "NDL 0 No-Stall Event (brick 0)"; break; > + case 2: name = "NDL 1 Stall Event (brick 1)"; break; > + case 3: name = "NDL 1 No-Stall Event (brick 1)"; break; > + case 4: name = "NDL 2 Stall Event (brick 2)"; break; > + case 5: name = "NDL 2 No-Stall Event (brick 2)"; break; > + case 6: name = "NDL 5 Stall Event (brick 3)"; break; > + case 7: name = "NDL 5 No-Stall Event (brick 3)"; break; > + case 8: name = "NDL 4 Stall Event (brick 4)"; break; > + case 9: name = "NDL 4 No-Stall Event (brick 4)"; break; > + case 10: name = "NDL 3 Stall Event (brick 5)"; break; > + case 11: name = "NDL 3 No-Stall Event (brick 5)"; break; > + case 12: name = "NTL 0 Event"; break; > + case 13: name = "NTL 1 Event"; break; > + case 14: name = "NTL 2 Event"; break; > + case 15: name = "NTL 3 Event"; break; > + case 16: name = "NTL 4 Event"; 
break; > + case 17: name = "NTL 5 Event"; break; > + case 18: name = "TCE Event"; break; > + case 19: name = "ATS Event"; break; > + case 20: name = "CQ Event"; break; > + case 21: name = "MISC Event"; break; > + case 22: name = "NMMU Local Xstop"; break; > + default: name = "Unknown"; > + } > + return strdup(name); > +} > + > +static const struct irq_source_ops npu2_ipi_ops = { > + .attributes = npu2_ipi_attributes, > + .name = npu2_ipi_name, > +}; > + > +static void npu2_setup_irqs(struct npu2 *p) > +{ > + uint64_t reg, val; > + void *tp; > + > + p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN); > + if (p->base_lsi == XIVE_IRQ_ERROR) { > + prlog(PR_ERR, "NPU2: Failed to allocate interrupt sources, IRQs for NDL No-stall events will not be available.\n"); > + return; > + } > + xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops ); > + > + /* Set IPI configuration */ > + reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG); > + val = npu2_read(p, reg); > + val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K); > + val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX); > + npu2_write(p, reg, val); > + > + /* Set IRQ base */ > + reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE); > + tp = xive_get_trigger_port(p->base_lsi); > + val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT; > + npu2_write(p, reg, val); > +} > + > static void npu2_create_phb(struct dt_node *dn) > { > const struct dt_property *prop; > @@ -1678,7 +1814,9 @@ static void npu2_create_phb(struct dt_node *dn) > list_head_init(&p->phb.devices); > list_head_init(&p->phb.virt_devices); > > + npu2_setup_irqs(p); > npu2_populate_devices(p, dn); > + npu2_add_interrupt_map(p, dn); > npu2_add_phb_properties(p); > > slot = npu2_slot_create(&p->phb); > @@ -1687,9 +1825,9 @@ static void npu2_create_phb(struct dt_node *dn) > /** > * @fwts-label NPUCannotCreatePHBSlot > * @fwts-advice Firmware 
probably ran out of memory creating > - * NPU slot. NVLink functionality could be broken. > + * NPU2 slot. NVLink functionality could be broken. > */ > - prlog(PR_ERR, "NPU: Cannot create PHB slot\n"); > + prlog(PR_ERR, "NPU2: Cannot create PHB slot\n"); > } > > pci_register_phb(&p->phb, OPAL_DYNAMIC_PHB_ID); > @@ -1710,7 +1848,7 @@ void probe_npu2(void) > prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal); > } > > - /* Scan NPU XSCOM nodes */ > + /* Scan NPU2 XSCOM nodes */ > dt_for_each_compatible(dt_root, np, "ibm,power9-npu") > npu2_probe_phb(np); > > diff --git a/include/npu2-regs.h b/include/npu2-regs.h > index 86e2658a..759404cc 100644 > --- a/include/npu2-regs.h > +++ b/include/npu2-regs.h > @@ -286,6 +286,16 @@ void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask); > #define NPU2_MISC_ERR_RPT_HOLD NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x020) > #define NPU2_MISC_ERR_RPT_MASK NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x028) > #define NPU2_MISC_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x030) > +#define NPU2_MISC_CFG_CONFIG_SYNC_WAIT PPC_BITMASK(0,4) > +#define NPU2_MISC_CFG_PERF_CONFIG_ENABLE PPC_BIT(5) > +#define NPU2_MISC_CFG_PERF_CONFIG_PE_MASK PPC_BIT(6) > +#define NPU2_MISC_CFG_PERF_CONFIG_PE_MATCH PPC_BITMASK(7,10) > +#define NPU2_MISC_CFG_IPI_PS PPC_BIT(11) > +#define NPU2_MISC_CFG_IPI_PS_4K 0 > +#define NPU2_MISC_CFG_IPI_PS_64K 1 > +#define NPU2_MISC_CFG_IPI_OS PPC_BIT(12) > +#define NPU2_MISC_CFG_IPI_OS_AIX 0 > +#define NPU2_MISC_CFG_IPI_OS_LINUX 1 > #define NPU2_MISC_INHIBIT_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x038) > #define NPU2_MISC_FREEZE_ENABLE0 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x040) > #define NPU2_MISC_FREEZE_ENABLE1 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x048) >
On Thu, Aug 17, 2017 at 05:03:08PM +1000, Alistair Popple wrote: > Hi Sam, > > This looks pretty good except for one minor simplification below. I also can't > help but think it would be better if we split the patch up into three - one to > enable the IRQs, one to add the vendor space misc_irq_request stuff and another > with NPU->NPU2 name changes to keep the history clean. OK, will do. I'll repost ASAP. > Thanks! > > Regards, > > Alistair > > On Wed, 16 Aug 2017 12:37:40 PM Sam Bobroff wrote: > > Allow the NPU2 to trigger "recoverable data link" interrupts > > and provide a way to test them via a new vendor capability. > > > > Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com> > > --- > > > > > +static void npu2_add_interrupt_map(struct npu2 *p, > > + struct dt_node *dn) > > +{ > > + struct dt_node *npu2_dn, *link, *phb_dn; > > + uint32_t npu2_phandle, index = 0, i; > > + uint32_t icsp = get_ics_phandle(); > > + uint32_t *map; > > + size_t map_size; > > + uint32_t mask[] = {0xff00, 0x0, 0x0, 0x7}; > > + int nlinks = 0; > > + > > + npu2_phandle = dt_prop_get_u32(dn, "ibm,npcq"); > > + assert((npu2_dn = dt_find_by_phandle(dt_root, npu2_phandle))); > > + assert((phb_dn = p->phb.dt_node)); > > + dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") { > > + nlinks++; > > + } > > You could just use p->total_devices here. Ah thanks :-) > > + map_size = 7 * sizeof(*map) * nlinks; > > + map = malloc(map_size); > > + index = 0; > > + dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") { > > + i = index * 7; > > + map[i + 0] = (p->devices[index].bdfn << 8); > > + map[i + 1] = 0; > > + map[i + 2] = 0; > > + > > + map[i + 3] = 1; /* INT A */ > > + map[i + 4] = icsp; /* interrupt-parent */ > > + map[i + 5] = p->base_lsi + (index * 2) + 1; /* NDL No-Stall Event */ > > + map[i + 6] = 0; /* 0 = EDGE, 1 = LEVEL. 
*/ > > + index++; > > + } > > + dt_add_property(phb_dn, "interrupt-map", map, map_size); > > + free(map); > > + dt_add_property(phb_dn, "interrupt-map-mask", mask, sizeof(mask)); > > +} > > + > > static void npu2_add_phb_properties(struct npu2 *p) > > { > > struct dt_node *np = p->phb.dt_node; > > @@ -1617,7 +1682,7 @@ static void npu2_add_phb_properties(struct npu2 *p) > > dt_add_property_cells(np, "clock-frequency", 0x200, 0); > > dt_add_property_cells(np, "interrupt-parent", icsp); > > > > - /* NPU PHB properties */ > > + /* NPU2 PHB properties */ > > dt_add_property_cells(np, "ibm,opal-num-pes", > > NPU2_MAX_PE_NUM); > > dt_add_property_cells(np, "ibm,opal-reserved-pe", > > @@ -1641,6 +1706,77 @@ static void npu2_add_phb_properties(struct npu2 *p) > > hi32(mm_size), lo32(mm_size)); > > } > > > > +static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused) > > +{ > > + return IRQ_ATTR_TARGET_LINUX; > > +} > > + > > +static char *npu2_ipi_name(struct irq_source *is, uint32_t isn) > > +{ > > + struct npu2 *p = is->data; > > + uint32_t idx = isn - p->base_lsi; > > + const char *name; > > + > > + switch (idx) { > > + case 0: name = "NDL 0 Stall Event (brick 0)"; break; > > + case 1: name = "NDL 0 No-Stall Event (brick 0)"; break; > > + case 2: name = "NDL 1 Stall Event (brick 1)"; break; > > + case 3: name = "NDL 1 No-Stall Event (brick 1)"; break; > > + case 4: name = "NDL 2 Stall Event (brick 2)"; break; > > + case 5: name = "NDL 2 No-Stall Event (brick 2)"; break; > > + case 6: name = "NDL 5 Stall Event (brick 3)"; break; > > + case 7: name = "NDL 5 No-Stall Event (brick 3)"; break; > > + case 8: name = "NDL 4 Stall Event (brick 4)"; break; > > + case 9: name = "NDL 4 No-Stall Event (brick 4)"; break; > > + case 10: name = "NDL 3 Stall Event (brick 5)"; break; > > + case 11: name = "NDL 3 No-Stall Event (brick 5)"; break; > > + case 12: name = "NTL 0 Event"; break; > > + case 13: name = "NTL 1 Event"; break; > > + case 14: name = 
"NTL 2 Event"; break; > > + case 15: name = "NTL 3 Event"; break; > > + case 16: name = "NTL 4 Event"; break; > > + case 17: name = "NTL 5 Event"; break; > > + case 18: name = "TCE Event"; break; > > + case 19: name = "ATS Event"; break; > > + case 20: name = "CQ Event"; break; > > + case 21: name = "MISC Event"; break; > > + case 22: name = "NMMU Local Xstop"; break; > > + default: name = "Unknown"; > > + } > > + return strdup(name); > > +} > > + > > +static const struct irq_source_ops npu2_ipi_ops = { > > + .attributes = npu2_ipi_attributes, > > + .name = npu2_ipi_name, > > +}; > > + > > +static void npu2_setup_irqs(struct npu2 *p) > > +{ > > + uint64_t reg, val; > > + void *tp; > > + > > + p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN); > > + if (p->base_lsi == XIVE_IRQ_ERROR) { > > + prlog(PR_ERR, "NPU2: Failed to allocate interrupt sources, IRQs for NDL No-stall events will not be available.\n"); > > + return; > > + } > > + xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops ); > > + > > + /* Set IPI configuration */ > > + reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG); > > + val = npu2_read(p, reg); > > + val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K); > > + val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX); > > + npu2_write(p, reg, val); > > + > > + /* Set IRQ base */ > > + reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE); > > + tp = xive_get_trigger_port(p->base_lsi); > > + val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT; > > + npu2_write(p, reg, val); > > +} > > + > > static void npu2_create_phb(struct dt_node *dn) > > { > > const struct dt_property *prop; > > @@ -1678,7 +1814,9 @@ static void npu2_create_phb(struct dt_node *dn) > > list_head_init(&p->phb.devices); > > list_head_init(&p->phb.virt_devices); > > > > + npu2_setup_irqs(p); > > npu2_populate_devices(p, dn); > > + npu2_add_interrupt_map(p, dn); > > 
npu2_add_phb_properties(p); > > > > slot = npu2_slot_create(&p->phb); > > @@ -1687,9 +1825,9 @@ static void npu2_create_phb(struct dt_node *dn) > > /** > > * @fwts-label NPUCannotCreatePHBSlot > > * @fwts-advice Firmware probably ran out of memory creating > > - * NPU slot. NVLink functionality could be broken. > > + * NPU2 slot. NVLink functionality could be broken. > > */ > > - prlog(PR_ERR, "NPU: Cannot create PHB slot\n"); > > + prlog(PR_ERR, "NPU2: Cannot create PHB slot\n"); > > } > > > > pci_register_phb(&p->phb, OPAL_DYNAMIC_PHB_ID); > > @@ -1710,7 +1848,7 @@ void probe_npu2(void) > > prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal); > > } > > > > - /* Scan NPU XSCOM nodes */ > > + /* Scan NPU2 XSCOM nodes */ > > dt_for_each_compatible(dt_root, np, "ibm,power9-npu") > > npu2_probe_phb(np); > > > > diff --git a/include/npu2-regs.h b/include/npu2-regs.h > > index 86e2658a..759404cc 100644 > > --- a/include/npu2-regs.h > > +++ b/include/npu2-regs.h > > @@ -286,6 +286,16 @@ void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask); > > #define NPU2_MISC_ERR_RPT_HOLD NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x020) > > #define NPU2_MISC_ERR_RPT_MASK NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x028) > > #define NPU2_MISC_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x030) > > +#define NPU2_MISC_CFG_CONFIG_SYNC_WAIT PPC_BITMASK(0,4) > > +#define NPU2_MISC_CFG_PERF_CONFIG_ENABLE PPC_BIT(5) > > +#define NPU2_MISC_CFG_PERF_CONFIG_PE_MASK PPC_BIT(6) > > +#define NPU2_MISC_CFG_PERF_CONFIG_PE_MATCH PPC_BITMASK(7,10) > > +#define NPU2_MISC_CFG_IPI_PS PPC_BIT(11) > > +#define NPU2_MISC_CFG_IPI_PS_4K 0 > > +#define NPU2_MISC_CFG_IPI_PS_64K 1 > > +#define NPU2_MISC_CFG_IPI_OS PPC_BIT(12) > > +#define NPU2_MISC_CFG_IPI_OS_AIX 0 > > +#define NPU2_MISC_CFG_IPI_OS_LINUX 1 > > #define NPU2_MISC_INHIBIT_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x038) > > #define NPU2_MISC_FREEZE_ENABLE0 
NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x040) > > #define NPU2_MISC_FREEZE_ENABLE1 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x048) > >
diff --git a/hw/npu2.c b/hw/npu2.c index 74e33255..8f265525 100644 --- a/hw/npu2.c +++ b/hw/npu2.c @@ -36,6 +36,17 @@ #include <chip.h> #include <phys-map.h> #include <nvram.h> +#include <xive.h> + +#define NPU2_IRQ_BASE_SHIFT 13 +#define NPU2_N_DL_IRQS 23 +#define NPU2_N_DL_IRQS_ALIGN 32 + +#define VENDOR_CAP_START 0x80 +#define VENDOR_CAP_END 0x90 +#define VENDOR_CAP_LEN 0x10 +#define VENDOR_CAP_VERSION 0x01 +#define VENDOR_CAP_PCI_DEV_OFFSET 0x0d /* * NPU2 BAR layout definition. We have 3 stacks and each of them @@ -53,11 +64,6 @@ * configure one particular BAR. */ -#define VENDOR_CAP_START 0x80 -#define VENDOR_CAP_END 0x90 - -#define VENDOR_CAP_PCI_DEV_OFFSET 0x0d - static bool is_p9dd1(void) { struct proc_chip *chip = next_chip(NULL); @@ -70,7 +76,7 @@ static bool is_p9dd1(void) /* * We use the indirect method because it uses the same addresses as - * the MMIO offsets (NPU RING) + * the MMIO offsets (NPU2 RING) */ static void npu2_scom_set_addr(uint64_t gcid, uint64_t scom_base, uint64_t addr, uint64_t size) @@ -451,7 +457,7 @@ static int __npu2_dev_bind_pci_dev(struct phb *phb __unused, pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label"); - prlog(PR_DEBUG, "NPU: comparing GPU %s and NPU %s\n", + prlog(PR_DEBUG, "NPU2: comparing GPU '%s' and NPU2 '%s'\n", pcislot, dev->slot_label); if (streq(pcislot, dev->slot_label)) @@ -485,7 +491,7 @@ static void npu2_dev_bind_pci_dev(struct npu2_dev *dev) } } - prlog(PR_INFO, "%s: No PCI device for NPU device %04x:00:%02x.0 to bind to. If you expect a GPU to be there, this is a problem.\n", + prlog(PR_INFO, "%s: No PCI device for NPU2 device %04x:00:%02x.0 to bind to. 
If you expect a GPU to be there, this is a problem.\n", __func__, dev->npu->phb.opal_id, dev->index); } @@ -683,15 +689,15 @@ static int npu2_dn_fixup(struct phb *phb, npu2_dn_fixup_gmb(pd->dn, dev); dt_add_property_cells(pd->dn, "ibm,nvlink", dev->dt_node->phandle); - /* NPU devices require a slot location to associate with GPUs */ + /* NPU2 devices require a slot location to associate with GPUs */ dev->slot_label = dt_prop_get_def(pd->dn, "ibm,slot-label", NULL); if (!dev->slot_label) { /** * @fwts-label NPUNoPHBSlotLabel - * @fwts-advice No GPU/NPU slot information was found. + * @fwts-advice No GPU/NPU2 slot information was found. * NVLink2 functionality will not work. */ - prlog(PR_ERR, "NPU: Cannot find GPU slot information\n"); + prlog(PR_ERR, "NPU2: Cannot find GPU slot information\n"); return 0; } @@ -1215,8 +1221,8 @@ static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint } /* - *Probe NPU device node and create PCI root device node - * accordingly. The NPU device node should specify number + * Probe NPU2 device node and create PCI root device node + * accordingly. The NPU2 device node should specify number * of links and xscom base address to access links.
*/ static void npu2_probe_phb(struct dt_node *dn) @@ -1275,7 +1281,7 @@ static void npu2_probe_phb(struct dt_node *dn) index = dt_prop_get_u32(dn, "ibm,npu-index"); phb_index = dt_prop_get_u32(dn, "ibm,phb-index"); links = dt_prop_get_u32(dn, "ibm,npu-links"); - prlog(PR_INFO, "Chip %d Found NPU%d (%d links) at %s\n", + prlog(PR_INFO, "NPU2: Chip %d Found NPU2#%d (%d links) at %s\n", gcid, index, links, path); free(path); @@ -1389,23 +1395,39 @@ static uint32_t npu2_populate_pcie_cap(struct npu2_dev *dev, return start + PCICAP_EXP_SCTL2 + 8; } +static int64_t npu2_misc_irq_request(void *dev, struct pci_cfg_reg_filter *pcrf __unused, + uint32_t offset __unused, uint32_t len __unused, uint32_t *data, + bool write) +{ + struct pci_virt_device *pvd = dev; + struct npu2_dev *ndev = pvd->data; + struct npu2 *npu2 = ndev->npu; + + uint32_t idx = (ndev->index * 2) + 1; + uint64_t irq_bit = 1ULL << (63 - idx); + uint64_t reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_REQUEST); + + if (write) + npu2_write(npu2, reg, (*data ? irq_bit : 0)); + else + *data = !!(npu2_read(npu2, reg) & irq_bit); + return OPAL_SUCCESS; +} + static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev, uint32_t start, uint32_t prev_cap) { struct pci_virt_device *pvd = dev->pvd; -#define NPU2_VENDOR_CAP_VERSION 0x00 -#define NPU2_VENDOR_CAP_LEN 0x10 - /* Capbility list */ PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start); PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_VENDOR); dev->vendor_cap = start; /* Length and version */ - PCI_VIRT_CFG_INIT_RO(pvd, start + 2, 1, NPU2_VENDOR_CAP_LEN); - PCI_VIRT_CFG_INIT_RO(pvd, start + 3, 1, NPU2_VENDOR_CAP_VERSION); + PCI_VIRT_CFG_INIT_RO(pvd, start + 2, 1, VENDOR_CAP_LEN); + PCI_VIRT_CFG_INIT_RO(pvd, start + 3, 1, VENDOR_CAP_VERSION); /* * Defaults when the trap can't handle the read/write (eg. 
due @@ -1423,7 +1445,16 @@ static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev, /* Link index */ PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->index); - return start + NPU2_VENDOR_CAP_LEN; + /* Note: VENDOR_CAP_PCI_DEV_OFFSET is next at 0x0d + * but it is setup later. */ + + /* Allow triggering of interrupts (MISC_IRQ_REQUEST) by a write to config + * space: */ + pci_virt_add_filter(pvd, start + 0xe, 1, + PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE, + npu2_misc_irq_request, NULL); + + return start + VENDOR_CAP_LEN; } static void npu2_populate_cfg(struct npu2_dev *dev) @@ -1497,10 +1528,7 @@ static void npu2_populate_cfg(struct npu2_dev *dev) PCI_VIRT_CFG_INIT_RO(pvd, 0x38, 4, 0x00000000); /* 0x3c - INT line/pin/Minimal grant/Maximal latency */ - if (!NPU2DEV_BRICK(dev)) - PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100); - else - PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000200); + PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100); /* INT A */ /* PCIE and vendor specific capability */ pos = npu2_populate_pcie_cap(dev, 0x40, PCI_CFG_CAP); @@ -1597,6 +1625,43 @@ static void npu2_populate_devices(struct npu2 *p, } } +static void npu2_add_interrupt_map(struct npu2 *p, + struct dt_node *dn) +{ + struct dt_node *npu2_dn, *link, *phb_dn; + uint32_t npu2_phandle, index = 0, i; + uint32_t icsp = get_ics_phandle(); + uint32_t *map; + size_t map_size; + uint32_t mask[] = {0xff00, 0x0, 0x0, 0x7}; + int nlinks = 0; + + npu2_phandle = dt_prop_get_u32(dn, "ibm,npcq"); + assert((npu2_dn = dt_find_by_phandle(dt_root, npu2_phandle))); + assert((phb_dn = p->phb.dt_node)); + dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") { + nlinks++; + } + map_size = 7 * sizeof(*map) * nlinks; + map = malloc(map_size); + index = 0; + dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") { + i = index * 7; + map[i + 0] = (p->devices[index].bdfn << 8); + map[i + 1] = 0; + map[i + 2] = 0; + + map[i + 3] = 1; /* INT A */ + map[i + 4] = icsp; /* interrupt-parent */ 
+ map[i + 5] = p->base_lsi + (index * 2) + 1; /* NDL No-Stall Event */ + map[i + 6] = 0; /* 0 = EDGE, 1 = LEVEL. */ + index++; + } + dt_add_property(phb_dn, "interrupt-map", map, map_size); + free(map); + dt_add_property(phb_dn, "interrupt-map-mask", mask, sizeof(mask)); +} + static void npu2_add_phb_properties(struct npu2 *p) { struct dt_node *np = p->phb.dt_node; @@ -1617,7 +1682,7 @@ static void npu2_add_phb_properties(struct npu2 *p) dt_add_property_cells(np, "clock-frequency", 0x200, 0); dt_add_property_cells(np, "interrupt-parent", icsp); - /* NPU PHB properties */ + /* NPU2 PHB properties */ dt_add_property_cells(np, "ibm,opal-num-pes", NPU2_MAX_PE_NUM); dt_add_property_cells(np, "ibm,opal-reserved-pe", @@ -1641,6 +1706,77 @@ static void npu2_add_phb_properties(struct npu2 *p) hi32(mm_size), lo32(mm_size)); } +static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused) +{ + return IRQ_ATTR_TARGET_LINUX; +} + +static char *npu2_ipi_name(struct irq_source *is, uint32_t isn) +{ + struct npu2 *p = is->data; + uint32_t idx = isn - p->base_lsi; + const char *name; + + switch (idx) { + case 0: name = "NDL 0 Stall Event (brick 0)"; break; + case 1: name = "NDL 0 No-Stall Event (brick 0)"; break; + case 2: name = "NDL 1 Stall Event (brick 1)"; break; + case 3: name = "NDL 1 No-Stall Event (brick 1)"; break; + case 4: name = "NDL 2 Stall Event (brick 2)"; break; + case 5: name = "NDL 2 No-Stall Event (brick 2)"; break; + case 6: name = "NDL 5 Stall Event (brick 3)"; break; + case 7: name = "NDL 5 No-Stall Event (brick 3)"; break; + case 8: name = "NDL 4 Stall Event (brick 4)"; break; + case 9: name = "NDL 4 No-Stall Event (brick 4)"; break; + case 10: name = "NDL 3 Stall Event (brick 5)"; break; + case 11: name = "NDL 3 No-Stall Event (brick 5)"; break; + case 12: name = "NTL 0 Event"; break; + case 13: name = "NTL 1 Event"; break; + case 14: name = "NTL 2 Event"; break; + case 15: name = "NTL 3 Event"; break; + case 16: name = "NTL 4 
Event"; break; + case 17: name = "NTL 5 Event"; break; + case 18: name = "TCE Event"; break; + case 19: name = "ATS Event"; break; + case 20: name = "CQ Event"; break; + case 21: name = "MISC Event"; break; + case 22: name = "NMMU Local Xstop"; break; + default: name = "Unknown"; + } + return strdup(name); +} + +static const struct irq_source_ops npu2_ipi_ops = { + .attributes = npu2_ipi_attributes, + .name = npu2_ipi_name, +}; + +static void npu2_setup_irqs(struct npu2 *p) +{ + uint64_t reg, val; + void *tp; + + p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN); + if (p->base_lsi == XIVE_IRQ_ERROR) { + prlog(PR_ERR, "NPU2: Failed to allocate interrupt sources, IRQs for NDL No-stall events will not be available.\n"); + return; + } + xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops ); + + /* Set IPI configuration */ + reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG); + val = npu2_read(p, reg); + val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K); + val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX); + npu2_write(p, reg, val); + + /* Set IRQ base */ + reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE); + tp = xive_get_trigger_port(p->base_lsi); + val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT; + npu2_write(p, reg, val); +} + static void npu2_create_phb(struct dt_node *dn) { const struct dt_property *prop; @@ -1678,7 +1814,9 @@ static void npu2_create_phb(struct dt_node *dn) list_head_init(&p->phb.devices); list_head_init(&p->phb.virt_devices); + npu2_setup_irqs(p); npu2_populate_devices(p, dn); + npu2_add_interrupt_map(p, dn); npu2_add_phb_properties(p); slot = npu2_slot_create(&p->phb); @@ -1687,9 +1825,9 @@ static void npu2_create_phb(struct dt_node *dn) /** * @fwts-label NPUCannotCreatePHBSlot * @fwts-advice Firmware probably ran out of memory creating - * NPU slot. NVLink functionality could be broken. + * NPU2 slot. 
NVLink functionality could be broken. */ - prlog(PR_ERR, "NPU: Cannot create PHB slot\n"); + prlog(PR_ERR, "NPU2: Cannot create PHB slot\n"); } pci_register_phb(&p->phb, OPAL_DYNAMIC_PHB_ID); @@ -1710,7 +1848,7 @@ void probe_npu2(void) prlog(PR_WARNING, "NPU2: Using ZCAL impedance override = %d\n", nv_zcal_nominal); } - /* Scan NPU XSCOM nodes */ + /* Scan NPU2 XSCOM nodes */ dt_for_each_compatible(dt_root, np, "ibm,power9-npu") npu2_probe_phb(np); diff --git a/include/npu2-regs.h b/include/npu2-regs.h index 86e2658a..759404cc 100644 --- a/include/npu2-regs.h +++ b/include/npu2-regs.h @@ -286,6 +286,16 @@ void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask); #define NPU2_MISC_ERR_RPT_HOLD NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x020) #define NPU2_MISC_ERR_RPT_MASK NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x028) #define NPU2_MISC_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x030) +#define NPU2_MISC_CFG_CONFIG_SYNC_WAIT PPC_BITMASK(0,4) +#define NPU2_MISC_CFG_PERF_CONFIG_ENABLE PPC_BIT(5) +#define NPU2_MISC_CFG_PERF_CONFIG_PE_MASK PPC_BIT(6) +#define NPU2_MISC_CFG_PERF_CONFIG_PE_MATCH PPC_BITMASK(7,10) +#define NPU2_MISC_CFG_IPI_PS PPC_BIT(11) +#define NPU2_MISC_CFG_IPI_PS_4K 0 +#define NPU2_MISC_CFG_IPI_PS_64K 1 +#define NPU2_MISC_CFG_IPI_OS PPC_BIT(12) +#define NPU2_MISC_CFG_IPI_OS_AIX 0 +#define NPU2_MISC_CFG_IPI_OS_LINUX 1 #define NPU2_MISC_INHIBIT_CFG NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x038) #define NPU2_MISC_FREEZE_ENABLE0 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x040) #define NPU2_MISC_FREEZE_ENABLE1 NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 0x048)
Allow the NPU2 to trigger "recoverable data link" interrupts and provide a way to test them via a new vendor capability. Signed-off-by: Sam Bobroff <sam.bobroff@au1.ibm.com> --- hw/npu2.c | 194 ++++++++++++++++++++++++++++++++++++++++++++-------- include/npu2-regs.h | 10 +++ 2 files changed, 176 insertions(+), 28 deletions(-)