diff mbox series

[v2,08/15] hw/npu2: Make IRQ setup code common

Message ID 73462a672811115afadcd984d921dda6fd71b91e.1547168645.git-series.andrew.donnellan@au1.ibm.com
State Changes Requested
Headers show
Series Support OpenCAPI and NVLink devices on same NPU on Witherspoon | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning master/apply_patch Patch failed to apply
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Andrew Donnellan Jan. 11, 2019, 1:09 a.m. UTC
From: Frederic Barrat <fbarrat@linux.ibm.com>

The NPU IRQ setup code is currently duplicated between NVLink and
OpenCAPI.

Apart from the NPU2_MISC_IRQ_ENABLE2 register, the setup of IRQs is
basically identical between NVLink and OpenCAPI.

Move the NVLink IRQ setup code into the common path and get rid of all the
OpenCAPI IRQ setup apart from the enable mask.

We allocate a few more interrupts per NPU as OpenCAPI is a bit more greedy
than NVLink (35 IRQs vs 23 previously).

Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>

---
v1->v2:
- updated commit message (Fred)

Might need some testing
---
 hw/npu2-common.c   | 112 ++++++++++++++++++++++++++++++++++++++++++++++-
 hw/npu2-opencapi.c |  50 +--------------------
 hw/npu2.c          | 100 +-----------------------------------------
 include/npu2.h     |   1 +-
 4 files changed, 116 insertions(+), 147 deletions(-)

Comments

Alexey Kardashevskiy Jan. 21, 2019, 4:39 a.m. UTC | #1
On 11/01/2019 12:09, Andrew Donnellan wrote:
> From: Frederic Barrat <fbarrat@linux.ibm.com>
> 
> The NPU IRQ setup code is currently duplicated between NVLink and
> OpenCAPI.
> 
> Apart from the NPU2_MISC_IRQ_ENABLE2 register, the setup of IRQs is
> basically identical between NVLink and OpenCAPI.
> 
> Move the NVLink IRQ setup code into the common path and get rid of all the
> OpenCAPI IRQ setup apart from the enable mask.
> 
> We allocate a few more interrupts per NPU as OpenCAPI is a bit more greedy
> than NVLink (35 IRQs vs 23 previously).
> 
> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> 
> ---
> v1->v2:
> - updated commit message (Fred)>
> Might need some testing
> ---
>  hw/npu2-common.c   | 112 ++++++++++++++++++++++++++++++++++++++++++++++-
>  hw/npu2-opencapi.c |  50 +--------------------
>  hw/npu2.c          | 100 +-----------------------------------------
>  include/npu2.h     |   1 +-
>  4 files changed, 116 insertions(+), 147 deletions(-)
> 
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index 90c7f0ac4d5d..282e4873125a 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -24,6 +24,12 @@
>  #include <nvram.h>
>  #include <i2c.h>
>  #include <phys-map.h>
> +#include <interrupts.h>
> +#include <xive.h>
> +
> +#define NPU2_IRQ_BASE_SHIFT 13
> +#define NPU2_N_DL_IRQS 35
> +#define NPU2_N_DL_IRQS_ALIGN 64
>  
>  /*
>   * We use the indirect method because it uses the same addresses as
> @@ -246,6 +252,111 @@ static void assign_bars(struct npu2 *npu)
>  	npu->mm_size = last_bar.base + last_bar.size - npu->mm_base;
>  }
>  
> +static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
> +{
> +	struct npu2 *p = is->data;
> +	uint32_t idx = isn - p->base_lsi;
> +
> +	if (idx == 18)
> +		/* TCE Interrupt - used to detect a frozen PE */
> +		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_MSI;
> +	else
> +		return IRQ_ATTR_TARGET_LINUX;
> +}
> +
> +static char *npu2_ipi_name(struct irq_source *is, uint32_t isn)
> +{
> +	struct npu2 *p = is->data;
> +	uint32_t idx = isn - p->base_lsi;
> +	const char *name;
> +
> +	switch (idx) {
> +	case 0: name = "NDL 0 Stall Event (brick 0)"; break;
> +	case 1: name = "NDL 0 No-Stall Event (brick 0)"; break;
> +	case 2: name = "NDL 1 Stall Event (brick 1)"; break;
> +	case 3: name = "NDL 1 No-Stall Event (brick 1)"; break;
> +	case 4: name = "NDL 2 Stall Event (brick 2)"; break;
> +	case 5: name = "NDL 2 No-Stall Event (brick 2)"; break;
> +	case 6: name = "NDL 5 Stall Event (brick 3)"; break;
> +	case 7: name = "NDL 5 No-Stall Event (brick 3)"; break;
> +	case 8: name = "NDL 4 Stall Event (brick 4)"; break;
> +	case 9: name = "NDL 4 No-Stall Event (brick 4)"; break;
> +	case 10: name = "NDL 3 Stall Event (brick 5)"; break;
> +	case 11: name = "NDL 3 No-Stall Event (brick 5)"; break;
> +	case 12: name = "NTL 0 Event"; break;
> +	case 13: name = "NTL 1 Event"; break;
> +	case 14: name = "NTL 2 Event"; break;
> +	case 15: name = "NTL 3 Event"; break;
> +	case 16: name = "NTL 4 Event"; break;
> +	case 17: name = "NTL 5 Event"; break;
> +	case 18: name = "TCE Event"; break;
> +	case 19: name = "ATS Event"; break;
> +	case 20: name = "CQ Event"; break;
> +	case 21: name = "MISC Event"; break;
> +	case 22: name = "NMMU Local Xstop"; break;
> +	case 23: name = "Translate Fail (brick 2)"; break;
> +	case 24: name = "Translate Fail (brick 3)"; break;
> +	case 25: name = "Translate Fail (brick 4)"; break;
> +	case 26: name = "Translate Fail (brick 5)"; break;
> +	case 27: name = "OTL Event (brick 2)"; break;
> +	case 28: name = "OTL Event (brick 3)"; break;
> +	case 29: name = "OTL Event (brick 4)"; break;
> +	case 30: name = "OTL Event (brick 5)"; break;
> +	case 31: name = "XSL Event (brick 2)"; break;
> +	case 32: name = "XSL Event (brick 3)"; break;
> +	case 33: name = "XSL Event (brick 4)"; break;
> +	case 34: name = "XSL Event (brick 5)"; break;


These are new. Commit log?


> +	default: name = "Unknown";
> +	}
> +	return strdup(name);
> +}
> +
> +static void npu2_err_interrupt(struct irq_source *is, uint32_t isn)
> +{
> +	struct npu2 *p = is->data;
> +	uint32_t idx = isn - p->base_lsi;
> +
> +	if (idx != 18) {
> +		prerror("OPAL received unknown NPU2 interrupt %d\n", idx);
> +		return;
> +	}
> +
> +	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
> +				OPAL_EVENT_PCI_ERROR);
> +}
> +
> +static const struct irq_source_ops npu2_ipi_ops = {
> +	.interrupt	= npu2_err_interrupt,
> +	.attributes	= npu2_ipi_attributes,
> +	.name = npu2_ipi_name,
> +};
> +
> +static void setup_irqs(struct npu2 *p)
> +{
> +	uint64_t reg, val;
> +	void *tp;
> +
> +	p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN);
> +	if (p->base_lsi == XIVE_IRQ_ERROR) {
> +		prlog(PR_ERR, "NPU: Failed to allocate interrupt sources\n");


It looks like setup_irqs() is a copy of removed npu2_setup_irqs() except
the error message. Are there new 12 interrupts from npu2_ipi_name()
those previosly referred as "IRQs for NDL No-stall events"? If so, can
this be a separate patch?




> +		return;
> +	}
> +	xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops);
> +
> +	/* Set IPI configuration */
> +	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG);
> +	val = npu2_read(p, reg);
> +	val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K);
> +	val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX);
> +	npu2_write(p, reg, val);
> +
> +	/* Set IRQ base */
> +	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE);
> +	tp = xive_get_trigger_port(p->base_lsi);
> +	val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT;
> +	npu2_write(p, reg, val);
> +}
> +
>  static bool _i2c_presence_detect(struct npu2_dev *dev)
>  {
>  	uint8_t state, data;
> @@ -423,6 +534,7 @@ static void setup_devices(struct npu2 *npu)
>  	}
>  
>  	assign_bars(npu);
> +	setup_irqs(npu);
>  
>  	if (nvlink_detected)
>  		npu2_nvlink_init_npu(npu);
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 2a319979eccc..1b1a06ff81ad 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -48,11 +48,9 @@
>  #include <npu2.h>
>  #include <npu2-regs.h>
>  #include <phys-map.h>
> -#include <xive.h>
>  #include <i2c.h>
>  #include <nvram.h>
>  
> -#define NPU_IRQ_LEVELS		35
>  #define NPU_IRQ_LEVELS_XSL	23
>  #define MAX_PE_HANDLE		((1 << 15) - 1)
>  #define TL_MAX_TEMPLATE		63
> @@ -1390,7 +1388,7 @@ static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
>  	 * Pass the hw irq number for the translation fault irq
>  	 * irq levels 23 -> 26 are for translation faults, 1 per brick
>  	 */
> -	irq = dev->npu->irq_base + NPU_IRQ_LEVELS_XSL;
> +	irq = dev->npu->base_lsi + NPU_IRQ_LEVELS_XSL;
>  	if (stacku == NPU2_STACK_STCK_2U)
>  		irq += 2;
>  	if (block == NPU2_BLOCK_OTL1)
> @@ -1463,43 +1461,9 @@ static void mask_nvlink_fir(struct npu2 *p)
>  			NPU2_MISC_IRQ_ENABLE1, NPU2_MISC_DA_LEN_8B, reg);
>  }
>  
> -static int setup_irq(struct npu2 *p)
> +static int enable_xsl_irq(struct npu2 *p)
>  {
> -	uint64_t reg, mmio_addr;
> -	uint32_t base;
> -
> -	base = xive_alloc_ipi_irqs(p->chip_id, NPU_IRQ_LEVELS, 64);
> -	if (base == XIVE_IRQ_ERROR) {
> -		/**
> -		 * @fwts-label OCAPIIRQAllocationFailed
> -		 * @fwts-advice OpenCAPI IRQ setup failed. This is probably
> -		 * a firmware bug. OpenCAPI functionality will be broken.
> -		 */
> -		prlog(PR_ERR, "OCAPI: Couldn't allocate interrupts for NPU\n");
> -		return -1;
> -	}
> -	p->irq_base = base;
> -
> -	xive_register_ipi_source(base, NPU_IRQ_LEVELS, NULL, NULL);


Instead of (...,NULL, NULL) now it is (.., p, &npu2_ipi_ops); @p is not
so interesting but the new ops for opencapi is suspicious. Especially
with that magic value of 18 in that ops handlers which is better be defined.

Can opencapi trigger "18 - TCE Event - An error event related to TCE
translation"? I.e. does it use TCEs at all?


> -	mmio_addr = (uint64_t ) xive_get_trigger_port(base);
> -	prlog(PR_DEBUG, "OCAPI: NPU base irq %d @%llx\n", base, mmio_addr);
> -	reg = (mmio_addr & NPU2_MISC_IRQ_BASE_MASK) << 13;
> -	npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_BASE,
> -			NPU2_MISC_DA_LEN_8B, reg);


I do not know what it does really but can it just go with no comments in
the commit log?

> -	/*
> -	 * setup page size = 64k
> -	 *
> -	 * OS type is set to AIX: opal also runs with 2 pages per interrupt,
> -	 * so to cover the max offset for 35 levels of interrupt, we need
> -	 * bits 41 to 46, which is what the AIX setting does. There's no
> -	 * other meaning for that AIX setting.
> -	 */
> -	reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
> -			NPU2_MISC_DA_LEN_8B);
> -	reg |= NPU2_MISC_CFG_IPI_PS;
> -	reg &= ~NPU2_MISC_CFG_IPI_OS;


The old code would reset NPU2_MISC_CFG_IPI_OS and the new setup_irqs()
sets it:

val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX);

Sure this is correct?



> -	npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
> -			NPU2_MISC_DA_LEN_8B, reg);
> +	uint64_t reg;
>  
>  	/* enable translation interrupts for all bricks */
>  	reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_ENABLE2,
> @@ -1625,7 +1589,6 @@ static void read_nvram_training_state(void)
>  int npu2_opencapi_init_npu(struct npu2 *npu)
>  {
>  	struct npu2_dev *dev;
> -	int rc;
>  
>  	assert(platform.ocapi);
>  	read_nvram_training_state();
> @@ -1651,10 +1614,7 @@ int npu2_opencapi_init_npu(struct npu2 *npu)
>  		address_translation_config(npu->chip_id, npu->xscom_base, dev->brick_index);
>  	}
>  
> -	/* Procedure 13.1.3.10 - Interrupt Configuration */
> -	rc = setup_irq(npu);
> -	if (rc)
> -		goto failed;
> +	enable_xsl_irq(npu);
>  
>  	for (int i = 0; i < npu->total_devices; i++) {
>  		dev = &npu->devices[i];
> @@ -1664,8 +1624,6 @@ int npu2_opencapi_init_npu(struct npu2 *npu)
>  	}
>  
>  	return 0;
> -failed:
> -	return -1;
>  }
>  
>  static const struct phb_ops npu2_opencapi_ops = {
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 78233579d3b7..6c720764e614 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -20,7 +20,6 @@
>  #include <pci.h>
>  #include <pci-slot.h>
>  #include <pci-virt.h>
> -#include <interrupts.h>
>  #include <opal.h>
>  #include <opal-api.h>
>  #include <cpu.h>
> @@ -36,14 +35,9 @@
>  #include <chip.h>
>  #include <phys-map.h>
>  #include <nvram.h>
> -#include <xive.h>
>  #include <xscom-p9-regs.h>
>  #include <phb4.h>
>  
> -#define NPU2_IRQ_BASE_SHIFT 13
> -#define NPU2_N_DL_IRQS 23
> -#define NPU2_N_DL_IRQS_ALIGN 64
> -
>  #define VENDOR_CAP_START    0x80
>  #define VENDOR_CAP_END      0x90
>  #define VENDOR_CAP_LEN      0x10
> @@ -1737,99 +1731,6 @@ static void npu2_add_phb_properties(struct npu2 *p)
>  			      hi32(mm_size), lo32(mm_size));
>  }
>  
> -static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
> -{
> -	struct npu2 *p = is->data;
> -	uint32_t idx = isn - p->base_lsi;
> -
> -	if (idx == 18)
> -		/* TCE Interrupt - used to detect a frozen PE */
> -		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_MSI;
> -	else
> -		return IRQ_ATTR_TARGET_LINUX;
> -}
> -
> -static char *npu2_ipi_name(struct irq_source *is, uint32_t isn)
> -{
> -	struct npu2 *p = is->data;
> -	uint32_t idx = isn - p->base_lsi;
> -	const char *name;
> -
> -	switch (idx) {
> -	case 0: name = "NDL 0 Stall Event (brick 0)"; break;
> -	case 1: name = "NDL 0 No-Stall Event (brick 0)"; break;
> -	case 2: name = "NDL 1 Stall Event (brick 1)"; break;
> -	case 3: name = "NDL 1 No-Stall Event (brick 1)"; break;
> -	case 4: name = "NDL 2 Stall Event (brick 2)"; break;
> -	case 5: name = "NDL 2 No-Stall Event (brick 2)"; break;
> -	case 6: name = "NDL 5 Stall Event (brick 3)"; break;
> -	case 7: name = "NDL 5 No-Stall Event (brick 3)"; break;
> -	case 8: name = "NDL 4 Stall Event (brick 4)"; break;
> -	case 9: name = "NDL 4 No-Stall Event (brick 4)"; break;
> -	case 10: name = "NDL 3 Stall Event (brick 5)"; break;
> -	case 11: name = "NDL 3 No-Stall Event (brick 5)"; break;
> -	case 12: name = "NTL 0 Event"; break;
> -	case 13: name = "NTL 1 Event"; break;
> -	case 14: name = "NTL 2 Event"; break;
> -	case 15: name = "NTL 3 Event"; break;
> -	case 16: name = "NTL 4 Event"; break;
> -	case 17: name = "NTL 5 Event"; break;
> -	case 18: name = "TCE Event"; break;
> -	case 19: name = "ATS Event"; break;
> -	case 20: name = "CQ Event"; break;
> -	case 21: name = "MISC Event"; break;
> -	case 22: name = "NMMU Local Xstop"; break;
> -	default: name = "Unknown";
> -	}
> -	return strdup(name);
> -}
> -
> -static void npu2_err_interrupt(struct irq_source *is, uint32_t isn)
> -{
> -	struct npu2 *p = is->data;
> -	uint32_t idx = isn - p->base_lsi;
> -
> -	if (idx != 18) {
> -		prerror("OPAL received unknown NPU2 interrupt %d\n", idx);
> -		return;
> -	}
> -
> -	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
> -				OPAL_EVENT_PCI_ERROR);
> -}
> -
> -static const struct irq_source_ops npu2_ipi_ops = {
> -	.interrupt	= npu2_err_interrupt,
> -	.attributes	= npu2_ipi_attributes,
> -	.name = npu2_ipi_name,
> -};
> -
> -static void npu2_setup_irqs(struct npu2 *p)
> -{
> -	uint64_t reg, val;
> -	void *tp;
> -
> -	p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN);
> -	if (p->base_lsi == XIVE_IRQ_ERROR) {
> -		prlog(PR_ERR, "NPU: Failed to allocate interrupt sources, IRQs for NDL No-stall events will not be available.\n");
> -		return;
> -	}
> -	xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops );
> -
> -	/* Set IPI configuration */
> -	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG);
> -	val = npu2_read(p, reg);
> -	val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K);
> -	val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX);
> -	npu2_write(p, reg, val);
> -
> -	/* Set IRQ base */
> -	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE);
> -	tp = xive_get_trigger_port(p->base_lsi);
> -	val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT;
> -	npu2_write(p, reg, val);
> -}
> -
>  void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
>  {
>  	struct pci_slot *slot;
> @@ -1843,7 +1744,6 @@ void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
>  	list_head_init(&npu->phb_nvlink.devices);
>  	list_head_init(&npu->phb_nvlink.virt_devices);
>  
> -	npu2_setup_irqs(npu);
>  	npu2_configure_devices(npu);
>  	npu2_add_interrupt_map(npu);
>  	npu2_add_phb_properties(npu);
> diff --git a/include/npu2.h b/include/npu2.h
> index 8d739d8d5659..75c11ff3c26f 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -151,7 +151,6 @@ struct npu2 {
>  	uint64_t	mm_base;
>  	uint64_t	mm_size;
>  	uint32_t	base_lsi;
> -	uint32_t	irq_base;

Why? NPU used only @base_lsi and CAPI used only @irq_base?

Quite hard patch to read - it does too many things and does not comment
on half of them :-/



>  	uint32_t	total_devices;
>  	struct npu2_dev	*devices;
>  	enum phys_map_type gpu_map_type;
>
Frederic Barrat Jan. 21, 2019, 8:39 p.m. UTC | #2
Le 20/01/2019 à 22:39, Alexey Kardashevskiy a écrit :
> 
> 
> On 11/01/2019 12:09, Andrew Donnellan wrote:
>> From: Frederic Barrat <fbarrat@linux.ibm.com>
>>
>> The NPU IRQ setup code is currently duplicated between NVLink and
>> OpenCAPI.
>>
>> Apart from the NPU2_MISC_IRQ_ENABLE2 register, the setup of IRQs is
>> basically identical between NVLink and OpenCAPI.
>>
>> Move the NVLink IRQ setup code into the common path and get rid of all the
>> OpenCAPI IRQ setup apart from the enable mask.
>>
>> We allocate a few more interrupts per NPU as OpenCAPI is a bit more greedy
>> than NVLink (35 IRQs vs 23 previously).
>>
>> Signed-off-by: Frederic Barrat <fbarrat@linux.ibm.com>
>> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
>>
>> ---
>> v1->v2:
>> - updated commit message (Fred)>
>> Might need some testing
>> ---
>>   hw/npu2-common.c   | 112 ++++++++++++++++++++++++++++++++++++++++++++++-
>>   hw/npu2-opencapi.c |  50 +--------------------
>>   hw/npu2.c          | 100 +-----------------------------------------
>>   include/npu2.h     |   1 +-
>>   4 files changed, 116 insertions(+), 147 deletions(-)
>>
>> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
>> index 90c7f0ac4d5d..282e4873125a 100644
>> --- a/hw/npu2-common.c
>> +++ b/hw/npu2-common.c
>> @@ -24,6 +24,12 @@
>>   #include <nvram.h>
>>   #include <i2c.h>
>>   #include <phys-map.h>
>> +#include <interrupts.h>
>> +#include <xive.h>
>> +
>> +#define NPU2_IRQ_BASE_SHIFT 13
>> +#define NPU2_N_DL_IRQS 35
>> +#define NPU2_N_DL_IRQS_ALIGN 64
>>   
>>   /*
>>    * We use the indirect method because it uses the same addresses as
>> @@ -246,6 +252,111 @@ static void assign_bars(struct npu2 *npu)
>>   	npu->mm_size = last_bar.base + last_bar.size - npu->mm_base;
>>   }
>>   
>> +static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
>> +{
>> +	struct npu2 *p = is->data;
>> +	uint32_t idx = isn - p->base_lsi;
>> +
>> +	if (idx == 18)
>> +		/* TCE Interrupt - used to detect a frozen PE */
>> +		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_MSI;
>> +	else
>> +		return IRQ_ATTR_TARGET_LINUX;
>> +}
>> +
>> +static char *npu2_ipi_name(struct irq_source *is, uint32_t isn)
>> +{
>> +	struct npu2 *p = is->data;
>> +	uint32_t idx = isn - p->base_lsi;
>> +	const char *name;
>> +
>> +	switch (idx) {
>> +	case 0: name = "NDL 0 Stall Event (brick 0)"; break;
>> +	case 1: name = "NDL 0 No-Stall Event (brick 0)"; break;
>> +	case 2: name = "NDL 1 Stall Event (brick 1)"; break;
>> +	case 3: name = "NDL 1 No-Stall Event (brick 1)"; break;
>> +	case 4: name = "NDL 2 Stall Event (brick 2)"; break;
>> +	case 5: name = "NDL 2 No-Stall Event (brick 2)"; break;
>> +	case 6: name = "NDL 5 Stall Event (brick 3)"; break;
>> +	case 7: name = "NDL 5 No-Stall Event (brick 3)"; break;
>> +	case 8: name = "NDL 4 Stall Event (brick 4)"; break;
>> +	case 9: name = "NDL 4 No-Stall Event (brick 4)"; break;
>> +	case 10: name = "NDL 3 Stall Event (brick 5)"; break;
>> +	case 11: name = "NDL 3 No-Stall Event (brick 5)"; break;
>> +	case 12: name = "NTL 0 Event"; break;
>> +	case 13: name = "NTL 1 Event"; break;
>> +	case 14: name = "NTL 2 Event"; break;
>> +	case 15: name = "NTL 3 Event"; break;
>> +	case 16: name = "NTL 4 Event"; break;
>> +	case 17: name = "NTL 5 Event"; break;
>> +	case 18: name = "TCE Event"; break;
>> +	case 19: name = "ATS Event"; break;
>> +	case 20: name = "CQ Event"; break;
>> +	case 21: name = "MISC Event"; break;
>> +	case 22: name = "NMMU Local Xstop"; break;
>> +	case 23: name = "Translate Fail (brick 2)"; break;
>> +	case 24: name = "Translate Fail (brick 3)"; break;
>> +	case 25: name = "Translate Fail (brick 4)"; break;
>> +	case 26: name = "Translate Fail (brick 5)"; break;
>> +	case 27: name = "OTL Event (brick 2)"; break;
>> +	case 28: name = "OTL Event (brick 3)"; break;
>> +	case 29: name = "OTL Event (brick 4)"; break;
>> +	case 30: name = "OTL Event (brick 5)"; break;
>> +	case 31: name = "XSL Event (brick 2)"; break;
>> +	case 32: name = "XSL Event (brick 3)"; break;
>> +	case 33: name = "XSL Event (brick 4)"; break;
>> +	case 34: name = "XSL Event (brick 5)"; break;
> 
> 
> These are new. Commit log?


Yes, we allocate 12 more interrupts per NPU and the 12 new ones can only 
be used by opencapi. A note was added to the commit message with that 
v2: "We allocate a few more interrupts per NPU as OpenCAPI is a bit more 
greedy than NVLink (35 IRQs vs 23 previously)."

It doesn't mean all those interrupts can be triggered. There are some 
config "interrupt on errors" registers on the NPU to activate most of 
them. For opencapi, so far only the "translate fail" ones are used (but 
the others will be soon).


>> +	default: name = "Unknown";
>> +	}
>> +	return strdup(name);
>> +}
>> +
>> +static void npu2_err_interrupt(struct irq_source *is, uint32_t isn)
>> +{
>> +	struct npu2 *p = is->data;
>> +	uint32_t idx = isn - p->base_lsi;
>> +
>> +	if (idx != 18) {
>> +		prerror("OPAL received unknown NPU2 interrupt %d\n", idx);
>> +		return;
>> +	}
>> +
>> +	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
>> +				OPAL_EVENT_PCI_ERROR);
>> +}
>> +
>> +static const struct irq_source_ops npu2_ipi_ops = {
>> +	.interrupt	= npu2_err_interrupt,
>> +	.attributes	= npu2_ipi_attributes,
>> +	.name = npu2_ipi_name,
>> +};
>> +
>> +static void setup_irqs(struct npu2 *p)
>> +{
>> +	uint64_t reg, val;
>> +	void *tp;
>> +
>> +	p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN);
>> +	if (p->base_lsi == XIVE_IRQ_ERROR) {
>> +		prlog(PR_ERR, "NPU: Failed to allocate interrupt sources\n");
> 
> 
> It looks like setup_irqs() is a copy of removed npu2_setup_irqs() except
> the error message. 

Yes, I kept the nvlink version instead of the opencapi version, thinking 
it would make the review easier :-)

 >
Are there new 12 interrupts from npu2_ipi_name()
> those previosly referred as "IRQs for NDL No-stall events"? If so, can
> this be a separate patch?

Those new interrupts only make sense for opencapi, they cannot be 
generated by a NPU only configured in nvlink mode.



>> +		return;
>> +	}
>> +	xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops);
>> +
>> +	/* Set IPI configuration */
>> +	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG);
>> +	val = npu2_read(p, reg);
>> +	val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K);
>> +	val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX);
>> +	npu2_write(p, reg, val);
>> +
>> +	/* Set IRQ base */
>> +	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE);
>> +	tp = xive_get_trigger_port(p->base_lsi);
>> +	val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT;
>> +	npu2_write(p, reg, val);
>> +}
>> +
>>   static bool _i2c_presence_detect(struct npu2_dev *dev)
>>   {
>>   	uint8_t state, data;
>> @@ -423,6 +534,7 @@ static void setup_devices(struct npu2 *npu)
>>   	}
>>   
>>   	assign_bars(npu);
>> +	setup_irqs(npu);
>>   
>>   	if (nvlink_detected)
>>   		npu2_nvlink_init_npu(npu);
>> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
>> index 2a319979eccc..1b1a06ff81ad 100644
>> --- a/hw/npu2-opencapi.c
>> +++ b/hw/npu2-opencapi.c
>> @@ -48,11 +48,9 @@
>>   #include <npu2.h>
>>   #include <npu2-regs.h>
>>   #include <phys-map.h>
>> -#include <xive.h>
>>   #include <i2c.h>
>>   #include <nvram.h>
>>   
>> -#define NPU_IRQ_LEVELS		35
>>   #define NPU_IRQ_LEVELS_XSL	23
>>   #define MAX_PE_HANDLE		((1 << 15) - 1)
>>   #define TL_MAX_TEMPLATE		63
>> @@ -1390,7 +1388,7 @@ static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
>>   	 * Pass the hw irq number for the translation fault irq
>>   	 * irq levels 23 -> 26 are for translation faults, 1 per brick
>>   	 */
>> -	irq = dev->npu->irq_base + NPU_IRQ_LEVELS_XSL;
>> +	irq = dev->npu->base_lsi + NPU_IRQ_LEVELS_XSL;
>>   	if (stacku == NPU2_STACK_STCK_2U)
>>   		irq += 2;
>>   	if (block == NPU2_BLOCK_OTL1)
>> @@ -1463,43 +1461,9 @@ static void mask_nvlink_fir(struct npu2 *p)
>>   			NPU2_MISC_IRQ_ENABLE1, NPU2_MISC_DA_LEN_8B, reg);
>>   }
>>   
>> -static int setup_irq(struct npu2 *p)
>> +static int enable_xsl_irq(struct npu2 *p)
>>   {
>> -	uint64_t reg, mmio_addr;
>> -	uint32_t base;
>> -
>> -	base = xive_alloc_ipi_irqs(p->chip_id, NPU_IRQ_LEVELS, 64);
>> -	if (base == XIVE_IRQ_ERROR) {
>> -		/**
>> -		 * @fwts-label OCAPIIRQAllocationFailed
>> -		 * @fwts-advice OpenCAPI IRQ setup failed. This is probably
>> -		 * a firmware bug. OpenCAPI functionality will be broken.
>> -		 */
>> -		prlog(PR_ERR, "OCAPI: Couldn't allocate interrupts for NPU\n");
>> -		return -1;
>> -	}
>> -	p->irq_base = base;
>> -
>> -	xive_register_ipi_source(base, NPU_IRQ_LEVELS, NULL, NULL);
> 
> 
> Instead of (...,NULL, NULL) now it is (.., p, &npu2_ipi_ops); @p is not
> so interesting but the new ops for opencapi is suspicious. Especially
> with that magic value of 18 in that ops handlers which is better be defined.
> Can opencapi trigger "18 - TCE Event - An error event related to TCE
> translation"? I.e. does it use TCEs at all?

Interrupt 18 only makes sense for nvlink, like many of the interrupts. 
All the stall/no stall interrupts are also masked for opencapi in a 
later patch.
At this point, there are only 4 interrupts which can be triggered by 
opencapi (23->26), to raise a page fault when a FPGA tries to access an 
effective address that the nMMU couldn't translate. Those interrupts go 
directly to linux. We have plans to configure the NPU to trigger some 
more interrupts specific to opencapi pretty soon (interrupt 27 and 
higher). So moving the opencapi code to starting using an source ops 
makes sense: it keeps the code common, and is going to be required 
pretty soon anyway.


> 
> 
>> -	mmio_addr = (uint64_t ) xive_get_trigger_port(base);
>> -	prlog(PR_DEBUG, "OCAPI: NPU base irq %d @%llx\n", base, mmio_addr);
>> -	reg = (mmio_addr & NPU2_MISC_IRQ_BASE_MASK) << 13;
>> -	npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_BASE,
>> -			NPU2_MISC_DA_LEN_8B, reg);
> 
> 
> I do not know what it does really but can it just go with no comments in
> the commit log?

There's the same code in npu2.c and is kept in the common version:

	/* Set IRQ base */
	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, 
NPU2_MISC_IRQ_BASE);
	tp = xive_get_trigger_port(p->base_lsi);
	val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT;
	npu2_write(p, reg, val);


> 
>> -	/*
>> -	 * setup page size = 64k
>> -	 *
>> -	 * OS type is set to AIX: opal also runs with 2 pages per interrupt,
>> -	 * so to cover the max offset for 35 levels of interrupt, we need
>> -	 * bits 41 to 46, which is what the AIX setting does. There's no
>> -	 * other meaning for that AIX setting.
>> -	 */
>> -	reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
>> -			NPU2_MISC_DA_LEN_8B);
>> -	reg |= NPU2_MISC_CFG_IPI_PS;
>> -	reg &= ~NPU2_MISC_CFG_IPI_OS;
> 
> 
> The old code would reset NPU2_MISC_CFG_IPI_OS and the new setup_irqs()
> sets it:
> 
> val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX);
> 
> Sure this is correct?

We want to configure the NPU to use 64k page for interrupts and to be in 
"AIX mode". That's what the old opencapi code was doing and the 
nvlink/common code does the same:

#define   NPU2_MISC_CFG_IPI_OS_AIX		0

So yes, that looks correct.


> 
> 
>> -	npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
>> -			NPU2_MISC_DA_LEN_8B, reg);
>> +	uint64_t reg;
>>   
>>   	/* enable translation interrupts for all bricks */
>>   	reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_ENABLE2,
>> @@ -1625,7 +1589,6 @@ static void read_nvram_training_state(void)
>>   int npu2_opencapi_init_npu(struct npu2 *npu)
>>   {
>>   	struct npu2_dev *dev;
>> -	int rc;
>>   
>>   	assert(platform.ocapi);
>>   	read_nvram_training_state();
>> @@ -1651,10 +1614,7 @@ int npu2_opencapi_init_npu(struct npu2 *npu)
>>   		address_translation_config(npu->chip_id, npu->xscom_base, dev->brick_index);
>>   	}
>>   
>> -	/* Procedure 13.1.3.10 - Interrupt Configuration */
>> -	rc = setup_irq(npu);
>> -	if (rc)
>> -		goto failed;
>> +	enable_xsl_irq(npu);
>>   
>>   	for (int i = 0; i < npu->total_devices; i++) {
>>   		dev = &npu->devices[i];
>> @@ -1664,8 +1624,6 @@ int npu2_opencapi_init_npu(struct npu2 *npu)
>>   	}
>>   
>>   	return 0;
>> -failed:
>> -	return -1;
>>   }
>>   
>>   static const struct phb_ops npu2_opencapi_ops = {
>> diff --git a/hw/npu2.c b/hw/npu2.c
>> index 78233579d3b7..6c720764e614 100644
>> --- a/hw/npu2.c
>> +++ b/hw/npu2.c
>> @@ -20,7 +20,6 @@
>>   #include <pci.h>
>>   #include <pci-slot.h>
>>   #include <pci-virt.h>
>> -#include <interrupts.h>
>>   #include <opal.h>
>>   #include <opal-api.h>
>>   #include <cpu.h>
>> @@ -36,14 +35,9 @@
>>   #include <chip.h>
>>   #include <phys-map.h>
>>   #include <nvram.h>
>> -#include <xive.h>
>>   #include <xscom-p9-regs.h>
>>   #include <phb4.h>
>>   
>> -#define NPU2_IRQ_BASE_SHIFT 13
>> -#define NPU2_N_DL_IRQS 23
>> -#define NPU2_N_DL_IRQS_ALIGN 64
>> -
>>   #define VENDOR_CAP_START    0x80
>>   #define VENDOR_CAP_END      0x90
>>   #define VENDOR_CAP_LEN      0x10
>> @@ -1737,99 +1731,6 @@ static void npu2_add_phb_properties(struct npu2 *p)
>>   			      hi32(mm_size), lo32(mm_size));
>>   }
>>   
>> -static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
>> -{
>> -	struct npu2 *p = is->data;
>> -	uint32_t idx = isn - p->base_lsi;
>> -
>> -	if (idx == 18)
>> -		/* TCE Interrupt - used to detect a frozen PE */
>> -		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_MSI;
>> -	else
>> -		return IRQ_ATTR_TARGET_LINUX;
>> -}
>> -
>> -static char *npu2_ipi_name(struct irq_source *is, uint32_t isn)
>> -{
>> -	struct npu2 *p = is->data;
>> -	uint32_t idx = isn - p->base_lsi;
>> -	const char *name;
>> -
>> -	switch (idx) {
>> -	case 0: name = "NDL 0 Stall Event (brick 0)"; break;
>> -	case 1: name = "NDL 0 No-Stall Event (brick 0)"; break;
>> -	case 2: name = "NDL 1 Stall Event (brick 1)"; break;
>> -	case 3: name = "NDL 1 No-Stall Event (brick 1)"; break;
>> -	case 4: name = "NDL 2 Stall Event (brick 2)"; break;
>> -	case 5: name = "NDL 2 No-Stall Event (brick 2)"; break;
>> -	case 6: name = "NDL 5 Stall Event (brick 3)"; break;
>> -	case 7: name = "NDL 5 No-Stall Event (brick 3)"; break;
>> -	case 8: name = "NDL 4 Stall Event (brick 4)"; break;
>> -	case 9: name = "NDL 4 No-Stall Event (brick 4)"; break;
>> -	case 10: name = "NDL 3 Stall Event (brick 5)"; break;
>> -	case 11: name = "NDL 3 No-Stall Event (brick 5)"; break;
>> -	case 12: name = "NTL 0 Event"; break;
>> -	case 13: name = "NTL 1 Event"; break;
>> -	case 14: name = "NTL 2 Event"; break;
>> -	case 15: name = "NTL 3 Event"; break;
>> -	case 16: name = "NTL 4 Event"; break;
>> -	case 17: name = "NTL 5 Event"; break;
>> -	case 18: name = "TCE Event"; break;
>> -	case 19: name = "ATS Event"; break;
>> -	case 20: name = "CQ Event"; break;
>> -	case 21: name = "MISC Event"; break;
>> -	case 22: name = "NMMU Local Xstop"; break;
>> -	default: name = "Unknown";
>> -	}
>> -	return strdup(name);
>> -}
>> -
>> -static void npu2_err_interrupt(struct irq_source *is, uint32_t isn)
>> -{
>> -	struct npu2 *p = is->data;
>> -	uint32_t idx = isn - p->base_lsi;
>> -
>> -	if (idx != 18) {
>> -		prerror("OPAL received unknown NPU2 interrupt %d\n", idx);
>> -		return;
>> -	}
>> -
>> -	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
>> -				OPAL_EVENT_PCI_ERROR);
>> -}
>> -
>> -static const struct irq_source_ops npu2_ipi_ops = {
>> -	.interrupt	= npu2_err_interrupt,
>> -	.attributes	= npu2_ipi_attributes,
>> -	.name = npu2_ipi_name,
>> -};
>> -
>> -static void npu2_setup_irqs(struct npu2 *p)
>> -{
>> -	uint64_t reg, val;
>> -	void *tp;
>> -
>> -	p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN);
>> -	if (p->base_lsi == XIVE_IRQ_ERROR) {
>> -		prlog(PR_ERR, "NPU: Failed to allocate interrupt sources, IRQs for NDL No-stall events will not be available.\n");
>> -		return;
>> -	}
>> -	xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops );
>> -
>> -	/* Set IPI configuration */
>> -	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG);
>> -	val = npu2_read(p, reg);
>> -	val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K);
>> -	val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX);
>> -	npu2_write(p, reg, val);
>> -
>> -	/* Set IRQ base */
>> -	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE);
>> -	tp = xive_get_trigger_port(p->base_lsi);
>> -	val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT;
>> -	npu2_write(p, reg, val);
>> -}
>> -
>>   void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
>>   {
>>   	struct pci_slot *slot;
>> @@ -1843,7 +1744,6 @@ void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
>>   	list_head_init(&npu->phb_nvlink.devices);
>>   	list_head_init(&npu->phb_nvlink.virt_devices);
>>   
>> -	npu2_setup_irqs(npu);
>>   	npu2_configure_devices(npu);
>>   	npu2_add_interrupt_map(npu);
>>   	npu2_add_phb_properties(npu);
>> diff --git a/include/npu2.h b/include/npu2.h
>> index 8d739d8d5659..75c11ff3c26f 100644
>> --- a/include/npu2.h
>> +++ b/include/npu2.h
>> @@ -151,7 +151,6 @@ struct npu2 {
>>   	uint64_t	mm_base;
>>   	uint64_t	mm_size;
>>   	uint32_t	base_lsi;
>> -	uint32_t	irq_base;
> 
> Why? NPU used only @base_lsi and CAPI used only @irq_base?

These are perfectly redundant and a clear sign that things have been 
implemented independently on nvlink and opencapi. They are designating 
the same thing and one was used for an NPU configured for nnvlink, and 
the other for a NPU in opencapi mode, which doesn't make sense any more, 
since a NPU can now be supporting both at the same time.

> Quite hard patch to read - it does too many things and does not comment
> on half of them :-/

I'm surprised and not sure how different it can be done. I basically 
kept the nvlink version of the code, thinking it would make the review 
easier, since it's the "historical" version, and the 2 versions were 
almost identical.

   Fred
diff mbox series

Patch

diff --git a/hw/npu2-common.c b/hw/npu2-common.c
index 90c7f0ac4d5d..282e4873125a 100644
--- a/hw/npu2-common.c
+++ b/hw/npu2-common.c
@@ -24,6 +24,12 @@ 
 #include <nvram.h>
 #include <i2c.h>
 #include <phys-map.h>
+#include <interrupts.h>
+#include <xive.h>
+
+#define NPU2_IRQ_BASE_SHIFT 13
+#define NPU2_N_DL_IRQS 35
+#define NPU2_N_DL_IRQS_ALIGN 64
 
 /*
  * We use the indirect method because it uses the same addresses as
@@ -246,6 +252,111 @@  static void assign_bars(struct npu2 *npu)
 	npu->mm_size = last_bar.base + last_bar.size - npu->mm_base;
 }
 
+static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
+{
+	struct npu2 *p = is->data;
+	uint32_t idx = isn - p->base_lsi;
+
+	if (idx == 18)
+		/* TCE Interrupt - used to detect a frozen PE */
+		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_MSI;
+	else
+		return IRQ_ATTR_TARGET_LINUX;
+}
+
+static char *npu2_ipi_name(struct irq_source *is, uint32_t isn)
+{
+	struct npu2 *p = is->data;
+	uint32_t idx = isn - p->base_lsi;
+	const char *name;
+
+	switch (idx) {
+	case 0: name = "NDL 0 Stall Event (brick 0)"; break;
+	case 1: name = "NDL 0 No-Stall Event (brick 0)"; break;
+	case 2: name = "NDL 1 Stall Event (brick 1)"; break;
+	case 3: name = "NDL 1 No-Stall Event (brick 1)"; break;
+	case 4: name = "NDL 2 Stall Event (brick 2)"; break;
+	case 5: name = "NDL 2 No-Stall Event (brick 2)"; break;
+	case 6: name = "NDL 5 Stall Event (brick 3)"; break;
+	case 7: name = "NDL 5 No-Stall Event (brick 3)"; break;
+	case 8: name = "NDL 4 Stall Event (brick 4)"; break;
+	case 9: name = "NDL 4 No-Stall Event (brick 4)"; break;
+	case 10: name = "NDL 3 Stall Event (brick 5)"; break;
+	case 11: name = "NDL 3 No-Stall Event (brick 5)"; break;
+	case 12: name = "NTL 0 Event"; break;
+	case 13: name = "NTL 1 Event"; break;
+	case 14: name = "NTL 2 Event"; break;
+	case 15: name = "NTL 3 Event"; break;
+	case 16: name = "NTL 4 Event"; break;
+	case 17: name = "NTL 5 Event"; break;
+	case 18: name = "TCE Event"; break;
+	case 19: name = "ATS Event"; break;
+	case 20: name = "CQ Event"; break;
+	case 21: name = "MISC Event"; break;
+	case 22: name = "NMMU Local Xstop"; break;
+	case 23: name = "Translate Fail (brick 2)"; break;
+	case 24: name = "Translate Fail (brick 3)"; break;
+	case 25: name = "Translate Fail (brick 4)"; break;
+	case 26: name = "Translate Fail (brick 5)"; break;
+	case 27: name = "OTL Event (brick 2)"; break;
+	case 28: name = "OTL Event (brick 3)"; break;
+	case 29: name = "OTL Event (brick 4)"; break;
+	case 30: name = "OTL Event (brick 5)"; break;
+	case 31: name = "XSL Event (brick 2)"; break;
+	case 32: name = "XSL Event (brick 3)"; break;
+	case 33: name = "XSL Event (brick 4)"; break;
+	case 34: name = "XSL Event (brick 5)"; break;
+	default: name = "Unknown";
+	}
+	return strdup(name);
+}
+
+static void npu2_err_interrupt(struct irq_source *is, uint32_t isn)
+{
+	struct npu2 *p = is->data;
+	uint32_t idx = isn - p->base_lsi;
+
+	if (idx != 18) {
+		prerror("OPAL received unknown NPU2 interrupt %d\n", idx);
+		return;
+	}
+
+	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
+				OPAL_EVENT_PCI_ERROR);
+}
+
+static const struct irq_source_ops npu2_ipi_ops = {
+	.interrupt	= npu2_err_interrupt,
+	.attributes	= npu2_ipi_attributes,
+	.name = npu2_ipi_name,
+};
+
+static void setup_irqs(struct npu2 *p)
+{
+	uint64_t reg, val;
+	void *tp;
+
+	p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN);
+	if (p->base_lsi == XIVE_IRQ_ERROR) {
+		prlog(PR_ERR, "NPU: Failed to allocate interrupt sources\n");
+		return;
+	}
+	xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops);
+
+	/* Set IPI configuration */
+	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG);
+	val = npu2_read(p, reg);
+	val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K);
+	val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX);
+	npu2_write(p, reg, val);
+
+	/* Set IRQ base */
+	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE);
+	tp = xive_get_trigger_port(p->base_lsi);
+	val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT;
+	npu2_write(p, reg, val);
+}
+
 static bool _i2c_presence_detect(struct npu2_dev *dev)
 {
 	uint8_t state, data;
@@ -423,6 +534,7 @@  static void setup_devices(struct npu2 *npu)
 	}
 
 	assign_bars(npu);
+	setup_irqs(npu);
 
 	if (nvlink_detected)
 		npu2_nvlink_init_npu(npu);
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 2a319979eccc..1b1a06ff81ad 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -48,11 +48,9 @@ 
 #include <npu2.h>
 #include <npu2-regs.h>
 #include <phys-map.h>
-#include <xive.h>
 #include <i2c.h>
 #include <nvram.h>
 
-#define NPU_IRQ_LEVELS		35
 #define NPU_IRQ_LEVELS_XSL	23
 #define MAX_PE_HANDLE		((1 << 15) - 1)
 #define TL_MAX_TEMPLATE		63
@@ -1390,7 +1388,7 @@  static int npu2_add_mmio_regs(struct phb *phb, struct pci_device *pd,
 	 * Pass the hw irq number for the translation fault irq
 	 * irq levels 23 -> 26 are for translation faults, 1 per brick
 	 */
-	irq = dev->npu->irq_base + NPU_IRQ_LEVELS_XSL;
+	irq = dev->npu->base_lsi + NPU_IRQ_LEVELS_XSL;
 	if (stacku == NPU2_STACK_STCK_2U)
 		irq += 2;
 	if (block == NPU2_BLOCK_OTL1)
@@ -1463,43 +1461,9 @@  static void mask_nvlink_fir(struct npu2 *p)
 			NPU2_MISC_IRQ_ENABLE1, NPU2_MISC_DA_LEN_8B, reg);
 }
 
-static int setup_irq(struct npu2 *p)
+static int enable_xsl_irq(struct npu2 *p)
 {
-	uint64_t reg, mmio_addr;
-	uint32_t base;
-
-	base = xive_alloc_ipi_irqs(p->chip_id, NPU_IRQ_LEVELS, 64);
-	if (base == XIVE_IRQ_ERROR) {
-		/**
-		 * @fwts-label OCAPIIRQAllocationFailed
-		 * @fwts-advice OpenCAPI IRQ setup failed. This is probably
-		 * a firmware bug. OpenCAPI functionality will be broken.
-		 */
-		prlog(PR_ERR, "OCAPI: Couldn't allocate interrupts for NPU\n");
-		return -1;
-	}
-	p->irq_base = base;
-
-	xive_register_ipi_source(base, NPU_IRQ_LEVELS, NULL, NULL);
-	mmio_addr = (uint64_t ) xive_get_trigger_port(base);
-	prlog(PR_DEBUG, "OCAPI: NPU base irq %d @%llx\n", base, mmio_addr);
-	reg = (mmio_addr & NPU2_MISC_IRQ_BASE_MASK) << 13;
-	npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_BASE,
-			NPU2_MISC_DA_LEN_8B, reg);
-	/*
-	 * setup page size = 64k
-	 *
-	 * OS type is set to AIX: opal also runs with 2 pages per interrupt,
-	 * so to cover the max offset for 35 levels of interrupt, we need
-	 * bits 41 to 46, which is what the AIX setting does. There's no
-	 * other meaning for that AIX setting.
-	 */
-	reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
-			NPU2_MISC_DA_LEN_8B);
-	reg |= NPU2_MISC_CFG_IPI_PS;
-	reg &= ~NPU2_MISC_CFG_IPI_OS;
-	npu2_scom_write(p->chip_id, p->xscom_base, NPU2_MISC_CFG,
-			NPU2_MISC_DA_LEN_8B, reg);
+	uint64_t reg;
 
 	/* enable translation interrupts for all bricks */
 	reg = npu2_scom_read(p->chip_id, p->xscom_base, NPU2_MISC_IRQ_ENABLE2,
@@ -1625,7 +1589,6 @@  static void read_nvram_training_state(void)
 int npu2_opencapi_init_npu(struct npu2 *npu)
 {
 	struct npu2_dev *dev;
-	int rc;
 
 	assert(platform.ocapi);
 	read_nvram_training_state();
@@ -1651,10 +1614,7 @@  int npu2_opencapi_init_npu(struct npu2 *npu)
 		address_translation_config(npu->chip_id, npu->xscom_base, dev->brick_index);
 	}
 
-	/* Procedure 13.1.3.10 - Interrupt Configuration */
-	rc = setup_irq(npu);
-	if (rc)
-		goto failed;
+	enable_xsl_irq(npu);
 
 	for (int i = 0; i < npu->total_devices; i++) {
 		dev = &npu->devices[i];
@@ -1664,8 +1624,6 @@  int npu2_opencapi_init_npu(struct npu2 *npu)
 	}
 
 	return 0;
-failed:
-	return -1;
 }
 
 static const struct phb_ops npu2_opencapi_ops = {
diff --git a/hw/npu2.c b/hw/npu2.c
index 78233579d3b7..6c720764e614 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -20,7 +20,6 @@ 
 #include <pci.h>
 #include <pci-slot.h>
 #include <pci-virt.h>
-#include <interrupts.h>
 #include <opal.h>
 #include <opal-api.h>
 #include <cpu.h>
@@ -36,14 +35,9 @@ 
 #include <chip.h>
 #include <phys-map.h>
 #include <nvram.h>
-#include <xive.h>
 #include <xscom-p9-regs.h>
 #include <phb4.h>
 
-#define NPU2_IRQ_BASE_SHIFT 13
-#define NPU2_N_DL_IRQS 23
-#define NPU2_N_DL_IRQS_ALIGN 64
-
 #define VENDOR_CAP_START    0x80
 #define VENDOR_CAP_END      0x90
 #define VENDOR_CAP_LEN      0x10
@@ -1737,99 +1731,6 @@  static void npu2_add_phb_properties(struct npu2 *p)
 			      hi32(mm_size), lo32(mm_size));
 }
 
-static uint64_t npu2_ipi_attributes(struct irq_source *is __unused, uint32_t isn __unused)
-{
-	struct npu2 *p = is->data;
-	uint32_t idx = isn - p->base_lsi;
-
-	if (idx == 18)
-		/* TCE Interrupt - used to detect a frozen PE */
-		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_MSI;
-	else
-		return IRQ_ATTR_TARGET_LINUX;
-}
-
-static char *npu2_ipi_name(struct irq_source *is, uint32_t isn)
-{
-	struct npu2 *p = is->data;
-	uint32_t idx = isn - p->base_lsi;
-	const char *name;
-
-	switch (idx) {
-	case 0: name = "NDL 0 Stall Event (brick 0)"; break;
-	case 1: name = "NDL 0 No-Stall Event (brick 0)"; break;
-	case 2: name = "NDL 1 Stall Event (brick 1)"; break;
-	case 3: name = "NDL 1 No-Stall Event (brick 1)"; break;
-	case 4: name = "NDL 2 Stall Event (brick 2)"; break;
-	case 5: name = "NDL 2 No-Stall Event (brick 2)"; break;
-	case 6: name = "NDL 5 Stall Event (brick 3)"; break;
-	case 7: name = "NDL 5 No-Stall Event (brick 3)"; break;
-	case 8: name = "NDL 4 Stall Event (brick 4)"; break;
-	case 9: name = "NDL 4 No-Stall Event (brick 4)"; break;
-	case 10: name = "NDL 3 Stall Event (brick 5)"; break;
-	case 11: name = "NDL 3 No-Stall Event (brick 5)"; break;
-	case 12: name = "NTL 0 Event"; break;
-	case 13: name = "NTL 1 Event"; break;
-	case 14: name = "NTL 2 Event"; break;
-	case 15: name = "NTL 3 Event"; break;
-	case 16: name = "NTL 4 Event"; break;
-	case 17: name = "NTL 5 Event"; break;
-	case 18: name = "TCE Event"; break;
-	case 19: name = "ATS Event"; break;
-	case 20: name = "CQ Event"; break;
-	case 21: name = "MISC Event"; break;
-	case 22: name = "NMMU Local Xstop"; break;
-	default: name = "Unknown";
-	}
-	return strdup(name);
-}
-
-static void npu2_err_interrupt(struct irq_source *is, uint32_t isn)
-{
-	struct npu2 *p = is->data;
-	uint32_t idx = isn - p->base_lsi;
-
-	if (idx != 18) {
-		prerror("OPAL received unknown NPU2 interrupt %d\n", idx);
-		return;
-	}
-
-	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
-				OPAL_EVENT_PCI_ERROR);
-}
-
-static const struct irq_source_ops npu2_ipi_ops = {
-	.interrupt	= npu2_err_interrupt,
-	.attributes	= npu2_ipi_attributes,
-	.name = npu2_ipi_name,
-};
-
-static void npu2_setup_irqs(struct npu2 *p)
-{
-	uint64_t reg, val;
-	void *tp;
-
-	p->base_lsi = xive_alloc_ipi_irqs(p->chip_id, NPU2_N_DL_IRQS, NPU2_N_DL_IRQS_ALIGN);
-	if (p->base_lsi == XIVE_IRQ_ERROR) {
-		prlog(PR_ERR, "NPU: Failed to allocate interrupt sources, IRQs for NDL No-stall events will not be available.\n");
-		return;
-	}
-	xive_register_ipi_source(p->base_lsi, NPU2_N_DL_IRQS, p, &npu2_ipi_ops );
-
-	/* Set IPI configuration */
-	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_CFG);
-	val = npu2_read(p, reg);
-	val = SETFIELD(NPU2_MISC_CFG_IPI_PS, val, NPU2_MISC_CFG_IPI_PS_64K);
-	val = SETFIELD(NPU2_MISC_CFG_IPI_OS, val, NPU2_MISC_CFG_IPI_OS_AIX);
-	npu2_write(p, reg, val);
-
-	/* Set IRQ base */
-	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC, NPU2_MISC_IRQ_BASE);
-	tp = xive_get_trigger_port(p->base_lsi);
-	val = ((uint64_t)tp) << NPU2_IRQ_BASE_SHIFT;
-	npu2_write(p, reg, val);
-}
-
 void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
 {
 	struct pci_slot *slot;
@@ -1843,7 +1744,6 @@  void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
 	list_head_init(&npu->phb_nvlink.devices);
 	list_head_init(&npu->phb_nvlink.virt_devices);
 
-	npu2_setup_irqs(npu);
 	npu2_configure_devices(npu);
 	npu2_add_interrupt_map(npu);
 	npu2_add_phb_properties(npu);
diff --git a/include/npu2.h b/include/npu2.h
index 8d739d8d5659..75c11ff3c26f 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -151,7 +151,6 @@  struct npu2 {
 	uint64_t	mm_base;
 	uint64_t	mm_size;
 	uint32_t	base_lsi;
-	uint32_t	irq_base;
 	uint32_t	total_devices;
 	struct npu2_dev	*devices;
 	enum phys_map_type gpu_map_type;