diff mbox series

[03/13] hw/npu2: Move PHY/NTL/GENID BAR assignment to common code

Message ID a55f84802bec6191e3f258832dd144011fbb0124.1544597914.git-series.andrew.donnellan@au1.ibm.com
State Superseded
Headers show
Series Support OpenCAPI and NVLink devices on same NPU on Witherspoon | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master

Commit Message

Andrew Donnellan Dec. 12, 2018, 6:58 a.m. UTC
Assignment of PHY/NTL/GENID BARs is currently duplicated between NVLink
and OpenCAPI. This is going to cause us particular issues later on when we
implement support for mixed-mode setups with NVLink and OpenCAPI on the
same NPU.

Centralise the assignment of PHY/NTL/GENID BARs in common code.

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
---
 hw/npu2-common.c   |  76 ++++++++++++++++++++++++++++++-
 hw/npu2-opencapi.c |  80 ++++----------------------------
 hw/npu2.c          | 116 +++++-----------------------------------------
 include/npu2.h     |   4 +-
 4 files changed, 104 insertions(+), 172 deletions(-)

Comments

Frederic Barrat Dec. 13, 2018, 3:18 p.m. UTC | #1
Le 12/12/2018 à 07:58, Andrew Donnellan a écrit :
> Assignment of PHY/NTL/GENID BARs is currently duplicated between NVLink
> and OpenCAPI. This is going to cause us particular issues later on when we
> implement support for mixed-mode setups with NVLink and OpenCAPI on the
> same NPU.
> 
> Centralise the assignment of PHY/NTL/GENID BARs in common code.
> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> ---

That one gave me a headache, mostly because I was not that familiar with 
the bar setup on the nvlink side of things (and some bars are written 
twice!). Hopefully we'll get another pair of eyes with a nvlink focus on 
it. A few questions below.



>   hw/npu2-common.c   |  76 ++++++++++++++++++++++++++++++-
>   hw/npu2-opencapi.c |  80 ++++----------------------------
>   hw/npu2.c          | 116 +++++-----------------------------------------
>   include/npu2.h     |   4 +-
>   4 files changed, 104 insertions(+), 172 deletions(-)
> 
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index 3446acb45bea..b140e9ffd064 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -17,6 +17,7 @@
>   #include <skiboot.h>
>   #include <xscom.h>
>   #include <pci.h>
> +#include <pci-cfg.h>
>   #include <npu2.h>
>   #include <npu2-regs.h>
>   #include <bitutils.h>
> @@ -174,6 +175,79 @@ void npu2_write_bar(struct npu2 *p, struct npu2_bar *bar, uint32_t gcid,
>   	}
>   }
> 
> +static void assign_bars(struct npu2 *npu)
> +{
> +	uint32_t i;
> +	struct npu2_bar *bar;
> +	struct npu2_dev *dev;
> +	struct npu2_bar phy_bars[] = {
> +		/* NPU_REGS must be first in this list */
> +		{ .type = NPU_REGS, .index = 0,
> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
> +		  .enabled = true },
> +		{ .type = NPU_PHY, .index = 0,
> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
> +		  .enabled = true },
> +		{ .type = NPU_PHY, .index = 1,
> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
> +		  .enabled = true },
> +	};
> +	struct npu2_bar last_bar =
> +		{ .type = NPU_GENID, .index = 2,
> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR) };
> +
> +	/* Set common PHY BARs */
> +	for (i = 0; i < ARRAY_SIZE(phy_bars); i++) {
> +		bar = &phy_bars[i];
> +		npu2_get_bar(npu->chip_id, bar);
> +		npu2_write_bar(npu, bar, npu->chip_id, npu->xscom_base);
> +	}
> +
> +	/* Device BARs */
> +	for (i = 0; i < npu->total_devices; i++) {
> +		dev = &npu->devices[i];
> +		if (dev->type == NPU2_DEV_TYPE_UNKNOWN)
> +			continue;
> +
> +		/* NTL / OCAPI_MMIO BAR */
> +		bar = &dev->ntl_bar;
> +		if (dev->type == NPU2_DEV_TYPE_NVLINK)
> +			bar->type = NPU_NTL;
> +		else if (dev->type == NPU2_DEV_TYPE_OPENCAPI)
> +			bar->type = NPU_OCAPI_MMIO;
> +		bar->index = dev->brick_index;
> +		bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev),
> +					   0, NPU2DEV_BRICK(dev) == 0 ?
> +					   NPU2_NTL0_BAR : NPU2_NTL1_BAR);
> +		bar->flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
> +		npu2_get_bar(npu->chip_id, bar);
> +		npu2_write_bar(npu, bar, npu->chip_id, npu->xscom_base);
> +
> +		/* GENID BAR */
> +		bar = &dev->genid_bar;
> +		bar->type = NPU_GENID;
> +		bar->index = NPU2DEV_STACK(dev);
> +		bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev),
> +					   0, NPU2_GENID_BAR);
> +		bar->flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
> +		npu2_get_bar(npu->chip_id, bar);
> +		bar->size = 0x10000;
> +		if (NPU2DEV_BRICK(dev))
> +			bar->base += 0x10000;
> +		npu2_write_bar(npu, bar, npu->chip_id, npu->xscom_base);

The GENID reg is per stack (same address for the 2 devices). For 2 
devices on the same stack, isn't the 2nd one overwriting the 1st, with 
only half the size available? I don't understand why we're doing it. And 
it seems different from what we were having for opencapi. I'm guessing 
it may not make much of a difference, since we're only addressing 4k of 
config space.

It matches what nvlink was doing though, but the same comment applies.



> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 88e4eb1974a2..b374a1035ac9 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c

> @@ -1582,8 +1530,8 @@ static void setup_device(struct npu2_dev *dev)
>   	uint64_t mm_win[2];
> 
>   	/* Populate PHB device node */
> -	phys_map_get(dev->npu->chip_id, NPU_OCAPI_MMIO, dev->brick_index, &mm_win[0],
> -		     &mm_win[1]);
> +	mm_win[0] = dev->ntl_bar.base;
> +	mm_win[1] = dev->ntl_bar.size;
>   	prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n",
>   	      mm_win[0], mm_win[1]);
>   	dn_phb = dt_new_addr(dt_root, "pciex", mm_win[0]);
> @@ -1629,7 +1577,8 @@ static void setup_device(struct npu2_dev *dev)
>   	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
>   	setup_afu_mmio_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
>   	/* Procedure 13.1.3.9 - AFU Config BARs */
> -	setup_afu_config_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
> +	dev->genid_bar.enabled = true;
> +	npu2_write_bar(dev->npu, &dev->genid_bar, dev->npu->chip_id, dev->npu->xscom_base);

For readability, I wouldn't mind keeping a separate 
setup_afu_config_bars() function with the (now minimal) genid setup.
Andrew Donnellan Dec. 14, 2018, 1:13 a.m. UTC | #2
On 14/12/18 2:18 am, Frederic Barrat wrote:
>> +        /* GENID BAR */
>> +        bar = &dev->genid_bar;
>> +        bar->type = NPU_GENID;
>> +        bar->index = NPU2DEV_STACK(dev);
>> +        bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + 
>> NPU2DEV_STACK(dev),
>> +                       0, NPU2_GENID_BAR);
>> +        bar->flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
>> +        npu2_get_bar(npu->chip_id, bar);
>> +        bar->size = 0x10000;
>> +        if (NPU2DEV_BRICK(dev))
>> +            bar->base += 0x10000;
>> +        npu2_write_bar(npu, bar, npu->chip_id, npu->xscom_base);
> 
> The GENID reg is per stack (same address for the 2 devices). For 2 
> devices on the same stack, isn't the 2nd one overwriting the 1st, with 
> only half the size available? I don't understand why we're doing it. And 
> it seems different from what we were having for opencapi. I'm guessing 
> it may not make much of a difference, since we're only addressing 4k of 
> config space.
> 
> It matches what nvlink was doing though, but the same comment applies.

If you look at npu2_write_bar(), you'll see that the size isn't actually 
used when writing the GENID BAR, and the base address is right shifted 
16 bits (i.e. 0x10000) so the adjusted base won't change the value 
that's written to the BAR register either.

The reason that we're doing it is that the NVLink code needs to 
advertise these BARs to Linux for... reasons.

Now that you've mentioned... I think there might be an issue when 
accessing config space through OTL1, will look at it and fix in v2 if 
needed.

> 
> 
> 
>> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
>> index 88e4eb1974a2..b374a1035ac9 100644
>> --- a/hw/npu2-opencapi.c
>> +++ b/hw/npu2-opencapi.c
> 
>> @@ -1582,8 +1530,8 @@ static void setup_device(struct npu2_dev *dev)
>>       uint64_t mm_win[2];
>>
>>       /* Populate PHB device node */
>> -    phys_map_get(dev->npu->chip_id, NPU_OCAPI_MMIO, dev->brick_index, 
>> &mm_win[0],
>> -             &mm_win[1]);
>> +    mm_win[0] = dev->ntl_bar.base;
>> +    mm_win[1] = dev->ntl_bar.size;
>>       prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + 
>> %016llx\n",
>>             mm_win[0], mm_win[1]);
>>       dn_phb = dt_new_addr(dt_root, "pciex", mm_win[0]);
>> @@ -1629,7 +1577,8 @@ static void setup_device(struct npu2_dev *dev)
>>       /* Procedure 13.1.3.8 - AFU MMIO Range BARs */
>>       setup_afu_mmio_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
>>       /* Procedure 13.1.3.9 - AFU Config BARs */
>> -    setup_afu_config_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
>> +    dev->genid_bar.enabled = true;
>> +    npu2_write_bar(dev->npu, &dev->genid_bar, dev->npu->chip_id, 
>> dev->npu->xscom_base);
> 
> For readability, I wouldn't mind keeping a separate 
> setup_afu_config_bars() function with the (now minimal) genid setup.
> 

noted for v2.
Alexey Kardashevskiy Dec. 14, 2018, 4:53 a.m. UTC | #3
On 14/12/2018 02:18, Frederic Barrat wrote:
> 
> 
> Le 12/12/2018 à 07:58, Andrew Donnellan a écrit :
>> Assignment of PHY/NTL/GENID BARs is currently duplicated between NVLink
>> and OpenCAPI. This is going to cause us particular issues later on
>> when we
>> implement support for mixed-mode setups with NVLink and OpenCAPI on the
>> same NPU.
>>
>> Centralise the assignment of PHY/NTL/GENID BARs in common code.
>>
>> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
>> ---
> 
> That one gave me a headache, mostly because I was not that familiar with
> the bar setup on the nvlink side of things (and some bars are written
> twice!). Hopefully we'll get another pair of eyes with a nvlink focus on
> it.


There are way too many changes for such a small commit log. There are
actually 2 or 3 patches inside this one trying to get out, this is not
bisecable at all :(
Andrew Donnellan Dec. 14, 2018, 5:35 a.m. UTC | #4
On 14/12/18 3:53 pm, Alexey Kardashevskiy wrote:
> 
> 
> On 14/12/2018 02:18, Frederic Barrat wrote:
>>
>>
>> Le 12/12/2018 à 07:58, Andrew Donnellan a écrit :
>>> Assignment of PHY/NTL/GENID BARs is currently duplicated between NVLink
>>> and OpenCAPI. This is going to cause us particular issues later on
>>> when we
>>> implement support for mixed-mode setups with NVLink and OpenCAPI on the
>>> same NPU.
>>>
>>> Centralise the assignment of PHY/NTL/GENID BARs in common code.
>>>
>>> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
>>> ---
>>
>> That one gave me a headache, mostly because I was not that familiar with
>> the bar setup on the nvlink side of things (and some bars are written
>> twice!). Hopefully we'll get another pair of eyes with a nvlink focus on
>> it.
> 
> 
> There are way too many changes for such a small commit log. There are
> actually 2 or 3 patches inside this one trying to get out, this is not
> bisecable at all :(

I put these together as one patch because there's a bunch of 
interdependencies here... there's a few bits I can split out easily so 
I'll do that in v2 but there will probably still be one big chunk for 
the big BAR assigning loop
diff mbox series

Patch

diff --git a/hw/npu2-common.c b/hw/npu2-common.c
index 3446acb45bea..b140e9ffd064 100644
--- a/hw/npu2-common.c
+++ b/hw/npu2-common.c
@@ -17,6 +17,7 @@ 
 #include <skiboot.h>
 #include <xscom.h>
 #include <pci.h>
+#include <pci-cfg.h>
 #include <npu2.h>
 #include <npu2-regs.h>
 #include <bitutils.h>
@@ -174,6 +175,79 @@  void npu2_write_bar(struct npu2 *p, struct npu2_bar *bar, uint32_t gcid,
 	}
 }
 
+static void assign_bars(struct npu2 *npu)
+{
+	uint32_t i;
+	struct npu2_bar *bar;
+	struct npu2_dev *dev;
+	struct npu2_bar phy_bars[] = {
+		/* NPU_REGS must be first in this list */
+		{ .type = NPU_REGS, .index = 0,
+		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
+		  .enabled = true },
+		{ .type = NPU_PHY, .index = 0,
+		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
+		  .enabled = true },
+		{ .type = NPU_PHY, .index = 1,
+		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
+		  .enabled = true },
+	};
+	struct npu2_bar last_bar =
+		{ .type = NPU_GENID, .index = 2,
+		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR) };
+
+	/* Set common PHY BARs */
+	for (i = 0; i < ARRAY_SIZE(phy_bars); i++) {
+		bar = &phy_bars[i];
+		npu2_get_bar(npu->chip_id, bar);
+		npu2_write_bar(npu, bar, npu->chip_id, npu->xscom_base);
+	}
+
+	/* Device BARs */
+	for (i = 0; i < npu->total_devices; i++) {
+		dev = &npu->devices[i];
+		if (dev->type == NPU2_DEV_TYPE_UNKNOWN)
+			continue;
+
+		/* NTL / OCAPI_MMIO BAR */
+		bar = &dev->ntl_bar;
+		if (dev->type == NPU2_DEV_TYPE_NVLINK)
+			bar->type = NPU_NTL;
+		else if (dev->type == NPU2_DEV_TYPE_OPENCAPI)
+			bar->type = NPU_OCAPI_MMIO;
+		bar->index = dev->brick_index;
+		bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev),
+					   0, NPU2DEV_BRICK(dev) == 0 ?
+					   NPU2_NTL0_BAR : NPU2_NTL1_BAR);
+		bar->flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
+		npu2_get_bar(npu->chip_id, bar);
+		npu2_write_bar(npu, bar, npu->chip_id, npu->xscom_base);
+
+		/* GENID BAR */
+		bar = &dev->genid_bar;
+		bar->type = NPU_GENID;
+		bar->index = NPU2DEV_STACK(dev);
+		bar->reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev),
+					   0, NPU2_GENID_BAR);
+		bar->flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
+		npu2_get_bar(npu->chip_id, bar);
+		bar->size = 0x10000;
+		if (NPU2DEV_BRICK(dev))
+			bar->base += 0x10000;
+		npu2_write_bar(npu, bar, npu->chip_id, npu->xscom_base);
+	};
+
+	/* Global MMIO BAR */
+	npu->regs = (uint64_t *)phy_bars[0].base;
+	npu->regs_size = phy_bars[0].size;
+
+	/* NTL and GENID BARs are exposed to kernel via the mm
+	 * window */
+	npu2_get_bar(npu->chip_id, &last_bar);
+	npu->mm_base = phy_bars[2].base + phy_bars[2].size;
+	npu->mm_size = last_bar.base + last_bar.size - npu->mm_base;
+}
+
 static bool _i2c_presence_detect(struct npu2_dev *dev)
 {
 	uint8_t state, data;
@@ -350,6 +424,8 @@  static void setup_devices(struct npu2 *npu)
 		return;
 	}
 
+	assign_bars(npu);
+
 	if (nvlink_detected)
 		npu2_nvlink_init_npu(npu);
 	else if (ocapi_detected)
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 88e4eb1974a2..b374a1035ac9 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -730,57 +730,22 @@  static void address_translation_config(uint32_t gcid, uint32_t scom_base,
 	}
 }
 
-static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
-				  uint64_t reg[])
-{
-        struct npu2_bar *bar;
-	struct npu2_bar phy_bars[] = {
-		{ .type = NPU_PHY, .index = 0,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
-		  .enabled = true },
-		{ .type = NPU_PHY, .index = 1,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
-		  .enabled = true },
-		{ .type = NPU_REGS, .index = 0,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
-		  .enabled = true },
-	};
-
-	for (int i = 0; i < ARRAY_SIZE(phy_bars); i++) {
-		bar = &phy_bars[i];
-		npu2_get_bar(gcid, bar);
-		npu2_write_bar(NULL, bar, gcid, scom_base);
-	}
-	reg[0] = phy_bars[2].base;
-	reg[1] = phy_bars[2].size;
-}
-
 /* Procedure 13.1.3.8 - AFU MMIO Range BARs */
 static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
 				struct npu2_dev *dev)
 {
 	uint64_t stack = index_to_stack(dev->brick_index);
-	uint64_t offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ?
-		NPU2_NTL0_BAR : NPU2_NTL1_BAR;
 	uint64_t pa_offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ?
 		NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG :
 		NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
-	uint64_t addr, size, reg;
+	uint64_t reg;
 
 	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
-	phys_map_get(gcid, NPU_OCAPI_MMIO, dev->brick_index, &addr, &size);
-	dev->bars[0].type = NPU_OCAPI_MMIO;
-	dev->bars[0].index = dev->brick_index;
-	dev->bars[0].reg = NPU2_REG_OFFSET(stack, 0, offset);
-	dev->bars[0].enabled = true;
-	npu2_get_bar(gcid, &dev->bars[0]);
-
-	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n",
-	      dev->bars[0].base, dev->bars[0].size);
-	npu2_write_bar(NULL, &dev->bars[0], gcid, scom_base);
-
-	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, dev->bars[0].base >> 16);
-	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(dev->bars[0].size >> 16));
+	dev->ntl_bar.enabled = true;
+	npu2_write_bar(dev->npu, &dev->ntl_bar, gcid, scom_base);
+
+	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, dev->ntl_bar.base >> 16);
+	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(dev->ntl_bar.size >> 16));
 	prlog(PR_DEBUG, "OCAPI: PA translation %llx\n", reg);
 	npu2_scom_write(gcid, scom_base,
 			NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
@@ -788,23 +753,6 @@  static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
 			NPU2_MISC_DA_LEN_8B, reg);
 }
 
-/* Procedure 13.1.3.9 - AFU Config BARs */
-static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
-				  struct npu2_dev *dev)
-{
-	uint64_t stack = index_to_stack(dev->brick_index);
-	int stack_num = stack - NPU2_STACK_STCK_0;
-
-	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU Config BARs\n", __func__);
-	dev->bars[1].type = NPU_GENID;
-	dev->bars[1].index = stack_num;
-	dev->bars[1].reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
-	dev->bars[1].enabled = true;
-	npu2_get_bar(gcid, &dev->bars[1]);
-	prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", dev->bars[1].base);
-	npu2_write_bar(NULL, &dev->bars[1], gcid, scom_base);
-}
-
 static void otl_enabletx(uint32_t gcid, uint32_t scom_base,
 			struct npu2_dev *dev)
 {
@@ -1262,7 +1210,7 @@  static int64_t npu2_opencapi_pcicfg_read(struct phb *phb, uint32_t bdfn,
 	if (rc)
 		return rc;
 
-	genid_base = dev->bars[1].base +
+	genid_base = dev->genid_bar.base +
 		(index_to_block(dev->brick_index) == NPU2_BLOCK_OTL1 ? 256 : 0);
 
 	cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
@@ -1320,7 +1268,7 @@  static int64_t npu2_opencapi_pcicfg_write(struct phb *phb, uint32_t bdfn,
 	if (rc)
 		return rc;
 
-	genid_base = dev->bars[1].base +
+	genid_base = dev->genid_bar.base +
 		(index_to_block(dev->brick_index) == NPU2_BLOCK_OTL1 ? 256 : 0);
 
 	cfg_addr = NPU2_CQ_CTL_CONFIG_ADDR_ENABLE;
@@ -1582,8 +1530,8 @@  static void setup_device(struct npu2_dev *dev)
 	uint64_t mm_win[2];
 
 	/* Populate PHB device node */
-	phys_map_get(dev->npu->chip_id, NPU_OCAPI_MMIO, dev->brick_index, &mm_win[0],
-		     &mm_win[1]);
+	mm_win[0] = dev->ntl_bar.base;
+	mm_win[1] = dev->ntl_bar.size;
 	prlog(PR_DEBUG, "OCAPI: Setting MMIO window to %016llx + %016llx\n",
 	      mm_win[0], mm_win[1]);
 	dn_phb = dt_new_addr(dt_root, "pciex", mm_win[0]);
@@ -1629,7 +1577,8 @@  static void setup_device(struct npu2_dev *dev)
 	/* Procedure 13.1.3.8 - AFU MMIO Range BARs */
 	setup_afu_mmio_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
 	/* Procedure 13.1.3.9 - AFU Config BARs */
-	setup_afu_config_bars(dev->npu->chip_id, dev->npu->xscom_base, dev);
+	dev->genid_bar.enabled = true;
+	npu2_write_bar(dev->npu, &dev->genid_bar, dev->npu->chip_id, dev->npu->xscom_base);
 
 	set_fence_control(dev->npu->chip_id, dev->npu->xscom_base, dev->brick_index, 0b00);
 
@@ -1670,7 +1619,6 @@  static void read_nvram_training_state(void)
 int npu2_opencapi_init_npu(struct npu2 *npu)
 {
 	struct npu2_dev *dev;
-	uint64_t reg[2];
 	int rc;
 
 	assert(platform.ocapi);
@@ -1679,10 +1627,6 @@  int npu2_opencapi_init_npu(struct npu2 *npu)
 	/* TODO: Test OpenCAPI with fast reboot and make it work */
 	disable_fast_reboot("OpenCAPI device enabled");
 
-	setup_global_mmio_bar(npu->chip_id, npu->xscom_base, reg);
-
-	npu->regs = (void *)reg[0];
-
 	for (int i = 0; i < npu->total_devices; i++) {
 		dev = &npu->devices[i];
 		if (dev->type != NPU2_DEV_TYPE_OPENCAPI)
diff --git a/hw/npu2.c b/hw/npu2.c
index 6b0880682427..6aa16a43f803 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -114,7 +114,6 @@  static int64_t npu2_cfg_write_cmd(void *dev,
 {
 	struct pci_virt_device *pvd = dev;
 	struct npu2_dev *ndev = pvd->data;
-	struct npu2_bar *ntl_npu_bar, *genid_npu_bar;
 	bool enabled;
 
 	if (!write)
@@ -130,11 +129,9 @@  static int64_t npu2_cfg_write_cmd(void *dev,
 	 * one GENID BAR, which is exposed via the first brick.
 	 */
 	enabled = !!(*data & PCI_CFG_CMD_MEM_EN);
-	ntl_npu_bar = &ndev->bars[0];
-	genid_npu_bar = &ndev->bars[1];
 
-	ntl_npu_bar->enabled = enabled;
-	npu2_write_bar(ndev->npu, ntl_npu_bar, 0, 0);
+	ndev->ntl_bar.enabled = enabled;
+	npu2_write_bar(ndev->npu, &ndev->ntl_bar, 0, 0);
 
 	/*
 	 * Enable/disable the GENID BAR. Two bricks share one GENID
@@ -142,13 +139,14 @@  static int64_t npu2_cfg_write_cmd(void *dev,
 	 * track the enables separately.
 	 */
 	if (NPU2DEV_BRICK(ndev))
-		genid_npu_bar->enabled1 = enabled;
+		ndev->genid_bar.enabled1 = enabled;
 	else
-		genid_npu_bar->enabled0 = enabled;
+		ndev->genid_bar.enabled0 = enabled;
 
 	/* Enable the BAR if either device requests it enabled, otherwise disable it */
-	genid_npu_bar->enabled = genid_npu_bar->enabled0 || genid_npu_bar->enabled1;
-	npu2_write_bar(ndev->npu, genid_npu_bar, 0, 0);
+	ndev->genid_bar.enabled = ndev->genid_bar.enabled0 ||
+		ndev->genid_bar.enabled1;
+	npu2_write_bar(ndev->npu, &ndev->genid_bar, 0, 0);
 
 	return OPAL_PARTIAL;
 }
@@ -1360,59 +1358,6 @@  static const struct phb_ops npu_ops = {
 	.tce_kill		= npu2_tce_kill,
 };
 
-static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint64_t mm_win[2])
-{
-	uint32_t i;
-	struct npu2_bar *bar;
-	struct npu2_bar npu2_bars[] = {
-		/* NPU_REGS must be first in this list */
-		{ .type = NPU_REGS, .index = 0,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
-		  .enabled = true },
-		{ .type = NPU_PHY, .index = 0,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
-		  .enabled = true },
-		{ .type = NPU_PHY, .index = 1,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
-		  .enabled = true },
-		{ .type = NPU_NTL, .index = 0,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR) },
-		{ .type = NPU_NTL, .index = 1,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR) },
-		{ .type = NPU_NTL, .index = 2,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR) },
-		{ .type = NPU_NTL, .index = 3,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR) },
-		{ .type = NPU_NTL, .index = 4,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR) },
-		{ .type = NPU_NTL, .index = 5,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR) },
-		{ .type = NPU_GENID, .index = 0,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR) },
-		{ .type = NPU_GENID, .index = 1,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR) },
-		{ .type = NPU_GENID, .index = 2,
-		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR) },
-	};
-
-	for (i = 0; i < ARRAY_SIZE(npu2_bars); i++) {
-		bar = &npu2_bars[i];
-		npu2_get_bar(gcid, bar);
-		npu2_write_bar(NULL, bar, gcid, scom);
-	}
-
-	/* Global MMIO BAR */
-	reg[0] = npu2_bars[0].base;
-	reg[1] = npu2_bars[0].size;
-
-	/* NTL and GENID BARs are exposed to kernel via the mm
-	 * window */
-	mm_win[0] = npu2_bars[3].base;
-	mm_win[1] = npu2_bars[ARRAY_SIZE(npu2_bars) - 1].base +
-		    npu2_bars[ARRAY_SIZE(npu2_bars) - 1].size -
-		    mm_win[0];
-}
-
 /*
  * Set up NPU for NVLink and create PCI root device node
  * accordingly.
@@ -1474,19 +1419,9 @@  int npu2_nvlink_init_npu(struct npu2 *npu)
 	xscom_write_mask(npu->chip_id, 0x5011469, val, PPC_BITMASK(6,11));
 	xscom_write_mask(npu->chip_id, 0x5011499, val, PPC_BITMASK(6,11));
 
-	/* Reassign the BARs */
-	assign_mmio_bars(npu->chip_id, npu->xscom_base, reg, mm_win);
-	npu->regs = (void *)reg[0];
-	npu->mm_base = mm_win[0];
-	npu->mm_size = mm_win[1];
-
-	if (reg[0] && reg[1])
-		prlog(PR_INFO, "   Global MMIO BAR:  %016llx (%lldMB)\n",
-		      reg[0], reg[1] >> 20);
-	else
-		prlog(PR_ERR, "    Global MMIO BAR: Disabled\n");
-
 	/* Populate PCI root device node */
+	reg[0] = (uint64_t)npu->regs;
+	reg[1] = npu->regs_size;
 	np = dt_new_addr(dt_root, "pciex", reg[0]);
 	assert(np);
 	dt_add_property_strings(np,
@@ -1501,6 +1436,8 @@  int npu2_nvlink_init_npu(struct npu2 *npu)
 	dt_add_property_cells(np, "ibm,xscom-base", npu->xscom_base);
 	dt_add_property_cells(np, "ibm,npcq", npu->dt_node->phandle);
 	dt_add_property_cells(np, "ibm,links", npu->total_devices);
+	mm_win[0] = npu->mm_base;
+	mm_win[1] = npu->mm_size;
 	dt_add_property(np, "ibm,mmio-window", mm_win, sizeof(mm_win));
 	dt_add_property_cells(np, "ibm,phb-diag-data-size", 0);
 
@@ -1651,7 +1588,7 @@  static void npu2_populate_cfg(struct npu2_dev *dev)
 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CACHE_LINE_SIZE, 4, 0x00800000);
 
 	/* 0x10/14 - BAR#0, NTL BAR */
-	bar = &dev->bars[0];
+	bar = &dev->ntl_bar;
 	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR0, 4,
 			  (bar->base & 0xfffffff0) | bar->flags,
 			  0x0000000f, 0x00000000);
@@ -1662,7 +1599,7 @@  static void npu2_populate_cfg(struct npu2_dev *dev)
 			    npu2_dev_cfg_bar, bar);
 
 	/* 0x18/1c - BAR#1, GENID BAR */
-	bar = &dev->bars[1];
+	bar = &dev->genid_bar;
 	if (NPU2DEV_BRICK(dev) == 0)
 		PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR2, 4, (bar->base & 0xfffffff0) |
 				  bar->flags,
@@ -1726,7 +1663,6 @@  static void npu2_populate_devices(struct npu2 *p,
 	struct npu2_dev *dev;
 	struct dt_node *npu2_dn, *link;
 	uint32_t npu_phandle, index = 0;
-	int stack;
 
 	/*
 	 * Get the npu node which has the links which we expand here
@@ -1741,7 +1677,6 @@  static void npu2_populate_devices(struct npu2 *p,
 	p->phb_nvlink.scan_map = 0;
 	dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") {
 		uint32_t group_id;
-		struct npu2_bar *npu2_bar;
 
 		dev = &p->devices[index];
 		dev->type = NPU2_DEV_TYPE_NVLINK;
@@ -1761,31 +1696,6 @@  static void npu2_populate_devices(struct npu2 *p,
 		dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
 		dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
 
-		/* Populate BARs. BAR0/1 is the NTL bar. */
-		stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
-		npu2_bar = &dev->bars[0];
-		npu2_bar->type = NPU_NTL;
-		npu2_bar->index = dev->brick_index;
-		npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev) == 0 ?
-						NPU2_NTL0_BAR : NPU2_NTL1_BAR);
-	        npu2_get_bar(p->chip_id, npu2_bar);
-
-		dev->bars[0].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
-
-		/* BAR2/3 is the GENID bar. */
-		npu2_bar = &dev->bars[1];
-		npu2_bar->type = NPU_GENID;
-		npu2_bar->index = NPU2DEV_STACK(dev);
-		npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
-	        npu2_get_bar(p->chip_id, npu2_bar);
-
-		/* The GENID is a single physical BAR that we split
-		 * for each emulated device */
-		npu2_bar->size = 0x10000;
-		if (NPU2DEV_BRICK(dev))
-			npu2_bar->base += 0x10000;
-		dev->bars[1].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;
-
 		/* Initialize PCI virtual device */
 		dev->nvlink.pvd = pci_virt_add_device(&p->phb_nvlink, dev->bdfn, 0x100, dev);
 		if (dev->nvlink.pvd) {
diff --git a/include/npu2.h b/include/npu2.h
index c5c5b43843c3..bf7fb6927dd4 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -119,7 +119,8 @@  struct npu2_dev {
 	uint32_t		brick_index;
 	uint64_t		pl_xscom_base;
 	struct dt_node		*dt_node;
-	struct npu2_bar		bars[2];
+	struct npu2_bar		ntl_bar;
+	struct npu2_bar		genid_bar;
 	struct npu2		*npu;
 
 	uint32_t		bdfn;
@@ -150,6 +151,7 @@  struct npu2 {
 	uint32_t	chip_id;
 	uint64_t	xscom_base;
 	void		*regs;
+	uint64_t	regs_size;
 	uint64_t	mm_base;
 	uint64_t	mm_size;
 	uint32_t	base_lsi;