diff mbox series

[v3] hw/npu2-opencapi: Support multiple LPC devices

Message ID 20200128030249.20467-1-ajd@linux.ibm.com
State Superseded
Headers show
Series [v3] hw/npu2-opencapi: Support multiple LPC devices | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning Failed to apply on branch master (d75e82dbfbb9443efeb3f9a5921ac23605aab469)
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Andrew Donnellan Jan. 28, 2020, 3:02 a.m. UTC
Currently, we only have a single range for LPC memory per chip, and we only
allow a single device to use that range.

With upcoming Hostboot/SBE changes, we'll use the chip address extension
mask to give us multiple ranges by using the masked bits of the group ID.

Each device can now allocate a whole 4TB non-mirrored region. We still
don't do >4TB ranges.

If the extension mask is not set correctly, we'll fall back to only
permitting one device and printing an error suggesting a firmware upgrade.

Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>

---

I've been able to test this under limited configurations, so far so good.

v1->v2:
- fix excessively large range size which I'd left in there for some reason

v2->v3:
- remove npu->lpc_mem_allocated, which we no longer use (Fred)
---
 hw/npu2-opencapi.c | 43 +++++++++++++++++++++++++++++--------------
 hw/phys-map.c      | 15 +++++++++------
 include/npu2.h     |  7 ++-----
 3 files changed, 40 insertions(+), 25 deletions(-)

Comments

Oliver O'Halloran Jan. 28, 2020, 5:21 a.m. UTC | #1
On Tue, 2020-01-28 at 14:02 +1100, Andrew Donnellan wrote:
> Currently, we only have a single range for LPC memory per chip, and we only
> allow a single device to use that range.
> 
> With upcoming Hostboot/SBE changes, we'll use the chip address extension
> mask to give us multiple ranges by using the masked bits of the group ID.
> 
> Each device can now allocate a whole 4TB non-mirrored region. We still
> don't do >4TB ranges.

This patch seems like the sort of thing that'll need to be re-written
entirely when we decide to support >4TB allocations. Oh well.

> 
> If the extension mask is not set correctly, we'll fall back to only
> permitting one device and printing an error suggesting a firmware upgrade.
> 
> Signed-off-by: Andrew Donnellan <ajd@linux.ibm.com>
> 
> ---
> 
> I've been able to test this under limited configurations, so far so good.
> 
> v1->v2:
> - fix excessively large range size which I'd left in there for some reason
> 
> v2->v3:
> - remove npu->lpc_mem_allocated, which we no longer use (Fred)
> ---
>  hw/npu2-opencapi.c | 43 +++++++++++++++++++++++++++++--------------
>  hw/phys-map.c      | 15 +++++++++------
>  include/npu2.h     |  7 ++-----
>  3 files changed, 40 insertions(+), 25 deletions(-)
> 
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index 19589c92d477..37d05340e00b 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -2179,25 +2179,38 @@ static void set_mem_bar(struct npu2_dev *dev, uint64_t base, uint64_t size)
>  
>  static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar)
>  {
> -	uint64_t phys_map_base, phys_map_size;
> +	uint64_t phys_map_base, phys_map_size, val;
>  	int rc = OPAL_SUCCESS;
>  
>  	lock(&dev->npu->lock);
>  
> -	/*
> -	 * Right now, we support 1 allocation per chip, of up to 4TB.
> -	 *
> -	 * In future, we will use chip address extension to support
> -	 * >4TB ranges, and we will implement a more sophisticated
> -	 * allocator to allow an allocation for every link on a chip.
> -	 */
> -
> -	if (dev->npu->lpc_mem_allocated) {
> +	if (dev->lpc_mem_base) {
> +		OCAPIERR(dev, "LPC allocation failed - BAR already in use\n");
>  		rc = OPAL_RESOURCE;
>  		goto out;
>  	}
>  
> -	phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base, &phys_map_size);
> +	xscom_read(dev->npu->chip_id, PB_CENT_MODE, &val);

> +	if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 0b1100100) {

Magic constant probably needs a comment.

> +		phys_map_get(dev->npu->chip_id, OCAPI_MEM,
> +			     dev->brick_index - 2, &phys_map_base,
> +			     &phys_map_size);
> +	} else {
> +		bool in_use = false;
newline between variable and code

> +		for (int i = 0; i < dev->npu->total_devices; i++) {
> +			if (dev->npu->devices[i].lpc_mem_base)
> +				in_use = true;
> +		}
> +
> +		if (in_use) {
> +			OCAPIERR(dev, "LPC allocation failed - single device per chip limit, FW upgrade required (pb_cent_mode=0x%016llx)\n", val);
> +			rc = OPAL_RESOURCE;
> +			goto out;
> +		}
> +
> +		phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base,
> +			     &phys_map_size);
> +	}
>  
>  	if (size > phys_map_size) {
>  		/**
> @@ -2223,7 +2236,8 @@ static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar)
>  
>  	set_mem_bar(dev, phys_map_base, size);
>  	*bar = phys_map_base;
> -	dev->npu->lpc_mem_allocated = dev;
> +	dev->lpc_mem_base = phys_map_base;
> +	dev->lpc_mem_size = size;
>  
>  out:
>  	unlock(&dev->npu->lock);
> @@ -2236,13 +2250,14 @@ static int64_t release_mem_bar(struct npu2_dev *dev)
>  
>  	lock(&dev->npu->lock);
>  
> -	if (dev->npu->lpc_mem_allocated != dev) {
> +	if (!dev->lpc_mem_base) {
>  		rc = OPAL_PARAMETER;
>  		goto out;
>  	}
>  
>  	set_mem_bar(dev, 0, 0);
> -	dev->npu->lpc_mem_allocated = NULL;
> +	dev->lpc_mem_base = 0;
> +	dev->lpc_mem_size = 0;
>  
>  out:
>  	unlock(&dev->npu->lock);
> diff --git a/hw/phys-map.c b/hw/phys-map.c
> index 9917da7cdf94..fd79b3c123a4 100644
> --- a/hw/phys-map.c
> +++ b/hw/phys-map.c
> @@ -44,15 +44,18 @@ static const struct phys_map_entry phys_map_table_nimbus[] = {
>  	{ GPU_MEM_4T_UP,   3, 0x0000046000000000ull, 0x0000002000000000ull },
>  
>  	/*
> -	 * OpenCAPI LPC Memory - single 4TB range per chip, fills
> -	 * whole second non-mirrored region.
> +	 * OpenCAPI LPC Memory
>  	 *
> -	 * Longer term, we're going to use chip address extension to
> -	 * enable >4TB to be allocated per chip.  At that point, we
> -	 * may have to find another way of assigning these ranges
> -	 * outside of phys-map.
> +	 * With chip address extension enabled, we allocate 4TB ranges
> +	 * (in the second non-mirrored region) for each OpenCAPI link
> +	 * by varying the upper 2 bits of the group ID.
> +	 *
> +	 * We don't currently support >4TB ranges.
>  	 */
>  	{ OCAPI_MEM,	   0, 0x0002000000000000ull, 0x0000040000000000ull },
> +	{ OCAPI_MEM,	   1, 0x0002200000000000ull, 0x0000040000000000ull },
> +	{ OCAPI_MEM,	   2, 0x0002400000000000ull, 0x0000040000000000ull },
> +	{ OCAPI_MEM,	   3, 0x0002600000000000ull, 0x0000040000000000ull },
>  
>  	/* 0 TB offset @ MMIO 0x0006000000000000ull */
>  	{ PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
> diff --git a/include/npu2.h b/include/npu2.h
> index d2a3430e3e3a..b69ae729ea12 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -150,6 +150,8 @@ struct npu2_dev {
>  	uint64_t		linux_pe;
>  	unsigned long		train_start;
>  	unsigned long		train_timeout;
> +	uint64_t		lpc_mem_base;
> +	uint64_t		lpc_mem_size;
>  };
>  
>  struct npu2 {
> @@ -185,11 +187,6 @@ struct npu2 {
>  	struct lock	i2c_lock;
>  	uint8_t		i2c_pin_mode;
>  	uint8_t		i2c_pin_wr_state;
> -	/*
> -	 * Which device currently has an LPC allocation.
> -	 * Temporary as long as we only support 1 LPC alloc per chip.
> -	 */
> -	struct npu2_dev	*lpc_mem_allocated;
>  };
>  
>  static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)
Andrew Donnellan Jan. 28, 2020, 7:16 a.m. UTC | #2
On 28/1/20 4:21 pm, Oliver O'Halloran wrote:
> On Tue, 2020-01-28 at 14:02 +1100, Andrew Donnellan wrote:
>> Currently, we only have a single range for LPC memory per chip, and we only
>> allow a single device to use that range.
>>
>> With upcoming Hostboot/SBE changes, we'll use the chip address extension
>> mask to give us multiple ranges by using the masked bits of the group ID.
>>
>> Each device can now allocate a whole 4TB non-mirrored region. We still
>> don't do >4TB ranges.
> 
> This patch seems like the sort of thing that'll need to be re-written
> entirely when we decide to support >4TB allocations. Oh well.

Yep. The way addresses are structured on P8 and the restrictions of the 
chip address mask that has been chosen make it difficult to do >4TB of 
contiguous memory in any case - we can assign multiple discontiguous 4TB 
ranges to a single card by masking off some bits, but from a Linux 
perspective that's less useful. We'll cross that bridge if/when required 
to...

>>   
>> -	phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base, &phys_map_size);
>> +	xscom_read(dev->npu->chip_id, PB_CENT_MODE, &val);
> 
>> +	if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 0b1100100) {
> 
> Magic constant probably needs a comment.
> 
>> +		phys_map_get(dev->npu->chip_id, OCAPI_MEM,
>> +			     dev->brick_index - 2, &phys_map_base,
>> +			     &phys_map_size);
>> +	} else {
>> +		bool in_use = false;
> newline between variable and code
Will fix in v4.
Andrew Donnellan Jan. 28, 2020, 7:25 a.m. UTC | #3
On 28/1/20 6:16 pm, Andrew Donnellan wrote:
>> This patch seems like the sort of thing that'll need to be re-written
>> entirely when we decide to support >4TB allocations. Oh well.
> 
> Yep. The way addresses are structured on P8 and the restrictions of the 

I obviously mean P9...

> chip address mask that has been chosen makes it difficult to do >4TB of 
> contiguous memory in any case - we can assign multiple discontiguous 4TB 
> ranges to a single card by masking off some bits, but from a Linux 
> perspective that's less useful. We'll cross that bridge if/when required 
> to...
diff mbox series

Patch

diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index 19589c92d477..37d05340e00b 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -2179,25 +2179,38 @@  static void set_mem_bar(struct npu2_dev *dev, uint64_t base, uint64_t size)
 
 static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar)
 {
-	uint64_t phys_map_base, phys_map_size;
+	uint64_t phys_map_base, phys_map_size, val;
 	int rc = OPAL_SUCCESS;
 
 	lock(&dev->npu->lock);
 
-	/*
-	 * Right now, we support 1 allocation per chip, of up to 4TB.
-	 *
-	 * In future, we will use chip address extension to support
-	 * >4TB ranges, and we will implement a more sophisticated
-	 * allocator to allow an allocation for every link on a chip.
-	 */
-
-	if (dev->npu->lpc_mem_allocated) {
+	if (dev->lpc_mem_base) {
+		OCAPIERR(dev, "LPC allocation failed - BAR already in use\n");
 		rc = OPAL_RESOURCE;
 		goto out;
 	}
 
-	phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base, &phys_map_size);
+	xscom_read(dev->npu->chip_id, PB_CENT_MODE, &val);
+	if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 0b1100100) {
+		phys_map_get(dev->npu->chip_id, OCAPI_MEM,
+			     dev->brick_index - 2, &phys_map_base,
+			     &phys_map_size);
+	} else {
+		bool in_use = false;
+		for (int i = 0; i < dev->npu->total_devices; i++) {
+			if (dev->npu->devices[i].lpc_mem_base)
+				in_use = true;
+		}
+
+		if (in_use) {
+			OCAPIERR(dev, "LPC allocation failed - single device per chip limit, FW upgrade required (pb_cent_mode=0x%016llx)\n", val);
+			rc = OPAL_RESOURCE;
+			goto out;
+		}
+
+		phys_map_get(dev->npu->chip_id, OCAPI_MEM, 0, &phys_map_base,
+			     &phys_map_size);
+	}
 
 	if (size > phys_map_size) {
 		/**
@@ -2223,7 +2236,8 @@  static int64_t alloc_mem_bar(struct npu2_dev *dev, uint64_t size, uint64_t *bar)
 
 	set_mem_bar(dev, phys_map_base, size);
 	*bar = phys_map_base;
-	dev->npu->lpc_mem_allocated = dev;
+	dev->lpc_mem_base = phys_map_base;
+	dev->lpc_mem_size = size;
 
 out:
 	unlock(&dev->npu->lock);
@@ -2236,13 +2250,14 @@  static int64_t release_mem_bar(struct npu2_dev *dev)
 
 	lock(&dev->npu->lock);
 
-	if (dev->npu->lpc_mem_allocated != dev) {
+	if (!dev->lpc_mem_base) {
 		rc = OPAL_PARAMETER;
 		goto out;
 	}
 
 	set_mem_bar(dev, 0, 0);
-	dev->npu->lpc_mem_allocated = NULL;
+	dev->lpc_mem_base = 0;
+	dev->lpc_mem_size = 0;
 
 out:
 	unlock(&dev->npu->lock);
diff --git a/hw/phys-map.c b/hw/phys-map.c
index 9917da7cdf94..fd79b3c123a4 100644
--- a/hw/phys-map.c
+++ b/hw/phys-map.c
@@ -44,15 +44,18 @@  static const struct phys_map_entry phys_map_table_nimbus[] = {
 	{ GPU_MEM_4T_UP,   3, 0x0000046000000000ull, 0x0000002000000000ull },
 
 	/*
-	 * OpenCAPI LPC Memory - single 4TB range per chip, fills
-	 * whole second non-mirrored region.
+	 * OpenCAPI LPC Memory
 	 *
-	 * Longer term, we're going to use chip address extension to
-	 * enable >4TB to be allocated per chip.  At that point, we
-	 * may have to find another way of assigning these ranges
-	 * outside of phys-map.
+	 * With chip address extension enabled, we allocate 4TB ranges
+	 * (in the second non-mirrored region) for each OpenCAPI link
+	 * by varying the upper 2 bits of the group ID.
+	 *
+	 * We don't currently support >4TB ranges.
 	 */
 	{ OCAPI_MEM,	   0, 0x0002000000000000ull, 0x0000040000000000ull },
+	{ OCAPI_MEM,	   1, 0x0002200000000000ull, 0x0000040000000000ull },
+	{ OCAPI_MEM,	   2, 0x0002400000000000ull, 0x0000040000000000ull },
+	{ OCAPI_MEM,	   3, 0x0002600000000000ull, 0x0000040000000000ull },
 
 	/* 0 TB offset @ MMIO 0x0006000000000000ull */
 	{ PHB4_64BIT_MMIO, 0, 0x0006000000000000ull, 0x0000004000000000ull },
diff --git a/include/npu2.h b/include/npu2.h
index d2a3430e3e3a..b69ae729ea12 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -150,6 +150,8 @@  struct npu2_dev {
 	uint64_t		linux_pe;
 	unsigned long		train_start;
 	unsigned long		train_timeout;
+	uint64_t		lpc_mem_base;
+	uint64_t		lpc_mem_size;
 };
 
 struct npu2 {
@@ -185,11 +187,6 @@  struct npu2 {
 	struct lock	i2c_lock;
 	uint8_t		i2c_pin_mode;
 	uint8_t		i2c_pin_wr_state;
-	/*
-	 * Which device currently has an LPC allocation.
-	 * Temporary as long as we only support 1 LPC alloc per chip.
-	 */
-	struct npu2_dev	*lpc_mem_allocated;
 };
 
 static inline struct npu2 *phb_to_npu2_nvlink(struct phb *phb)