diff mbox series

[v2,02/15] hw/npu2: Merge implementations of BAR accessor functions

Message ID 905b7b786e7e50ef3845493c8a467224f5cb5200.1547168645.git-series.andrew.donnellan@au1.ibm.com
State Changes Requested
Headers show
Series Support OpenCAPI and NVLink devices on same NPU on Witherspoon | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch warning master/apply_patch Patch failed to apply
snowpatch_ozlabs/apply_patch fail Failed to apply to any branch

Commit Message

Andrew Donnellan Jan. 11, 2019, 1:09 a.m. UTC
Move npu2_{get,read,write}_bar() to common code. Get rid of the OpenCAPI
write_bar() function. Fix the OpenCAPI code to use the NVLink-style BAR
accessor functions and struct npu2_bar.

While we're there, fix a trivial bug in npu2_read_bar() when reading PHY
BARs - the global MMIO BAR is stack 0, not stack 2. I don't think we ever
read that BAR, so this has never been a problem.

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>

---
v1->v2:
- remove dead phys_map_get() (Fred)
---
 hw/npu2-common.c   |  77 ++++++++++++++++++++++++++++++++-
 hw/npu2-opencapi.c | 112 ++++++++++++++++------------------------------
 hw/npu2.c          |  78 +--------------------------------
 include/npu2.h     |   4 ++-
 4 files changed, 121 insertions(+), 150 deletions(-)

Comments

Alexey Kardashevskiy Jan. 17, 2019, 4:55 a.m. UTC | #1
On 11/01/2019 12:09, Andrew Donnellan wrote:
> Move npu2_{get,read,write}_bar() to common code. Get rid of the OpenCAPI
> write_bar() function. Fix the OpenCAPI code to use the NVLink-style BAR
> accessor functions and struct npu2_bar.
> 
> While we're there, fix a trivial bug in npu2_read_bar() when reading PHY
> BARs - the global MMIO BAR is stack 0, not stack 2. I don't think we ever
> read that BAR, so this has never been a problem.

Out of curiosity I tried to locate this trivial bugfix and I could not
;) imho it deserves a separate patch.

> 
> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
> Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>
> 
> ---
> v1->v2:
> - remove dead phys_map_get() (Fred)
> ---
>  hw/npu2-common.c   |  77 ++++++++++++++++++++++++++++++++-
>  hw/npu2-opencapi.c | 112 ++++++++++++++++------------------------------
>  hw/npu2.c          |  78 +--------------------------------
>  include/npu2.h     |   4 ++-
>  4 files changed, 121 insertions(+), 150 deletions(-)
> 
> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
> index 6e6b12f0d1ae..3446acb45bea 100644
> --- a/hw/npu2-common.c
> +++ b/hw/npu2-common.c
> @@ -22,6 +22,7 @@
>  #include <bitutils.h>
>  #include <nvram.h>
>  #include <i2c.h>
> +#include <phys-map.h>
>  
>  /*
>   * We use the indirect method because it uses the same addresses as
> @@ -97,6 +98,82 @@ void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
>  			(uint64_t)new_val << 32);
>  }
>  
> +void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar)


I'd call it npu2_setup_bar and pass type/index/reg/enabled as well.


> +{
> +	phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
> +}
> +
> +void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar)
> +{
> +	uint64_t reg, val;
> +
> +	reg = NPU2_REG_OFFSET(0, NPU2_BLOCK_SM_0, bar->reg);
> +	val = npu2_read(p, reg);
> +
> +	switch (NPU2_REG(bar->reg)) {
> +	case NPU2_PHY_BAR:
> +		bar->base = GETFIELD(NPU2_PHY_BAR_ADDR, val) << 21;
> +		bar->enabled = GETFIELD(NPU2_PHY_BAR_ENABLE, val);
> +
> +		if (NPU2_REG_STACK(reg) == NPU2_STACK_STCK_0)
> +			/* This is the global MMIO BAR */
> +			bar->size = 0x1000000;
> +		else
> +			bar->size = 0x200000;
> +		break;
> +	case NPU2_NTL0_BAR:
> +	case NPU2_NTL1_BAR:
> +		bar->base = GETFIELD(NPU2_NTL_BAR_ADDR, val) << 16;
> +		bar->enabled = GETFIELD(NPU2_NTL_BAR_ENABLE, val);
> +		bar->size = 0x10000 << GETFIELD(NPU2_NTL_BAR_SIZE, val);
> +		break;
> +	case NPU2_GENID_BAR:
> +		bar->base = GETFIELD(NPU2_GENID_BAR_ADDR, val) << 16;
> +		bar->enabled = GETFIELD(NPU2_GENID_BAR_ENABLE, val);
> +		bar->size = 0x20000;
> +		break;
> +	default:
> +		bar->base = 0ul;
> +		bar->enabled = false;
> +		bar->size = 0;
> +		break;
> +	}
> +}
> +
> +void npu2_write_bar(struct npu2 *p, struct npu2_bar *bar, uint32_t gcid,
> +		    uint32_t scom)
> +{
> +	uint64_t reg, val;
> +	int block;
> +
> +	switch (NPU2_REG(bar->reg)) {
> +	case NPU2_PHY_BAR:
> +		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, bar->base >> 21);
> +		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, bar->enabled);
> +		break;
> +	case NPU2_NTL0_BAR:
> +	case NPU2_NTL1_BAR:
> +		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, bar->base >> 16);
> +		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, bar->enabled);
> +		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(bar->size >> 16));
> +		break;
> +	case NPU2_GENID_BAR:
> +		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, bar->base >> 16);
> +		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, bar->enabled);
> +		break;
> +	default:
> +		val = 0ul;
> +	}
> +
> +	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
> +		reg = NPU2_REG_OFFSET(0, block, bar->reg);
> +		if (p)
> +			npu2_write(p, reg, val);
> +		else
> +			npu2_scom_write(gcid, scom, reg, NPU2_MISC_DA_LEN_8B, val);
> +	}
> +}
> +
>  static bool _i2c_presence_detect(struct npu2_dev *dev)
>  {
>  	uint8_t state, data;
> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
> index c10cf9863dda..ffa4d706bbad 100644
> --- a/hw/npu2-opencapi.c
> +++ b/hw/npu2-opencapi.c
> @@ -730,63 +730,29 @@ static void address_translation_config(uint32_t gcid, uint32_t scom_base,
>  	}
>  }
>  
> -/* TODO: Merge this with NVLink implementation - we don't use the npu2_bar
> - * wrapper for the PHY BARs yet */
> -static void write_bar(uint32_t gcid, uint32_t scom_base, uint64_t reg,
> -		      uint64_t addr, uint64_t size)
> -{
> -	uint64_t val;
> -	int block;
> -	switch (NPU2_REG(reg)) {
> -	case NPU2_PHY_BAR:
> -		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, addr >> 21);
> -		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, 1);
> -		break;
> -	case NPU2_NTL0_BAR:
> -	case NPU2_NTL1_BAR:
> -		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, addr >> 16);
> -		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(size >> 16));
> -		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, 1);
> -		break;
> -	case NPU2_GENID_BAR:
> -		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, addr >> 16);
> -		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, 1);
> -		break;
> -	default:
> -		val = 0ul;
> -	}
> -
> -	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
> -		npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(0, block, reg),
> -				NPU2_MISC_DA_LEN_8B, val);
> -		prlog(PR_DEBUG, "OCAPI: Setting BAR %llx to %llx\n",
> -		      NPU2_REG_OFFSET(0, block, reg), val);
> -	}
> -}
> -
>  static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
>  				  uint64_t reg[])
>  {
> -	uint64_t addr, size;
> -
> -	prlog(PR_DEBUG, "OCAPI: patching up PHY0 bar, %s\n", __func__);
> -	phys_map_get(gcid, NPU_PHY, 0, &addr, &size);
> -	write_bar(gcid, scom_base,
> -		  NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
> -		addr, size);
> -	prlog(PR_DEBUG, "OCAPI: patching up PHY1 bar, %s\n", __func__);
> -	phys_map_get(gcid, NPU_PHY, 1, &addr, &size);
> -	write_bar(gcid, scom_base,
> -		  NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
> -		addr, size);
> -
> -	prlog(PR_DEBUG, "OCAPI: setup global mmio, %s\n", __func__);
> -	phys_map_get(gcid, NPU_REGS, 0, &addr, &size);
> -	write_bar(gcid, scom_base,
> -		  NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
> -		addr, size);
> -	reg[0] = addr;
> -	reg[1] = size;
> +	struct npu2_bar *bar;
> +	struct npu2_bar phy_bars[] = {
> +		{ .type = NPU_PHY, .index = 0,
> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
> +		  .enabled = true },
> +		{ .type = NPU_PHY, .index = 1,
> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
> +		  .enabled = true },
> +		{ .type = NPU_REGS, .index = 0,
> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
> +		  .enabled = true },
> +	};
> +
> +	for (int i = 0; i < ARRAY_SIZE(phy_bars); i++) {
> +		bar = &phy_bars[i];
> +		npu2_get_bar(gcid, bar);
> +		npu2_write_bar(NULL, bar, gcid, scom_base);
> +	}
> +	reg[0] = phy_bars[2].base;
> +	reg[1] = phy_bars[2].size;
>  }
>  
>  /* Procedure 13.1.3.8 - AFU MMIO Range BARs */
> @@ -799,19 +765,21 @@ static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
>  	uint64_t pa_offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ?
>  		NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG :
>  		NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
> -	uint64_t addr, size, reg;
> +	uint64_t reg;
>  
>  	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
> -	phys_map_get(gcid, NPU_OCAPI_MMIO, dev->brick_index, &addr, &size);
> -
> -	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size);
> -	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr,
> -		size);
> -	dev->bars[0].base = addr;
> -	dev->bars[0].size = size;
> -
> -	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, addr >> 16);
> -	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(size >> 16));
> +	dev->bars[0].type = NPU_OCAPI_MMIO;
> +	dev->bars[0].index = dev->brick_index;
> +	dev->bars[0].reg = NPU2_REG_OFFSET(stack, 0, offset);
> +	dev->bars[0].enabled = true;


Here you add type/index/reg/enabled initialization, not move from
elsewhere so there either was a bug with unitialized bar struct or now
we have 2 places where such initialization occurs. The commit log does
not mentioned this. I am confused.



> +	npu2_get_bar(gcid, &dev->bars[0]);
> +
> +	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n",
> +	      dev->bars[0].base, dev->bars[0].size);
> +	npu2_write_bar(NULL, &dev->bars[0], gcid, scom_base);
> +
> +	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, dev->bars[0].base >> 16);
> +	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(dev->bars[0].size >> 16));
>  	prlog(PR_DEBUG, "OCAPI: PA translation %llx\n", reg);
>  	npu2_scom_write(gcid, scom_base,
>  			NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
> @@ -825,15 +793,15 @@ static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
>  {
>  	uint64_t stack = index_to_stack(dev->brick_index);
>  	int stack_num = stack - NPU2_STACK_STCK_0;
> -	uint64_t addr, size;
>  
>  	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU Config BARs\n", __func__);
> -	phys_map_get(gcid, NPU_GENID, stack_num, &addr, &size);
> -	prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", addr);
> -	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR),
> -		addr, size);
> -	dev->bars[1].base = addr;
> -	dev->bars[1].size = size;
> +	dev->bars[1].type = NPU_GENID;
> +	dev->bars[1].index = stack_num;
> +	dev->bars[1].reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
> +	dev->bars[1].enabled = true;


Same here.

> +	npu2_get_bar(gcid, &dev->bars[1]);
> +	prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", dev->bars[1].base);
> +	npu2_write_bar(NULL, &dev->bars[1], gcid, scom_base);
>  }
>  
>  static void otl_enabletx(uint32_t gcid, uint32_t scom_base,
> diff --git a/hw/npu2.c b/hw/npu2.c
> index 79b87f196b3a..8176a81262f0 100644
> --- a/hw/npu2.c
> +++ b/hw/npu2.c
> @@ -106,84 +106,6 @@ static struct npu2_dev *npu2_bdf_to_dev(struct npu2 *p,
>  	return NULL;
>  }
>  
> -static inline void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar)
> -{
> -	phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
> -}
> -
> -static void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar)
> -{
> -	uint64_t reg, val;
> -
> -	reg = NPU2_REG_OFFSET(0, NPU2_BLOCK_SM_0, bar->reg);
> -	val = npu2_read(p, reg);
> -
> -	switch (NPU2_REG(bar->reg)) {
> -	case NPU2_PHY_BAR:
> -		bar->base = GETFIELD(NPU2_PHY_BAR_ADDR, val) << 21;
> -		bar->enabled = GETFIELD(NPU2_PHY_BAR_ENABLE, val);
> -
> -		if (NPU2_REG_STACK(reg) == NPU2_STACK_STCK_2)
> -			/* This is the global MMIO BAR */
> -			bar->size = 0x1000000;
> -		else
> -			bar->size = 0x200000;
> -		break;
> -	case NPU2_NTL0_BAR:
> -	case NPU2_NTL1_BAR:
> -		bar->base = GETFIELD(NPU2_NTL_BAR_ADDR, val) << 16;
> -		bar->enabled = GETFIELD(NPU2_NTL_BAR_ENABLE, val);
> -		bar->size = 0x10000 << GETFIELD(NPU2_NTL_BAR_SIZE, val);
> -		break;
> -	case NPU2_GENID_BAR:
> -		bar->base = GETFIELD(NPU2_GENID_BAR_ADDR, val) << 16;
> -		bar->enabled = GETFIELD(NPU2_GENID_BAR_ENABLE, val);
> -		bar->size = 0x20000;
> -		break;
> -	default:
> -		bar->base = 0ul;
> -		bar->enabled = false;
> -		bar->size = 0;
> -		break;
> -	}
> -}
> -
> -static void npu2_write_bar(struct npu2 *p,
> -			   struct npu2_bar *bar,
> -			   uint32_t gcid,
> -			   uint32_t scom)
> -{
> -	uint64_t reg, val;
> -	int block;
> -
> -	switch (NPU2_REG(bar->reg)) {
> -	case NPU2_PHY_BAR:
> -		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, bar->base >> 21);
> -		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, bar->enabled);
> -		break;
> -	case NPU2_NTL0_BAR:
> -	case NPU2_NTL1_BAR:
> -		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, bar->base >> 16);
> -		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, bar->enabled);
> -		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, 1);
> -		break;
> -	case NPU2_GENID_BAR:
> -		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, bar->base >> 16);
> -		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, bar->enabled);
> -		break;
> -	default:
> -		val = 0ul;
> -	}
> -
> -	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
> -		reg = NPU2_REG_OFFSET(0, block, bar->reg);
> -		if (p)
> -			npu2_write(p, reg, val);
> -		else
> -			npu2_scom_write(gcid, scom, reg, NPU2_MISC_DA_LEN_8B, val);
> -	}
> -}
> -
>  /* Trap for PCI command (0x4) to enable or disable device's BARs */
>  static int64_t npu2_cfg_write_cmd(void *dev,
>  				  struct pci_cfg_reg_filter *pcrf __unused,
> diff --git a/include/npu2.h b/include/npu2.h
> index fcaeb55f67dc..b2a3ead858d2 100644
> --- a/include/npu2.h
> +++ b/include/npu2.h
> @@ -204,6 +204,10 @@ void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val);
>  uint64_t npu2_read(struct npu2 *p, uint64_t reg);
>  void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
>  void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mask);
> +void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar);
> +void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar);
> +void npu2_write_bar(struct npu2 *p, struct npu2_bar *bar, uint32_t gcid,
> +		    uint32_t scom);
>  int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
>  			   uint32_t offset, uint32_t len, uint32_t *data,
>  			   bool write);
>
Andrew Donnellan Jan. 31, 2019, 3:18 a.m. UTC | #2
On 17/1/19 3:55 pm, Alexey Kardashevskiy wrote:
> 
> 
> On 11/01/2019 12:09, Andrew Donnellan wrote:
>> Move npu2_{get,read,write}_bar() to common code. Get rid of the OpenCAPI
>> write_bar() function. Fix the OpenCAPI code to use the NVLink-style BAR
>> accessor functions and struct npu2_bar.
>>
>> While we're there, fix a trivial bug in npu2_read_bar() when reading PHY
>> BARs - the global MMIO BAR is stack 0, not stack 2. I don't think we ever
>> read that BAR, so this has never been a problem.
> 
> Out of curiosity I tried to locate this trivial bugfix and I could not
> ;) imho it deserves a separate patch.
> 
>>
>> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
>> Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>
>>
>> ---
>> v1->v2:
>> - remove dead phys_map_get() (Fred)
>> ---
>>   hw/npu2-common.c   |  77 ++++++++++++++++++++++++++++++++-
>>   hw/npu2-opencapi.c | 112 ++++++++++++++++------------------------------
>>   hw/npu2.c          |  78 +--------------------------------
>>   include/npu2.h     |   4 ++-
>>   4 files changed, 121 insertions(+), 150 deletions(-)
>>
>> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
>> index 6e6b12f0d1ae..3446acb45bea 100644
>> --- a/hw/npu2-common.c
>> +++ b/hw/npu2-common.c
>> @@ -22,6 +22,7 @@
>>   #include <bitutils.h>
>>   #include <nvram.h>
>>   #include <i2c.h>
>> +#include <phys-map.h>
>>   
>>   /*
>>    * We use the indirect method because it uses the same addresses as
>> @@ -97,6 +98,82 @@ void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
>>   			(uint64_t)new_val << 32);
>>   }
>>   
>> +void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar)
> 
> 
> I'd call it npu2_setup_bar and pass type/index/reg/enabled as well.

I see what you're getting at but with the changes in later patches I 
prefer to keep as is

> 
> 
>> +{
>> +	phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
>> +}
>> +
>> +void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar)
>> +{
>> +	uint64_t reg, val;
>> +
>> +	reg = NPU2_REG_OFFSET(0, NPU2_BLOCK_SM_0, bar->reg);
>> +	val = npu2_read(p, reg);
>> +
>> +	switch (NPU2_REG(bar->reg)) {
>> +	case NPU2_PHY_BAR:
>> +		bar->base = GETFIELD(NPU2_PHY_BAR_ADDR, val) << 21;
>> +		bar->enabled = GETFIELD(NPU2_PHY_BAR_ENABLE, val);
>> +
>> +		if (NPU2_REG_STACK(reg) == NPU2_STACK_STCK_0)
>> +			/* This is the global MMIO BAR */
>> +			bar->size = 0x1000000;
>> +		else
>> +			bar->size = 0x200000;
>> +		break;
>> +	case NPU2_NTL0_BAR:
>> +	case NPU2_NTL1_BAR:
>> +		bar->base = GETFIELD(NPU2_NTL_BAR_ADDR, val) << 16;
>> +		bar->enabled = GETFIELD(NPU2_NTL_BAR_ENABLE, val);
>> +		bar->size = 0x10000 << GETFIELD(NPU2_NTL_BAR_SIZE, val);
>> +		break;
>> +	case NPU2_GENID_BAR:
>> +		bar->base = GETFIELD(NPU2_GENID_BAR_ADDR, val) << 16;
>> +		bar->enabled = GETFIELD(NPU2_GENID_BAR_ENABLE, val);
>> +		bar->size = 0x20000;
>> +		break;
>> +	default:
>> +		bar->base = 0ul;
>> +		bar->enabled = false;
>> +		bar->size = 0;
>> +		break;
>> +	}
>> +}
>> +
>> +void npu2_write_bar(struct npu2 *p, struct npu2_bar *bar, uint32_t gcid,
>> +		    uint32_t scom)
>> +{
>> +	uint64_t reg, val;
>> +	int block;
>> +
>> +	switch (NPU2_REG(bar->reg)) {
>> +	case NPU2_PHY_BAR:
>> +		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, bar->base >> 21);
>> +		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, bar->enabled);
>> +		break;
>> +	case NPU2_NTL0_BAR:
>> +	case NPU2_NTL1_BAR:
>> +		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, bar->base >> 16);
>> +		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, bar->enabled);
>> +		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(bar->size >> 16));
>> +		break;
>> +	case NPU2_GENID_BAR:
>> +		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, bar->base >> 16);
>> +		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, bar->enabled);
>> +		break;
>> +	default:
>> +		val = 0ul;
>> +	}
>> +
>> +	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
>> +		reg = NPU2_REG_OFFSET(0, block, bar->reg);
>> +		if (p)
>> +			npu2_write(p, reg, val);
>> +		else
>> +			npu2_scom_write(gcid, scom, reg, NPU2_MISC_DA_LEN_8B, val);
>> +	}
>> +}
>> +
>>   static bool _i2c_presence_detect(struct npu2_dev *dev)
>>   {
>>   	uint8_t state, data;
>> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
>> index c10cf9863dda..ffa4d706bbad 100644
>> --- a/hw/npu2-opencapi.c
>> +++ b/hw/npu2-opencapi.c
>> @@ -730,63 +730,29 @@ static void address_translation_config(uint32_t gcid, uint32_t scom_base,
>>   	}
>>   }
>>   
>> -/* TODO: Merge this with NVLink implementation - we don't use the npu2_bar
>> - * wrapper for the PHY BARs yet */
>> -static void write_bar(uint32_t gcid, uint32_t scom_base, uint64_t reg,
>> -		      uint64_t addr, uint64_t size)
>> -{
>> -	uint64_t val;
>> -	int block;
>> -	switch (NPU2_REG(reg)) {
>> -	case NPU2_PHY_BAR:
>> -		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, addr >> 21);
>> -		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, 1);
>> -		break;
>> -	case NPU2_NTL0_BAR:
>> -	case NPU2_NTL1_BAR:
>> -		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, addr >> 16);
>> -		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(size >> 16));
>> -		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, 1);
>> -		break;
>> -	case NPU2_GENID_BAR:
>> -		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, addr >> 16);
>> -		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, 1);
>> -		break;
>> -	default:
>> -		val = 0ul;
>> -	}
>> -
>> -	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
>> -		npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(0, block, reg),
>> -				NPU2_MISC_DA_LEN_8B, val);
>> -		prlog(PR_DEBUG, "OCAPI: Setting BAR %llx to %llx\n",
>> -		      NPU2_REG_OFFSET(0, block, reg), val);
>> -	}
>> -}
>> -
>>   static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
>>   				  uint64_t reg[])
>>   {
>> -	uint64_t addr, size;
>> -
>> -	prlog(PR_DEBUG, "OCAPI: patching up PHY0 bar, %s\n", __func__);
>> -	phys_map_get(gcid, NPU_PHY, 0, &addr, &size);
>> -	write_bar(gcid, scom_base,
>> -		  NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
>> -		addr, size);
>> -	prlog(PR_DEBUG, "OCAPI: patching up PHY1 bar, %s\n", __func__);
>> -	phys_map_get(gcid, NPU_PHY, 1, &addr, &size);
>> -	write_bar(gcid, scom_base,
>> -		  NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
>> -		addr, size);
>> -
>> -	prlog(PR_DEBUG, "OCAPI: setup global mmio, %s\n", __func__);
>> -	phys_map_get(gcid, NPU_REGS, 0, &addr, &size);
>> -	write_bar(gcid, scom_base,
>> -		  NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
>> -		addr, size);
>> -	reg[0] = addr;
>> -	reg[1] = size;
>> +	struct npu2_bar *bar;
>> +	struct npu2_bar phy_bars[] = {
>> +		{ .type = NPU_PHY, .index = 0,
>> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
>> +		  .enabled = true },
>> +		{ .type = NPU_PHY, .index = 1,
>> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
>> +		  .enabled = true },
>> +		{ .type = NPU_REGS, .index = 0,
>> +		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
>> +		  .enabled = true },
>> +	};
>> +
>> +	for (int i = 0; i < ARRAY_SIZE(phy_bars); i++) {
>> +		bar = &phy_bars[i];
>> +		npu2_get_bar(gcid, bar);
>> +		npu2_write_bar(NULL, bar, gcid, scom_base);
>> +	}
>> +	reg[0] = phy_bars[2].base;
>> +	reg[1] = phy_bars[2].size;
>>   }
>>   
>>   /* Procedure 13.1.3.8 - AFU MMIO Range BARs */
>> @@ -799,19 +765,21 @@ static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
>>   	uint64_t pa_offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ?
>>   		NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG :
>>   		NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
>> -	uint64_t addr, size, reg;
>> +	uint64_t reg;
>>   
>>   	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
>> -	phys_map_get(gcid, NPU_OCAPI_MMIO, dev->brick_index, &addr, &size);
>> -
>> -	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size);
>> -	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr,
>> -		size);
>> -	dev->bars[0].base = addr;
>> -	dev->bars[0].size = size;
>> -
>> -	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, addr >> 16);
>> -	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(size >> 16));
>> +	dev->bars[0].type = NPU_OCAPI_MMIO;
>> +	dev->bars[0].index = dev->brick_index;
>> +	dev->bars[0].reg = NPU2_REG_OFFSET(stack, 0, offset);
>> +	dev->bars[0].enabled = true;
> 
> 
> Here you add type/index/reg/enabled initialization, not move from
> elsewhere so there either was a bug with unitialized bar struct or now
> we have 2 places where such initialization occurs. The commit log does
> not mentioned this. I am confused.

Previously those fields weren't used at all, we only used base and size. 
Now that we're merging the BAR accessor functions, we need to populate 
those fields because we're using the shared functions npu2_get_bar() and 
npu2_write_bar(), rather than open-coding phys_map_get() and having an 
OpenCAPI-only function called write_bar() that took the reg directly as 
an argument.

This code all disappears in a later patch.

> 
> 
> 
>> +	npu2_get_bar(gcid, &dev->bars[0]);
>> +
>> +	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n",
>> +	      dev->bars[0].base, dev->bars[0].size);
>> +	npu2_write_bar(NULL, &dev->bars[0], gcid, scom_base);
>> +
>> +	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, dev->bars[0].base >> 16);
>> +	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(dev->bars[0].size >> 16));
>>   	prlog(PR_DEBUG, "OCAPI: PA translation %llx\n", reg);
>>   	npu2_scom_write(gcid, scom_base,
>>   			NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
>> @@ -825,15 +793,15 @@ static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
>>   {
>>   	uint64_t stack = index_to_stack(dev->brick_index);
>>   	int stack_num = stack - NPU2_STACK_STCK_0;
>> -	uint64_t addr, size;
>>   
>>   	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU Config BARs\n", __func__);
>> -	phys_map_get(gcid, NPU_GENID, stack_num, &addr, &size);
>> -	prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", addr);
>> -	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR),
>> -		addr, size);
>> -	dev->bars[1].base = addr;
>> -	dev->bars[1].size = size;
>> +	dev->bars[1].type = NPU_GENID;
>> +	dev->bars[1].index = stack_num;
>> +	dev->bars[1].reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
>> +	dev->bars[1].enabled = true;
> 
> 
> Same here.

Same as above
Alexey Kardashevskiy Feb. 1, 2019, 4:44 a.m. UTC | #3
On 31/01/2019 14:18, Andrew Donnellan wrote:
> 
> 
> On 17/1/19 3:55 pm, Alexey Kardashevskiy wrote:
>>
>>
>> On 11/01/2019 12:09, Andrew Donnellan wrote:
>>> Move npu2_{get,read,write}_bar() to common code. Get rid of the OpenCAPI
>>> write_bar() function. Fix the OpenCAPI code to use the NVLink-style BAR
>>> accessor functions and struct npu2_bar.
>>>
>>> While we're there, fix a trivial bug in npu2_read_bar() when reading PHY
>>> BARs - the global MMIO BAR is stack 0, not stack 2. I don't think we
>>> ever
>>> read that BAR, so this has never been a problem.
>>
>> Out of curiosity I tried to locate this trivial bugfix and I could not
>> ;) imho it deserves a separate patch.
>>
>>>
>>> Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
>>> Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com>
>>>
>>> ---
>>> v1->v2:
>>> - remove dead phys_map_get() (Fred)
>>> ---
>>>   hw/npu2-common.c   |  77 ++++++++++++++++++++++++++++++++-
>>>   hw/npu2-opencapi.c | 112
>>> ++++++++++++++++------------------------------
>>>   hw/npu2.c          |  78 +--------------------------------
>>>   include/npu2.h     |   4 ++-
>>>   4 files changed, 121 insertions(+), 150 deletions(-)
>>>
>>> diff --git a/hw/npu2-common.c b/hw/npu2-common.c
>>> index 6e6b12f0d1ae..3446acb45bea 100644
>>> --- a/hw/npu2-common.c
>>> +++ b/hw/npu2-common.c
>>> @@ -22,6 +22,7 @@
>>>   #include <bitutils.h>
>>>   #include <nvram.h>
>>>   #include <i2c.h>
>>> +#include <phys-map.h>
>>>     /*
>>>    * We use the indirect method because it uses the same addresses as
>>> @@ -97,6 +98,82 @@ void npu2_write_mask_4b(struct npu2 *p, uint64_t
>>> reg, uint32_t val, uint32_t mas
>>>               (uint64_t)new_val << 32);
>>>   }
>>>   +void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar)
>>
>>
>> I'd call it npu2_setup_bar and pass type/index/reg/enabled as well.
> 
> I see what you're getting at but with the changes in later patches I
> prefer to keep as is
>>
>>
>>> +{
>>> +    phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
>>> +}
>>> +
>>> +void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar)
>>> +{
>>> +    uint64_t reg, val;
>>> +
>>> +    reg = NPU2_REG_OFFSET(0, NPU2_BLOCK_SM_0, bar->reg);
>>> +    val = npu2_read(p, reg);
>>> +
>>> +    switch (NPU2_REG(bar->reg)) {
>>> +    case NPU2_PHY_BAR:
>>> +        bar->base = GETFIELD(NPU2_PHY_BAR_ADDR, val) << 21;
>>> +        bar->enabled = GETFIELD(NPU2_PHY_BAR_ENABLE, val);
>>> +
>>> +        if (NPU2_REG_STACK(reg) == NPU2_STACK_STCK_0)
>>> +            /* This is the global MMIO BAR */
>>> +            bar->size = 0x1000000;
>>> +        else
>>> +            bar->size = 0x200000;
>>> +        break;
>>> +    case NPU2_NTL0_BAR:
>>> +    case NPU2_NTL1_BAR:
>>> +        bar->base = GETFIELD(NPU2_NTL_BAR_ADDR, val) << 16;
>>> +        bar->enabled = GETFIELD(NPU2_NTL_BAR_ENABLE, val);
>>> +        bar->size = 0x10000 << GETFIELD(NPU2_NTL_BAR_SIZE, val);
>>> +        break;
>>> +    case NPU2_GENID_BAR:
>>> +        bar->base = GETFIELD(NPU2_GENID_BAR_ADDR, val) << 16;
>>> +        bar->enabled = GETFIELD(NPU2_GENID_BAR_ENABLE, val);
>>> +        bar->size = 0x20000;
>>> +        break;
>>> +    default:
>>> +        bar->base = 0ul;
>>> +        bar->enabled = false;
>>> +        bar->size = 0;
>>> +        break;
>>> +    }
>>> +}
>>> +
>>> +void npu2_write_bar(struct npu2 *p, struct npu2_bar *bar, uint32_t
>>> gcid,
>>> +            uint32_t scom)
>>> +{
>>> +    uint64_t reg, val;
>>> +    int block;
>>> +
>>> +    switch (NPU2_REG(bar->reg)) {
>>> +    case NPU2_PHY_BAR:
>>> +        val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, bar->base >> 21);
>>> +        val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, bar->enabled);
>>> +        break;
>>> +    case NPU2_NTL0_BAR:
>>> +    case NPU2_NTL1_BAR:
>>> +        val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, bar->base >> 16);
>>> +        val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, bar->enabled);
>>> +        val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(bar->size >> 16));
>>> +        break;
>>> +    case NPU2_GENID_BAR:
>>> +        val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, bar->base >> 16);
>>> +        val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, bar->enabled);
>>> +        break;
>>> +    default:
>>> +        val = 0ul;
>>> +    }
>>> +
>>> +    for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
>>> +        reg = NPU2_REG_OFFSET(0, block, bar->reg);
>>> +        if (p)
>>> +            npu2_write(p, reg, val);
>>> +        else
>>> +            npu2_scom_write(gcid, scom, reg, NPU2_MISC_DA_LEN_8B, val);
>>> +    }
>>> +}
>>> +
>>>   static bool _i2c_presence_detect(struct npu2_dev *dev)
>>>   {
>>>       uint8_t state, data;
>>> diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
>>> index c10cf9863dda..ffa4d706bbad 100644
>>> --- a/hw/npu2-opencapi.c
>>> +++ b/hw/npu2-opencapi.c
>>> @@ -730,63 +730,29 @@ static void address_translation_config(uint32_t
>>> gcid, uint32_t scom_base,
>>>       }
>>>   }
>>>   -/* TODO: Merge this with NVLink implementation - we don't use the
>>> npu2_bar
>>> - * wrapper for the PHY BARs yet */
>>> -static void write_bar(uint32_t gcid, uint32_t scom_base, uint64_t reg,
>>> -              uint64_t addr, uint64_t size)
>>> -{
>>> -    uint64_t val;
>>> -    int block;
>>> -    switch (NPU2_REG(reg)) {
>>> -    case NPU2_PHY_BAR:
>>> -        val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, addr >> 21);
>>> -        val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, 1);
>>> -        break;
>>> -    case NPU2_NTL0_BAR:
>>> -    case NPU2_NTL1_BAR:
>>> -        val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, addr >> 16);
>>> -        val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(size >> 16));
>>> -        val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, 1);
>>> -        break;
>>> -    case NPU2_GENID_BAR:
>>> -        val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, addr >> 16);
>>> -        val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, 1);
>>> -        break;
>>> -    default:
>>> -        val = 0ul;
>>> -    }
>>> -
>>> -    for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
>>> -        npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(0, block,
>>> reg),
>>> -                NPU2_MISC_DA_LEN_8B, val);
>>> -        prlog(PR_DEBUG, "OCAPI: Setting BAR %llx to %llx\n",
>>> -              NPU2_REG_OFFSET(0, block, reg), val);
>>> -    }
>>> -}
>>> -
>>>   static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
>>>                     uint64_t reg[])
>>>   {
>>> -    uint64_t addr, size;
>>> -
>>> -    prlog(PR_DEBUG, "OCAPI: patching up PHY0 bar, %s\n", __func__);
>>> -    phys_map_get(gcid, NPU_PHY, 0, &addr, &size);
>>> -    write_bar(gcid, scom_base,
>>> -          NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
>>> -        addr, size);
>>> -    prlog(PR_DEBUG, "OCAPI: patching up PHY1 bar, %s\n", __func__);
>>> -    phys_map_get(gcid, NPU_PHY, 1, &addr, &size);
>>> -    write_bar(gcid, scom_base,
>>> -          NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
>>> -        addr, size);
>>> -
>>> -    prlog(PR_DEBUG, "OCAPI: setup global mmio, %s\n", __func__);
>>> -    phys_map_get(gcid, NPU_REGS, 0, &addr, &size);
>>> -    write_bar(gcid, scom_base,
>>> -          NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
>>> -        addr, size);
>>> -    reg[0] = addr;
>>> -    reg[1] = size;
>>> +    struct npu2_bar *bar;
>>> +    struct npu2_bar phy_bars[] = {
>>> +        { .type = NPU_PHY, .index = 0,
>>> +          .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
>>> +          .enabled = true },
>>> +        { .type = NPU_PHY, .index = 1,
>>> +          .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
>>> +          .enabled = true },
>>> +        { .type = NPU_REGS, .index = 0,
>>> +          .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
>>> +          .enabled = true },
>>> +    };
>>> +
>>> +    for (int i = 0; i < ARRAY_SIZE(phy_bars); i++) {
>>> +        bar = &phy_bars[i];
>>> +        npu2_get_bar(gcid, bar);
>>> +        npu2_write_bar(NULL, bar, gcid, scom_base);
>>> +    }
>>> +    reg[0] = phy_bars[2].base;
>>> +    reg[1] = phy_bars[2].size;
>>>   }
>>>     /* Procedure 13.1.3.8 - AFU MMIO Range BARs */
>>> @@ -799,19 +765,21 @@ static void setup_afu_mmio_bars(uint32_t gcid,
>>> uint32_t scom_base,
>>>       uint64_t pa_offset = index_to_block(dev->brick_index) ==
>>> NPU2_BLOCK_OTL0 ?
>>>           NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG :
>>>           NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
>>> -    uint64_t addr, size, reg;
>>> +    uint64_t reg;
>>>         prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
>>> -    phys_map_get(gcid, NPU_OCAPI_MMIO, dev->brick_index, &addr, &size);
>>> -
>>> -    prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n",
>>> addr, size);
>>> -    write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr,
>>> -        size);
>>> -    dev->bars[0].base = addr;
>>> -    dev->bars[0].size = size;
>>> -
>>> -    reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, addr >> 16);
>>> -    reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(size >>
>>> 16));
>>> +    dev->bars[0].type = NPU_OCAPI_MMIO;
>>> +    dev->bars[0].index = dev->brick_index;
>>> +    dev->bars[0].reg = NPU2_REG_OFFSET(stack, 0, offset);
>>> +    dev->bars[0].enabled = true;
>>
>>
>> Here you add type/index/reg/enabled initialization, not move from
>> elsewhere so there either was a bug with unitialized bar struct or now
>> we have 2 places where such initialization occurs. The commit log does
>> not mentioned this. I am confused.
> 
> Previously those fields weren't used at all, we only used base and size.
> Now that we're merging the BAR accessor functions, we need to populate
> those fields because we're using the shared functions npu2_get_bar() and
> npu2_write_bar(), rather than open-coding phys_map_get() and having an
> OpenCAPI-only function called write_bar() that took the reg directly as
> an argument.
> 
> This code all disappears in a later patch.


This tells us that either this later patch should be squashed into this
one or it should move before this one.
Andrew Donnellan Feb. 1, 2019, 4:57 a.m. UTC | #4
On 1/2/19 3:44 pm, Alexey Kardashevskiy wrote:
>>> Here you add type/index/reg/enabled initialization, not move from
>>> elsewhere so there either was a bug with unitialized bar struct or now
>>> we have 2 places where such initialization occurs. The commit log does
>>> not mentioned this. I am confused.
>>
>> Previously those fields weren't used at all, we only used base and size.
>> Now that we're merging the BAR accessor functions, we need to populate
>> those fields because we're using the shared functions npu2_get_bar() and
>> npu2_write_bar(), rather than open-coding phys_map_get() and having an
>> OpenCAPI-only function called write_bar() that took the reg directly as
>> an argument.
>>
>> This code all disappears in a later patch.
> 
> 
> This tells us that either this later patch should be squashed into this
> one or it should move before this one.

Hang on, I thought patch #4 was the one that was too complex and needed 
to be split out more :)
diff mbox series

Patch

diff --git a/hw/npu2-common.c b/hw/npu2-common.c
index 6e6b12f0d1ae..3446acb45bea 100644
--- a/hw/npu2-common.c
+++ b/hw/npu2-common.c
@@ -22,6 +22,7 @@ 
 #include <bitutils.h>
 #include <nvram.h>
 #include <i2c.h>
+#include <phys-map.h>
 
 /*
  * We use the indirect method because it uses the same addresses as
@@ -97,6 +98,82 @@  void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mas
 			(uint64_t)new_val << 32);
 }
 
+void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar)
+{
+	phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
+}
+
+void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar)
+{
+	uint64_t reg, val;
+
+	reg = NPU2_REG_OFFSET(0, NPU2_BLOCK_SM_0, bar->reg);
+	val = npu2_read(p, reg);
+
+	switch (NPU2_REG(bar->reg)) {
+	case NPU2_PHY_BAR:
+		bar->base = GETFIELD(NPU2_PHY_BAR_ADDR, val) << 21;
+		bar->enabled = GETFIELD(NPU2_PHY_BAR_ENABLE, val);
+
+		if (NPU2_REG_STACK(reg) == NPU2_STACK_STCK_0)
+			/* This is the global MMIO BAR */
+			bar->size = 0x1000000;
+		else
+			bar->size = 0x200000;
+		break;
+	case NPU2_NTL0_BAR:
+	case NPU2_NTL1_BAR:
+		bar->base = GETFIELD(NPU2_NTL_BAR_ADDR, val) << 16;
+		bar->enabled = GETFIELD(NPU2_NTL_BAR_ENABLE, val);
+		bar->size = 0x10000 << GETFIELD(NPU2_NTL_BAR_SIZE, val);
+		break;
+	case NPU2_GENID_BAR:
+		bar->base = GETFIELD(NPU2_GENID_BAR_ADDR, val) << 16;
+		bar->enabled = GETFIELD(NPU2_GENID_BAR_ENABLE, val);
+		bar->size = 0x20000;
+		break;
+	default:
+		bar->base = 0ul;
+		bar->enabled = false;
+		bar->size = 0;
+		break;
+	}
+}
+
+void npu2_write_bar(struct npu2 *p, struct npu2_bar *bar, uint32_t gcid,
+		    uint32_t scom)
+{
+	uint64_t reg, val;
+	int block;
+
+	switch (NPU2_REG(bar->reg)) {
+	case NPU2_PHY_BAR:
+		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, bar->base >> 21);
+		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, bar->enabled);
+		break;
+	case NPU2_NTL0_BAR:
+	case NPU2_NTL1_BAR:
+		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, bar->base >> 16);
+		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, bar->enabled);
+		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(bar->size >> 16));
+		break;
+	case NPU2_GENID_BAR:
+		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, bar->base >> 16);
+		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, bar->enabled);
+		break;
+	default:
+		val = 0ul;
+	}
+
+	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
+		reg = NPU2_REG_OFFSET(0, block, bar->reg);
+		if (p)
+			npu2_write(p, reg, val);
+		else
+			npu2_scom_write(gcid, scom, reg, NPU2_MISC_DA_LEN_8B, val);
+	}
+}
+
 static bool _i2c_presence_detect(struct npu2_dev *dev)
 {
 	uint8_t state, data;
diff --git a/hw/npu2-opencapi.c b/hw/npu2-opencapi.c
index c10cf9863dda..ffa4d706bbad 100644
--- a/hw/npu2-opencapi.c
+++ b/hw/npu2-opencapi.c
@@ -730,63 +730,29 @@  static void address_translation_config(uint32_t gcid, uint32_t scom_base,
 	}
 }
 
-/* TODO: Merge this with NVLink implementation - we don't use the npu2_bar
- * wrapper for the PHY BARs yet */
-static void write_bar(uint32_t gcid, uint32_t scom_base, uint64_t reg,
-		      uint64_t addr, uint64_t size)
-{
-	uint64_t val;
-	int block;
-	switch (NPU2_REG(reg)) {
-	case NPU2_PHY_BAR:
-		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, addr >> 21);
-		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, 1);
-		break;
-	case NPU2_NTL0_BAR:
-	case NPU2_NTL1_BAR:
-		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, addr >> 16);
-		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, ilog2(size >> 16));
-		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, 1);
-		break;
-	case NPU2_GENID_BAR:
-		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, addr >> 16);
-		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, 1);
-		break;
-	default:
-		val = 0ul;
-	}
-
-	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
-		npu2_scom_write(gcid, scom_base, NPU2_REG_OFFSET(0, block, reg),
-				NPU2_MISC_DA_LEN_8B, val);
-		prlog(PR_DEBUG, "OCAPI: Setting BAR %llx to %llx\n",
-		      NPU2_REG_OFFSET(0, block, reg), val);
-	}
-}
-
 static void setup_global_mmio_bar(uint32_t gcid, uint32_t scom_base,
 				  uint64_t reg[])
 {
-	uint64_t addr, size;
-
-	prlog(PR_DEBUG, "OCAPI: patching up PHY0 bar, %s\n", __func__);
-	phys_map_get(gcid, NPU_PHY, 0, &addr, &size);
-	write_bar(gcid, scom_base,
-		  NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
-		addr, size);
-	prlog(PR_DEBUG, "OCAPI: patching up PHY1 bar, %s\n", __func__);
-	phys_map_get(gcid, NPU_PHY, 1, &addr, &size);
-	write_bar(gcid, scom_base,
-		  NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
-		addr, size);
-
-	prlog(PR_DEBUG, "OCAPI: setup global mmio, %s\n", __func__);
-	phys_map_get(gcid, NPU_REGS, 0, &addr, &size);
-	write_bar(gcid, scom_base,
-		  NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
-		addr, size);
-	reg[0] = addr;
-	reg[1] = size;
+	struct npu2_bar *bar;
+	struct npu2_bar phy_bars[] = {
+		{ .type = NPU_PHY, .index = 0,
+		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
+		  .enabled = true },
+		{ .type = NPU_PHY, .index = 1,
+		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
+		  .enabled = true },
+		{ .type = NPU_REGS, .index = 0,
+		  .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
+		  .enabled = true },
+	};
+
+	for (int i = 0; i < ARRAY_SIZE(phy_bars); i++) {
+		bar = &phy_bars[i];
+		npu2_get_bar(gcid, bar);
+		npu2_write_bar(NULL, bar, gcid, scom_base);
+	}
+	reg[0] = phy_bars[2].base;
+	reg[1] = phy_bars[2].size;
 }
 
 /* Procedure 13.1.3.8 - AFU MMIO Range BARs */
@@ -799,19 +765,21 @@  static void setup_afu_mmio_bars(uint32_t gcid, uint32_t scom_base,
 	uint64_t pa_offset = index_to_block(dev->brick_index) == NPU2_BLOCK_OTL0 ?
 		NPU2_CQ_CTL_MISC_MMIOPA0_CONFIG :
 		NPU2_CQ_CTL_MISC_MMIOPA1_CONFIG;
-	uint64_t addr, size, reg;
+	uint64_t reg;
 
 	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU MMIO BARs\n", __func__);
-	phys_map_get(gcid, NPU_OCAPI_MMIO, dev->brick_index, &addr, &size);
-
-	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n", addr, size);
-	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, offset), addr,
-		size);
-	dev->bars[0].base = addr;
-	dev->bars[0].size = size;
-
-	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, addr >> 16);
-	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(size >> 16));
+	dev->bars[0].type = NPU_OCAPI_MMIO;
+	dev->bars[0].index = dev->brick_index;
+	dev->bars[0].reg = NPU2_REG_OFFSET(stack, 0, offset);
+	dev->bars[0].enabled = true;
+	npu2_get_bar(gcid, &dev->bars[0]);
+
+	prlog(PR_DEBUG, "OCAPI: AFU MMIO set to %llx, size %llx\n",
+	      dev->bars[0].base, dev->bars[0].size);
+	npu2_write_bar(NULL, &dev->bars[0], gcid, scom_base);
+
+	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_ADDR, 0ull, dev->bars[0].base >> 16);
+	reg = SETFIELD(NPU2_CQ_CTL_MISC_MMIOPA_SIZE, reg, ilog2(dev->bars[0].size >> 16));
 	prlog(PR_DEBUG, "OCAPI: PA translation %llx\n", reg);
 	npu2_scom_write(gcid, scom_base,
 			NPU2_REG_OFFSET(stack, NPU2_BLOCK_CTL,
@@ -825,15 +793,15 @@  static void setup_afu_config_bars(uint32_t gcid, uint32_t scom_base,
 {
 	uint64_t stack = index_to_stack(dev->brick_index);
 	int stack_num = stack - NPU2_STACK_STCK_0;
-	uint64_t addr, size;
 
 	prlog(PR_DEBUG, "OCAPI: %s: Setup AFU Config BARs\n", __func__);
-	phys_map_get(gcid, NPU_GENID, stack_num, &addr, &size);
-	prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", addr);
-	write_bar(gcid, scom_base, NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR),
-		addr, size);
-	dev->bars[1].base = addr;
-	dev->bars[1].size = size;
+	dev->bars[1].type = NPU_GENID;
+	dev->bars[1].index = stack_num;
+	dev->bars[1].reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
+	dev->bars[1].enabled = true;
+	npu2_get_bar(gcid, &dev->bars[1]);
+	prlog(PR_DEBUG, "OCAPI: Assigning GENID BAR: %016llx\n", dev->bars[1].base);
+	npu2_write_bar(NULL, &dev->bars[1], gcid, scom_base);
 }
 
 static void otl_enabletx(uint32_t gcid, uint32_t scom_base,
diff --git a/hw/npu2.c b/hw/npu2.c
index 79b87f196b3a..8176a81262f0 100644
--- a/hw/npu2.c
+++ b/hw/npu2.c
@@ -106,84 +106,6 @@  static struct npu2_dev *npu2_bdf_to_dev(struct npu2 *p,
 	return NULL;
 }
 
-static inline void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar)
-{
-	phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
-}
-
-static void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar)
-{
-	uint64_t reg, val;
-
-	reg = NPU2_REG_OFFSET(0, NPU2_BLOCK_SM_0, bar->reg);
-	val = npu2_read(p, reg);
-
-	switch (NPU2_REG(bar->reg)) {
-	case NPU2_PHY_BAR:
-		bar->base = GETFIELD(NPU2_PHY_BAR_ADDR, val) << 21;
-		bar->enabled = GETFIELD(NPU2_PHY_BAR_ENABLE, val);
-
-		if (NPU2_REG_STACK(reg) == NPU2_STACK_STCK_2)
-			/* This is the global MMIO BAR */
-			bar->size = 0x1000000;
-		else
-			bar->size = 0x200000;
-		break;
-	case NPU2_NTL0_BAR:
-	case NPU2_NTL1_BAR:
-		bar->base = GETFIELD(NPU2_NTL_BAR_ADDR, val) << 16;
-		bar->enabled = GETFIELD(NPU2_NTL_BAR_ENABLE, val);
-		bar->size = 0x10000 << GETFIELD(NPU2_NTL_BAR_SIZE, val);
-		break;
-	case NPU2_GENID_BAR:
-		bar->base = GETFIELD(NPU2_GENID_BAR_ADDR, val) << 16;
-		bar->enabled = GETFIELD(NPU2_GENID_BAR_ENABLE, val);
-		bar->size = 0x20000;
-		break;
-	default:
-		bar->base = 0ul;
-		bar->enabled = false;
-		bar->size = 0;
-		break;
-	}
-}
-
-static void npu2_write_bar(struct npu2 *p,
-			   struct npu2_bar *bar,
-			   uint32_t gcid,
-			   uint32_t scom)
-{
-	uint64_t reg, val;
-	int block;
-
-	switch (NPU2_REG(bar->reg)) {
-	case NPU2_PHY_BAR:
-		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, bar->base >> 21);
-		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, bar->enabled);
-		break;
-	case NPU2_NTL0_BAR:
-	case NPU2_NTL1_BAR:
-		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, bar->base >> 16);
-		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, bar->enabled);
-		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, 1);
-		break;
-	case NPU2_GENID_BAR:
-		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, bar->base >> 16);
-		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, bar->enabled);
-		break;
-	default:
-		val = 0ul;
-	}
-
-	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
-		reg = NPU2_REG_OFFSET(0, block, bar->reg);
-		if (p)
-			npu2_write(p, reg, val);
-		else
-			npu2_scom_write(gcid, scom, reg, NPU2_MISC_DA_LEN_8B, val);
-	}
-}
-
 /* Trap for PCI command (0x4) to enable or disable device's BARs */
 static int64_t npu2_cfg_write_cmd(void *dev,
 				  struct pci_cfg_reg_filter *pcrf __unused,
diff --git a/include/npu2.h b/include/npu2.h
index fcaeb55f67dc..b2a3ead858d2 100644
--- a/include/npu2.h
+++ b/include/npu2.h
@@ -204,6 +204,10 @@  void npu2_write(struct npu2 *p, uint64_t reg, uint64_t val);
 uint64_t npu2_read(struct npu2 *p, uint64_t reg);
 void npu2_write_mask(struct npu2 *p, uint64_t reg, uint64_t val, uint64_t mask);
 void npu2_write_mask_4b(struct npu2 *p, uint64_t reg, uint32_t val, uint32_t mask);
+void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar);
+void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar);
+void npu2_write_bar(struct npu2 *p, struct npu2_bar *bar, uint32_t gcid,
+		    uint32_t scom);
 int64_t npu2_dev_procedure(void *dev, struct pci_cfg_reg_filter *pcrf,
 			   uint32_t offset, uint32_t len, uint32_t *data,
 			   bool write);