diff mbox

[v12,09/10] skiboot: Add core IMC related counter configuration

Message ID 1495379407-6658-10-git-send-email-maddy@linux.vnet.ibm.com
State Superseded
Headers show

Commit Message

maddy May 21, 2017, 3:10 p.m. UTC
From: Anju T Sudhakar <anju@linux.vnet.ibm.com>

Add support to start, stop and initialize the core IMC counters.
To initialize the core IMC counters, it takes a physical address per
core as an input and writes that address to PDBAR[14:50] bits.
It initializes the htm_mode and event_mask, where it selects the time
interval at which the counter values must be posted to the given memory
location and enables the counters to start running by setting the
appropriate bits.

To disable the core IMC counters (only stop counting), writes into
appropriate bits of htm_mode to disable the counters.
To enable the core IMC counters (only resume counting), writes into
appropriate bits of the htm_mode to enable the counters.

Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>
Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
---
 hw/imc.c           | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 include/imc.h      |  10 ++++
 include/opal-api.h |   1 +
 3 files changed, 143 insertions(+), 6 deletions(-)

Comments

Michael Neuling June 14, 2017, 4:56 a.m. UTC | #1
On Sun, 2017-05-21 at 20:40 +0530, Madhavan Srinivasan wrote:
> > From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
> 
> Add support to start, stop and initialize the core IMC counters.
> To initialize the core IMC counters, it takes a physical address per
> core as an input and writes that address to PDBAR[14:50] bits.
> It initializes the htm_mode and event_mask, where it selects the time

Please spell out an acronym the first time you use it...

htm ?  hardware transactional memory?  hardware trace macro?

> interval at which the counter values must be posted to the given memory
> location and enables the counters to start running by setting the
> appropriate bits.
> 
> To disable the core IMC counters (only stop counting), writes into
> appropriate bits of htm_mode to disable the counters.
> To enable the core IMC counters (only resume counting), writes into
> appropriate bits of the htm_mode to enable the counters.
> 
> > Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>
> > Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
> > Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> ---
>  hw/imc.c           | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>  include/imc.h      |  10 ++++
>  include/opal-api.h |   1 +
>  3 files changed, 143 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/imc.c b/hw/imc.c
> index d56b3eb251b2..923a8fe48b5f 100644
> --- a/hw/imc.c
> +++ b/hw/imc.c
> @@ -83,6 +83,26 @@ size_t imc_catalog_size;
>  const char **imc_prop_to_fix(struct dt_node *node);
>  const char *prop_to_fix[] = {"events", NULL};
>  
> +
> +/*
> + * A Quad contains 4 cores in Power 9, and there are 4 addresses for
> + * the CHTM attached to each core.

CHTM?

> + * So, for core index 0 to core index 3, we have a sequential range of
> + * SCOM port addresses in the arrays below, each for PDBAR and HTM mode.

PDBAR?

> + */
> +unsigned int pdbar_scom_index[] = {
> > +	0x1001220B,
> > +	0x1001230B,
> > +	0x1001260B,
> > +	0x1001270B
> +};
> +unsigned int htm_scom_index[] = {
> > +	0x10012200,
> > +	0x10012300,
> > +	0x10012600,
> > +	0x10012700
> +};
> +
>  static struct imc_chip_cb *get_imc_cb(void)
>  {
> >  	uint64_t cb_loc;
> @@ -341,19 +361,83 @@ err:
>   * opal_imc_counters_init : This call initialize the IMC engine.
>   *
>   * For Nest IMC, this is no-op and returns OPAL_SUCCESS at this point.
> + * For Core IMC, this initializes core IMC Engine, by initializing
> + * these scoms "PDBAR", "HTM_MODE" and the "EVENT_MASK" in a given cpu.
>   */
> -static int64_t opal_imc_counters_init(uint32_t type)
> +static int64_t opal_imc_counters_init(uint32_t type, uint64_t addr, uint64_t cpu_pir)
>  {
> > -	return OPAL_SUCCESS;
> > +	struct cpu_thread *c = find_cpu_by_pir(cpu_pir);
> > +	struct proc_chip *chip;
> +	int core_id, phys_core_id, ret = OPAL_SUCCESS;

Again, ret is confusing here.  Below you set ret, then goto hw_err that does
return OPAL_HARDWARE;

> +
> > +	switch (type) {
> > +	case OPAL_IMC_COUNTERS_NEST:
> > +		break;
> > +	case OPAL_IMC_COUNTERS_CORE:
> > +		assert(c);
> > +		chip = get_chip(c->chip_id);
> > +		assert(chip);
> > +		phys_core_id = cpu_get_core_index(c);
> +		core_id = phys_core_id % 4;

core_id or thread_id?  why %4?


> +
> > +		/*
> > +		 * Core IMC hardware mandate initing of three scoms
> > +		 * to enbale or disable of the Core IMC engine.
> > +		 *
> > +		 * PDBAR: Scom contains the real address to store per-core
> > +		 *        counter data in memory along with other bits.
> > +		 *
> > +		 * EventMask: Scom contain bits to denote event to multiplex
> > +		 *            at different MSR[HV PR] values, along with bits for
> > +		 *            sampling duration.
> > +		 *
> > +		 * HTM Scom: scom to enable counter data movement to memory.
> > +		 */
> > +		ret = xscom_write(chip->id,
> > +				XSCOM_ADDR_P9_EP(phys_core_id,
> > +						pdbar_scom_index[core_id]),
> > +				(u64)(CORE_IMC_PDBAR_MASK & addr));
> > +		if (ret < 0) {
> > +			prerror("IMC: error in xscom_write for pdbar\n");
> > +			goto hw_err;
> > +		}
> +
> > +		ret = xscom_write(chip->id,
> > +				XSCOM_ADDR_P9_EC(phys_core_id,
> > +					 CORE_IMC_EVENT_MASK_ADDR),
> > +				(u64)CORE_IMC_EVENT_MASK);
> > +		if (ret < 0) {
> > +			prerror("IMC: error in xscom_write for event mask\n");
> > +			goto hw_err;
> > +		}
> +
> > +		ret = xscom_write(chip->id,
> > +				XSCOM_ADDR_P9_EP(phys_core_id,
> > +						htm_scom_index[core_id]),
> > +				(u64)CORE_IMC_HTM_MODE_DISABLE);
> > +		if (ret < 0) {
> > +			prerror("IMC: error in xscom_write for htm mode\n");
> > +			goto hw_err;
> > +		}
> > +		break;
> > +	default:
> > +		prerror("IMC: Unknown Domain int _INIT\n");
> > +		return OPAL_PARAMETER;
> > +	}
> +
> > +	return ret;
> +hw_err:
> > +	return OPAL_HARDWARE;
>  }
> -opal_call(OPAL_IMC_COUNTERS_INIT, opal_imc_counters_init, 1);
> +opal_call(OPAL_IMC_COUNTERS_INIT, opal_imc_counters_init, 3);
>  
> -/* opal_imc_counters_control_start: This call starts the nest imc engine. */
> +/* opal_imc_counters_control_start: This call starts the nest/core imc engine. */
>  static int64_t opal_imc_counters_start(uint32_t type)
>  {
> >  	u64 op, status;
> >  	struct imc_chip_cb *cb;
> > -	int ret = OPAL_SUCCESS;
> > +	struct proc_chip *chip;
> +	int core_id, phys_core_id, ret = OPAL_SUCCESS;

again, ret is confusing.

>  
> >  	switch (type) {
> >  	case OPAL_IMC_COUNTERS_NEST:
> @@ -372,6 +456,28 @@ static int64_t opal_imc_counters_start(uint32_t type)
> >  		cb->imc_chip_command = op;
>  
> >  		break;
> > +	case OPAL_IMC_COUNTERS_CORE:
> > +		/*
> > +		* Enables the core imc engine by appropriately setting
> > +		* bits 4-9 of the HTM_MODE scom port. No initialization
> > +		* is done in this call. This just enables the the counters
> > +		* to count with the previous initialization.
> > +		*/
> > +		chip = get_chip(this_cpu()->chip_id);
> > +		phys_core_id = cpu_get_core_index(this_cpu());
> > +		core_id = phys_core_id % 4;
> +
> > +		ret = xscom_write(chip->id,
> > +				XSCOM_ADDR_P9_EP(phys_core_id,
> > +						htm_scom_index[core_id]),
> > +				(u64) CORE_IMC_HTM_MODE_ENABLE);
> +
> > +		if (ret < 0) {
> > +			prerror("IMC: error in xscom_write for htm_mode\n");
> > +			return OPAL_HARDWARE;
> > +		}
> +
> > +		break;
> >  	default:
> >  		prerror("IMC: Unknown Domain in _START\n");
> >  		return OPAL_PARAMETER;
> @@ -386,7 +492,8 @@ static int64_t opal_imc_counters_stop(uint32_t type)
>  {
> >  	u64 op, status;
> >  	struct imc_chip_cb *cb;
> > -	int ret = OPAL_SUCCESS;
> > +	struct proc_chip *chip;
> > +	int core_id, phys_core_id, ret = OPAL_SUCCESS;
>  
> >  	switch (type) {
> >  	case OPAL_IMC_COUNTERS_NEST:
> @@ -405,6 +512,25 @@ static int64_t opal_imc_counters_stop(uint32_t type)
> >  		cb->imc_chip_command = op;
>  
> >  		break;
> > +	case OPAL_IMC_COUNTERS_CORE:
> > +		/*
> > +		* Disables the core imc engine by clearing
> > +		* bits 4-9 of the HTM_MODE scom port.
> > +		*/
> > +		chip = get_chip(this_cpu()->chip_id);
> > +		phys_core_id = cpu_get_core_index(this_cpu());
> +		core_id = phys_core_id % 4;

Again, core_id?  %4?  

> +
> > +		ret = xscom_write(chip->id,
> > +				XSCOM_ADDR_P9_EP(phys_core_id,
> > +						htm_scom_index[core_id]),
> > +				(u64) CORE_IMC_HTM_MODE_DISABLE);
> > +		if (ret < 0) {
> > +			prerror("IMC: error in xscom_write for htm_mode\n");
> > +			return OPAL_HARDWARE;
> > +		}
> +
> > +		break;
> >  	default:
>  		prerror("IMC: Unknown Domain in _STOP\n");

unknown domain?  

>  		return OPAL_PARAMETER;
> diff --git a/include/imc.h b/include/imc.h
> index c7ad5fa933b7..5a3d53c22ca1 100644
> --- a/include/imc.h
> +++ b/include/imc.h
> @@ -116,6 +116,16 @@ struct imc_chip_cb
>  
> >  #define MAX_NEST_UNITS			48
>  
> +/*
> + * Core IMC SCOMs
> + */
> > +#define CORE_IMC_EVENT_MASK_ADDR	0x20010AA8ull
> > +#define CORE_IMC_EVENT_MASK		0x0001020000000000ull
> > +#define CORE_IMC_PDBAR_MASK		0x0003ffffffffe000ull
> > +#define CORE_IMC_NCU_MODE		0x0800000000000000ull
> > +#define CORE_IMC_HTM_MODE_ENABLE	0xE800000000000000ull
> > +#define CORE_IMC_HTM_MODE_DISABLE	0xE000000000000000ull
> +
>  void imc_init(void);
>  void imc_catalog_preload(void);
>  #endif /* __IMC_H */
> diff --git a/include/opal-api.h b/include/opal-api.h
> index 02927356d033..5dbae2d57fde 100644
> --- a/include/opal-api.h
> +++ b/include/opal-api.h
> @@ -1232,6 +1232,7 @@ enum {
>  /* Operation argument to IMC Microcode */
>  enum {
> >  	OPAL_IMC_COUNTERS_NEST = 1,
> +	OPAL_IMC_COUNTERS_CORE = 2,

How do we advertise to linux that it can do CORE vs NEST calls?

Say we need to remove one, how does linux know which one it can call without
trying and getting OPAL_PARAMETER?

If it's based on the device tree, then the patch series the wrong way around
since the earlier patches start adding the device tree before the calls are
added.  So if you apply just patches 1-7, Linux is going to blow up with a bunch
of OPAL_PARAMETER since it'll start seeing the device tree before the OPAL calls
have been added.

Not a big issues for this series, but it does make me wonder.  If someone
updates the device tree in the PNOR to start adding a new section type for IMC,
how do we tell linux that even though the device tree supports it, OPAL itself
doesn't?  

Say we have PHB counters in the future (just made up).  We add them to the IMA
catalog and linux, but not skiboot.  Linux is going to OPAL_PARAMETER and blow
up.  Linux needs to check if the device tree and skiboot have the capability. 
Not just the device tree.

Mikey
Michael Neuling June 14, 2017, 5:16 a.m. UTC | #2
> > diff --git a/include/opal-api.h b/include/opal-api.h
> > index 02927356d033..5dbae2d57fde 100644
> > --- a/include/opal-api.h
> > +++ b/include/opal-api.h
> > @@ -1232,6 +1232,7 @@ enum {
> >  /* Operation argument to IMC Microcode */
> >  enum {
> > >  	OPAL_IMC_COUNTERS_NEST = 1,
> > 
> > +	OPAL_IMC_COUNTERS_CORE = 2,
> 
> How do we advertise to linux that it can do CORE vs NEST calls?
> 
> Say we need to remove one, how does linux know which one it can call without
> trying and getting OPAL_PARAMETER?
> 
> If it's based on the device tree, then the patch series the wrong way around
> since the earlier patches start adding the device tree before the calls are
> added.  So if you apply just patches 1-7, Linux is going to blow up with a
> bunch
> of OPAL_PARAMETER since it'll start seeing the device tree before the OPAL
> calls
> have been added.
> 
> Not a big issues for this series, but it does make me wonder.  If someone
> updates the device tree in the PNOR to start adding a new section type for
> IMC,
> how do we tell linux that even though the device tree supports it, OPAL itself
> doesn't?  
> 
> Say we have PHB counters in the future (just made up).  We add them to the IMA
> catalog and linux, but not skiboot.  Linux is going to OPAL_PARAMETER and blow
> up.  Linux needs to check if the device tree and skiboot have the capability. 
> Not just the device tree.

Talking to benh offline, he suggested skiboot should parse the devicetree
imported from the PNOR and strip out things it doesn't understand.  

That way Linux won't try to touch things it doesn't understand.

Mikey
maddy June 14, 2017, 3:55 p.m. UTC | #3
On Wednesday 14 June 2017 10:26 AM, Michael Neuling wrote:
> On Sun, 2017-05-21 at 20:40 +0530, Madhavan Srinivasan wrote:
>>> From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
>> Add support to start, stop and initialize the core IMC counters.
>> To initialize the core IMC counters, it takes a physical address per
>> core as an input and writes that address to PDBAR[14:50] bits.
>> It initializes the htm_mode and event_mask, where it selects the time
> Please spell out an acronym the first time you use it...
>
> htm ?  hardware transactional memory?  hardware trace macro?

Yes will do. And it is hardware trace macro.
>
>> interval at which the counter values must be posted to the given memory
>> location and enables the counters to start running by setting the
>> appropriate bits.
>>
>> To disable the core IMC counters (only stop counting), writes into
>> appropriate bits of htm_mode to disable the counters.
>> To enable the core IMC counters (only resume counting), writes into
>> appropriate bits of the htm_mode to enable the counters.
>>
>>> Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>
>>> Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
>>> Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
>> ---
>>   hw/imc.c           | 138 ++++++++++++++++++++++++++++++++++++++++++++++++++---
>>   include/imc.h      |  10 ++++
>>   include/opal-api.h |   1 +
>>   3 files changed, 143 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/imc.c b/hw/imc.c
>> index d56b3eb251b2..923a8fe48b5f 100644
>> --- a/hw/imc.c
>> +++ b/hw/imc.c
>> @@ -83,6 +83,26 @@ size_t imc_catalog_size;
>>   const char **imc_prop_to_fix(struct dt_node *node);
>>   const char *prop_to_fix[] = {"events", NULL};
>>   
>> +
>> +/*
>> + * A Quad contains 4 cores in Power 9, and there are 4 addresses for
>> + * the CHTM attached to each core.
> CHTM?

Yes will update the command.
>
>> + * So, for core index 0 to core index 3, we have a sequential range of
>> + * SCOM port addresses in the arrays below, each for PDBAR and HTM mode.
> PDBAR?
>
>> + */
>> +unsigned int pdbar_scom_index[] = {
>>> +	0x1001220B,
>>> +	0x1001230B,
>>> +	0x1001260B,
>>> +	0x1001270B
>> +};
>> +unsigned int htm_scom_index[] = {
>>> +	0x10012200,
>>> +	0x10012300,
>>> +	0x10012600,
>>> +	0x10012700
>> +};
>> +
>>   static struct imc_chip_cb *get_imc_cb(void)
>>   {
>>>   	uint64_t cb_loc;
>> @@ -341,19 +361,83 @@ err:
>>    * opal_imc_counters_init : This call initialize the IMC engine.
>>    *
>>    * For Nest IMC, this is no-op and returns OPAL_SUCCESS at this point.
>> + * For Core IMC, this initializes core IMC Engine, by initializing
>> + * these scoms "PDBAR", "HTM_MODE" and the "EVENT_MASK" in a given cpu.
>>    */
>> -static int64_t opal_imc_counters_init(uint32_t type)
>> +static int64_t opal_imc_counters_init(uint32_t type, uint64_t addr, uint64_t cpu_pir)
>>   {
>>> -	return OPAL_SUCCESS;
>>> +	struct cpu_thread *c = find_cpu_by_pir(cpu_pir);
>>> +	struct proc_chip *chip;
>> +	int core_id, phys_core_id, ret = OPAL_SUCCESS;
> Again, ret is confusing here.  Below you set ret, then goto hw_err that does
> return OPAL_HARDWARE;

Ok will fix it.

>
>> +
>>> +	switch (type) {
>>> +	case OPAL_IMC_COUNTERS_NEST:
>>> +		break;
>>> +	case OPAL_IMC_COUNTERS_CORE:
>>> +		assert(c);
>>> +		chip = get_chip(c->chip_id);
>>> +		assert(chip);
>>> +		phys_core_id = cpu_get_core_index(c);
>> +		core_id = phys_core_id % 4;
> core_id or thread_id?  why %4?

These are more of hw procedure in setting up the core imc logic.
We use the core id to select the pdbar scom port to program the
address to.

+unsigned int pdbar_scom_index[] = {

> +	0x1001220B,
> +	0x1001230B,
> +	0x1001260B,
> +	0x1001270B

And we need to setup all. I guess i shoud fix the "core_id" to
pdbar_scom_idx to avoid any confusion.

>
>
>> +
>>> +		/*
>>> +		 * Core IMC hardware mandate initing of three scoms
>>> +		 * to enbale or disable of the Core IMC engine.
>>> +		 *
>>> +		 * PDBAR: Scom contains the real address to store per-core
>>> +		 *        counter data in memory along with other bits.
>>> +		 *
>>> +		 * EventMask: Scom contain bits to denote event to multiplex
>>> +		 *            at different MSR[HV PR] values, along with bits for
>>> +		 *            sampling duration.
>>> +		 *
>>> +		 * HTM Scom: scom to enable counter data movement to memory.
>>> +		 */
>>> +		ret = xscom_write(chip->id,
>>> +				XSCOM_ADDR_P9_EP(phys_core_id,
>>> +						pdbar_scom_index[core_id]),
>>> +				(u64)(CORE_IMC_PDBAR_MASK & addr));
>>> +		if (ret < 0) {
>>> +			prerror("IMC: error in xscom_write for pdbar\n");
>>> +			goto hw_err;
>>> +		}
>> +
>>> +		ret = xscom_write(chip->id,
>>> +				XSCOM_ADDR_P9_EC(phys_core_id,
>>> +					 CORE_IMC_EVENT_MASK_ADDR),
>>> +				(u64)CORE_IMC_EVENT_MASK);
>>> +		if (ret < 0) {
>>> +			prerror("IMC: error in xscom_write for event mask\n");
>>> +			goto hw_err;
>>> +		}
>> +
>>> +		ret = xscom_write(chip->id,
>>> +				XSCOM_ADDR_P9_EP(phys_core_id,
>>> +						htm_scom_index[core_id]),
>>> +				(u64)CORE_IMC_HTM_MODE_DISABLE);
>>> +		if (ret < 0) {
>>> +			prerror("IMC: error in xscom_write for htm mode\n");
>>> +			goto hw_err;
>>> +		}
>>> +		break;
>>> +	default:
>>> +		prerror("IMC: Unknown Domain int _INIT\n");
>>> +		return OPAL_PARAMETER;
>>> +	}
>> +
>>> +	return ret;
>> +hw_err:
>>> +	return OPAL_HARDWARE;
>>   }
>> -opal_call(OPAL_IMC_COUNTERS_INIT, opal_imc_counters_init, 1);
>> +opal_call(OPAL_IMC_COUNTERS_INIT, opal_imc_counters_init, 3);
>>   
>> -/* opal_imc_counters_control_start: This call starts the nest imc engine. */
>> +/* opal_imc_counters_control_start: This call starts the nest/core imc engine. */
>>   static int64_t opal_imc_counters_start(uint32_t type)
>>   {
>>>   	u64 op, status;
>>>   	struct imc_chip_cb *cb;
>>> -	int ret = OPAL_SUCCESS;
>>> +	struct proc_chip *chip;
>> +	int core_id, phys_core_id, ret = OPAL_SUCCESS;
> again, ret is confusing.

yes will fix it.

>
>>   
>>>   	switch (type) {
>>>   	case OPAL_IMC_COUNTERS_NEST:
>> @@ -372,6 +456,28 @@ static int64_t opal_imc_counters_start(uint32_t type)
>>>   		cb->imc_chip_command = op;
>>   
>>>   		break;
>>> +	case OPAL_IMC_COUNTERS_CORE:
>>> +		/*
>>> +		* Enables the core imc engine by appropriately setting
>>> +		* bits 4-9 of the HTM_MODE scom port. No initialization
>>> +		* is done in this call. This just enables the the counters
>>> +		* to count with the previous initialization.
>>> +		*/
>>> +		chip = get_chip(this_cpu()->chip_id);
>>> +		phys_core_id = cpu_get_core_index(this_cpu());
>>> +		core_id = phys_core_id % 4;
>> +
>>> +		ret = xscom_write(chip->id,
>>> +				XSCOM_ADDR_P9_EP(phys_core_id,
>>> +						htm_scom_index[core_id]),
>>> +				(u64) CORE_IMC_HTM_MODE_ENABLE);
>> +
>>> +		if (ret < 0) {
>>> +			prerror("IMC: error in xscom_write for htm_mode\n");
>>> +			return OPAL_HARDWARE;
>>> +		}
>> +
>>> +		break;
>>>   	default:
>>>   		prerror("IMC: Unknown Domain in _START\n");
>>>   		return OPAL_PARAMETER;
>> @@ -386,7 +492,8 @@ static int64_t opal_imc_counters_stop(uint32_t type)
>>   {
>>>   	u64 op, status;
>>>   	struct imc_chip_cb *cb;
>>> -	int ret = OPAL_SUCCESS;
>>> +	struct proc_chip *chip;
>>> +	int core_id, phys_core_id, ret = OPAL_SUCCESS;
>>   
>>>   	switch (type) {
>>>   	case OPAL_IMC_COUNTERS_NEST:
>> @@ -405,6 +512,25 @@ static int64_t opal_imc_counters_stop(uint32_t type)
>>>   		cb->imc_chip_command = op;
>>   
>>>   		break;
>>> +	case OPAL_IMC_COUNTERS_CORE:
>>> +		/*
>>> +		* Disables the core imc engine by clearing
>>> +		* bits 4-9 of the HTM_MODE scom port.
>>> +		*/
>>> +		chip = get_chip(this_cpu()->chip_id);
>>> +		phys_core_id = cpu_get_core_index(this_cpu());
>> +		core_id = phys_core_id % 4;
> Again, core_id?  %4?
>
>> +
>>> +		ret = xscom_write(chip->id,
>>> +				XSCOM_ADDR_P9_EP(phys_core_id,
>>> +						htm_scom_index[core_id]),
>>> +				(u64) CORE_IMC_HTM_MODE_DISABLE);
>>> +		if (ret < 0) {
>>> +			prerror("IMC: error in xscom_write for htm_mode\n");
>>> +			return OPAL_HARDWARE;
>>> +		}
>> +
>>> +		break;
>>>   	default:
>>   		prerror("IMC: Unknown Domain in _STOP\n");
> unknown domain?
>
>>   		return OPAL_PARAMETER;
>> diff --git a/include/imc.h b/include/imc.h
>> index c7ad5fa933b7..5a3d53c22ca1 100644
>> --- a/include/imc.h
>> +++ b/include/imc.h
>> @@ -116,6 +116,16 @@ struct imc_chip_cb
>>   
>>>   #define MAX_NEST_UNITS			48
>>   
>> +/*
>> + * Core IMC SCOMs
>> + */
>>> +#define CORE_IMC_EVENT_MASK_ADDR	0x20010AA8ull
>>> +#define CORE_IMC_EVENT_MASK		0x0001020000000000ull
>>> +#define CORE_IMC_PDBAR_MASK		0x0003ffffffffe000ull
>>> +#define CORE_IMC_NCU_MODE		0x0800000000000000ull
>>> +#define CORE_IMC_HTM_MODE_ENABLE	0xE800000000000000ull
>>> +#define CORE_IMC_HTM_MODE_DISABLE	0xE000000000000000ull
>> +
>>   void imc_init(void);
>>   void imc_catalog_preload(void);
>>   #endif /* __IMC_H */
>> diff --git a/include/opal-api.h b/include/opal-api.h
>> index 02927356d033..5dbae2d57fde 100644
>> --- a/include/opal-api.h
>> +++ b/include/opal-api.h
>> @@ -1232,6 +1232,7 @@ enum {
>>   /* Operation argument to IMC Microcode */
>>   enum {
>>>   	OPAL_IMC_COUNTERS_NEST = 1,
>> +	OPAL_IMC_COUNTERS_CORE = 2,
> How do we advertise to linux that it can do CORE vs NEST calls?
>
> Say we need to remove one, how does linux know which one it can call without
> trying and getting OPAL_PARAMETER?
>
> If it's based on the device tree, then the patch series the wrong way around
> since the earlier patches start adding the device tree before the calls are
> added.  So if you apply just patches 1-7, Linux is going to blow up with a bunch
> of OPAL_PARAMETER since it'll start seeing the device tree before the OPAL calls
> have been added.
>
> Not a big issues for this series, but it does make me wonder.  If someone
> updates the device tree in the PNOR to start adding a new section type for IMC,
> how do we tell linux that even though the device tree supports it, OPAL itself
> doesn't?
>
> Say we have PHB counters in the future (just made up).  We add them to the IMA
> catalog and linux, but not skiboot.  Linux is going to OPAL_PARAMETER and blow
> up.  Linux needs to check if the device tree and skiboot have the capability.
> Not just the device tree.

Currently we do have a disable_unavailable_units() function which
will remove the unsupported units by the microcode from the device tree.
So even if we have add a new nest unit in the imc catalog dts file, only if
the nest microcode supports it, it will be enabled.

But that said, disable_unavailable_units() is only for the nest units.
If we have  new type (or class) of imc device, then OPAL will not
catch that and we could end up in the mess.

As ben suggested, I will add another device tree loop for the incoming
device tree to remove any unsupported or unknown type of IMC device
in the OPAL.

Nice catch.

Thanks for review.

Maddy

>
> Mikey
maddy June 14, 2017, 3:57 p.m. UTC | #4
On Wednesday 14 June 2017 10:46 AM, Michael Neuling wrote:
>>> diff --git a/include/opal-api.h b/include/opal-api.h
>>> index 02927356d033..5dbae2d57fde 100644
>>> --- a/include/opal-api.h
>>> +++ b/include/opal-api.h
>>> @@ -1232,6 +1232,7 @@ enum {
>>>   /* Operation argument to IMC Microcode */
>>>   enum {
>>>>   	OPAL_IMC_COUNTERS_NEST = 1,
>>> +	OPAL_IMC_COUNTERS_CORE = 2,
>> How do we advertise to linux that it can do CORE vs NEST calls?
>>
>> Say we need to remove one, how does linux know which one it can call without
>> trying and getting OPAL_PARAMETER?
>>
>> If it's based on the device tree, then the patch series the wrong way around
>> since the earlier patches start adding the device tree before the calls are
>> added.  So if you apply just patches 1-7, Linux is going to blow up with a
>> bunch
>> of OPAL_PARAMETER since it'll start seeing the device tree before the OPAL
>> calls
>> have been added.
>>
>> Not a big issues for this series, but it does make me wonder.  If someone
>> updates the device tree in the PNOR to start adding a new section type for
>> IMC,
>> how do we tell linux that even though the device tree supports it, OPAL itself
>> doesn't?
>>
>> Say we have PHB counters in the future (just made up).  We add them to the IMA
>> catalog and linux, but not skiboot.  Linux is going to OPAL_PARAMETER and blow
>> up.  Linux needs to check if the device tree and skiboot have the capability.
>> Not just the device tree.
> Talking to benh offline, he suggested skiboot should parse the devicetree
> imported from the PNOR and strip out things it doesn't understand.
>
> That way Linux won't try to touch things it doesn't understand.
Yes this make sense. Nice catch mikey.

Thanks ben/mikey, will add a parser loop for the incoming
device tree from pnor to remove any unsupported "type"
of IMC devices by the OPAL.

Maddy

>
> Mikey
Michael Neuling June 15, 2017, 3:51 a.m. UTC | #5
> > > +		core_id = phys_core_id % 4;
> > 
> > core_id or thread_id?  why %4?
> 
> These are more of hw procedure in setting up the core imc logic.
> We use the core id to select the pdbar scom port to program the
> address to.
> 
> +unsigned int pdbar_scom_index[] = {
> 
> > +	0x1001220B,
> > +	0x1001230B,
> > +	0x1001260B,
> > +	0x1001270B
> 
> And we need to setup all. I guess i shoud fix the "core_id" to
> pdbar_scom_idx to avoid any confusion.

Ok, thanks.  Needed some documentation.

>>>  	OPAL_IMC_COUNTERS_NEST = 1,
> > > 
> > > +	OPAL_IMC_COUNTERS_CORE = 2,
> > 
> > How do we advertise to linux that it can do CORE vs NEST calls?
> > 
> > Say we need to remove one, how does linux know which one it can call without
> > trying and getting OPAL_PARAMETER?
> > 
> > If it's based on the device tree, then the patch series the wrong way around
> > since the earlier patches start adding the device tree before the calls are
> > added.  So if you apply just patches 1-7, Linux is going to blow up with a
> > bunch
> > of OPAL_PARAMETER since it'll start seeing the device tree before the OPAL
> > calls
> > have been added.
> > 
> > Not a big issues for this series, but it does make me wonder.  If someone
> > updates the device tree in the PNOR to start adding a new section type for
> > IMC,
> > how do we tell linux that even though the device tree supports it, OPAL
> > itself
> > doesn't?
> > 
> > Say we have PHB counters in the future (just made up).  We add them to the
> > IMA
> > catalog and linux, but not skiboot.  Linux is going to OPAL_PARAMETER and
> > blow
> > up.  Linux needs to check if the device tree and skiboot have the
> > capability.
> > Not just the device tree.
> 
> Currently we do have a disable_unavailable_units() function which
> will remove the unsupported units by the microcode from the device tree.
> So even if we have add a new nest unit in the imc catalog dts file, only if
> the nest microcode supports it, it will be enabled.
> 
> But that said, disable_unavailable_units() is only for the nest units.
> If we have  new type (or class) of imc device, then OPAL will not
> catch that and we could end up in the mess.
> 
> As ben suggested, I will add another device tree loop for the incoming
> device tree to remove any unsupported or unknown type of IMC device
> in the OPAL.

Yep, perfect.  That locks down all the firmware to a base level of support so we
don't advertise something that someone doesn't actually support.

Mikey
diff mbox

Patch

diff --git a/hw/imc.c b/hw/imc.c
index d56b3eb251b2..923a8fe48b5f 100644
--- a/hw/imc.c
+++ b/hw/imc.c
@@ -83,6 +83,26 @@  size_t imc_catalog_size;
 const char **imc_prop_to_fix(struct dt_node *node);
 const char *prop_to_fix[] = {"events", NULL};
 
+
+/*
+ * A Quad contains 4 cores in Power 9, and there are 4 addresses for
+ * the CHTM attached to each core.
+ * So, for core index 0 to core index 3, we have a sequential range of
+ * SCOM port addresses in the arrays below, each for PDBAR and HTM mode.
+ */
+unsigned int pdbar_scom_index[] = {
+	0x1001220B,
+	0x1001230B,
+	0x1001260B,
+	0x1001270B
+};
+unsigned int htm_scom_index[] = {
+	0x10012200,
+	0x10012300,
+	0x10012600,
+	0x10012700
+};
+
 static struct imc_chip_cb *get_imc_cb(void)
 {
 	uint64_t cb_loc;
@@ -341,19 +361,83 @@  err:
  * opal_imc_counters_init : This call initialize the IMC engine.
  *
  * For Nest IMC, this is no-op and returns OPAL_SUCCESS at this point.
+ * For Core IMC, this initializes core IMC Engine, by initializing
+ * these scoms "PDBAR", "HTM_MODE" and the "EVENT_MASK" in a given cpu.
  */
-static int64_t opal_imc_counters_init(uint32_t type)
+static int64_t opal_imc_counters_init(uint32_t type, uint64_t addr, uint64_t cpu_pir)
 {
-	return OPAL_SUCCESS;
+	struct cpu_thread *c = find_cpu_by_pir(cpu_pir);
+	struct proc_chip *chip;
+	int core_id, phys_core_id, ret = OPAL_SUCCESS;
+
+	switch (type) {
+	case OPAL_IMC_COUNTERS_NEST:
+		break;
+	case OPAL_IMC_COUNTERS_CORE:
+		assert(c);
+		chip = get_chip(c->chip_id);
+		assert(chip);
+		phys_core_id = cpu_get_core_index(c);
+		core_id = phys_core_id % 4;
+
+		/*
+		 * Core IMC hardware mandate initing of three scoms
+		 * to enbale or disable of the Core IMC engine.
+		 *
+		 * PDBAR: Scom contains the real address to store per-core
+		 *        counter data in memory along with other bits.
+		 *
+		 * EventMask: Scom contain bits to denote event to multiplex
+		 *            at different MSR[HV PR] values, along with bits for
+		 *            sampling duration.
+		 *
+		 * HTM Scom: scom to enable counter data movement to memory.
+		 */
+		ret = xscom_write(chip->id,
+				XSCOM_ADDR_P9_EP(phys_core_id,
+						pdbar_scom_index[core_id]),
+				(u64)(CORE_IMC_PDBAR_MASK & addr));
+		if (ret < 0) {
+			prerror("IMC: error in xscom_write for pdbar\n");
+			goto hw_err;
+		}
+
+		ret = xscom_write(chip->id,
+				XSCOM_ADDR_P9_EC(phys_core_id,
+					 CORE_IMC_EVENT_MASK_ADDR),
+				(u64)CORE_IMC_EVENT_MASK);
+		if (ret < 0) {
+			prerror("IMC: error in xscom_write for event mask\n");
+			goto hw_err;
+		}
+
+		ret = xscom_write(chip->id,
+				XSCOM_ADDR_P9_EP(phys_core_id,
+						htm_scom_index[core_id]),
+				(u64)CORE_IMC_HTM_MODE_DISABLE);
+		if (ret < 0) {
+			prerror("IMC: error in xscom_write for htm mode\n");
+			goto hw_err;
+		}
+		break;
+	default:
+		prerror("IMC: Unknown Domain int _INIT\n");
+		return OPAL_PARAMETER;
+	}
+
+	return ret;
+hw_err:
+	return OPAL_HARDWARE;
 }
-opal_call(OPAL_IMC_COUNTERS_INIT, opal_imc_counters_init, 1);
+opal_call(OPAL_IMC_COUNTERS_INIT, opal_imc_counters_init, 3);
 
-/* opal_imc_counters_control_start: This call starts the nest imc engine. */
+/* opal_imc_counters_control_start: This call starts the nest/core imc engine. */
 static int64_t opal_imc_counters_start(uint32_t type)
 {
 	u64 op, status;
 	struct imc_chip_cb *cb;
-	int ret = OPAL_SUCCESS;
+	struct proc_chip *chip;
+	int core_id, phys_core_id, ret = OPAL_SUCCESS;
 
 	switch (type) {
 	case OPAL_IMC_COUNTERS_NEST:
@@ -372,6 +456,28 @@  static int64_t opal_imc_counters_start(uint32_t type)
 		cb->imc_chip_command = op;
 
 		break;
+	case OPAL_IMC_COUNTERS_CORE:
+		/*
+		* Enables the core imc engine by appropriately setting
+		* bits 4-9 of the HTM_MODE scom port. No initialization
+		* is done in this call. This just enables the the counters
+		* to count with the previous initialization.
+		*/
+		chip = get_chip(this_cpu()->chip_id);
+		phys_core_id = cpu_get_core_index(this_cpu());
+		core_id = phys_core_id % 4;
+
+		ret = xscom_write(chip->id,
+				XSCOM_ADDR_P9_EP(phys_core_id,
+						htm_scom_index[core_id]),
+				(u64) CORE_IMC_HTM_MODE_ENABLE);
+
+		if (ret < 0) {
+			prerror("IMC: error in xscom_write for htm_mode\n");
+			return OPAL_HARDWARE;
+		}
+
+		break;
 	default:
 		prerror("IMC: Unknown Domain in _START\n");
 		return OPAL_PARAMETER;
@@ -386,7 +492,8 @@  static int64_t opal_imc_counters_stop(uint32_t type)
 {
 	u64 op, status;
 	struct imc_chip_cb *cb;
-	int ret = OPAL_SUCCESS;
+	struct proc_chip *chip;
+	int core_id, phys_core_id, ret = OPAL_SUCCESS;
 
 	switch (type) {
 	case OPAL_IMC_COUNTERS_NEST:
@@ -405,6 +512,25 @@  static int64_t opal_imc_counters_stop(uint32_t type)
 		cb->imc_chip_command = op;
 
 		break;
+	case OPAL_IMC_COUNTERS_CORE:
+		/*
+		* Disables the core imc engine by clearing
+		* bits 4-9 of the HTM_MODE scom port.
+		*/
+		chip = get_chip(this_cpu()->chip_id);
+		phys_core_id = cpu_get_core_index(this_cpu());
+		core_id = phys_core_id % 4;
+
+		ret = xscom_write(chip->id,
+				XSCOM_ADDR_P9_EP(phys_core_id,
+						htm_scom_index[core_id]),
+				(u64) CORE_IMC_HTM_MODE_DISABLE);
+		if (ret < 0) {
+			prerror("IMC: error in xscom_write for htm_mode\n");
+			return OPAL_HARDWARE;
+		}
+
+		break;
 	default:
 		prerror("IMC: Unknown Domain in _STOP\n");
 		return OPAL_PARAMETER;
diff --git a/include/imc.h b/include/imc.h
index c7ad5fa933b7..5a3d53c22ca1 100644
--- a/include/imc.h
+++ b/include/imc.h
@@ -116,6 +116,16 @@  struct imc_chip_cb
 
 #define MAX_NEST_UNITS			48
 
+/*
+ * Core IMC SCOMs
+ */
+#define CORE_IMC_EVENT_MASK_ADDR	0x20010AA8ull
+#define CORE_IMC_EVENT_MASK		0x0001020000000000ull
+#define CORE_IMC_PDBAR_MASK		0x0003ffffffffe000ull
+#define CORE_IMC_NCU_MODE		0x0800000000000000ull
+#define CORE_IMC_HTM_MODE_ENABLE	0xE800000000000000ull
+#define CORE_IMC_HTM_MODE_DISABLE	0xE000000000000000ull
+
 void imc_init(void);
 void imc_catalog_preload(void);
 #endif /* __IMC_H */
diff --git a/include/opal-api.h b/include/opal-api.h
index 02927356d033..5dbae2d57fde 100644
--- a/include/opal-api.h
+++ b/include/opal-api.h
@@ -1232,6 +1232,7 @@  enum {
 /* Operation argument to IMC Microcode */
 enum {
 	OPAL_IMC_COUNTERS_NEST = 1,
+	OPAL_IMC_COUNTERS_CORE = 2,
 };