diff mbox

[RESEND,01/11] ibm-fsp/firenze: nest data structure definitions

Message ID 1436115333-18657-2-git-send-email-maddy@linux.vnet.ibm.com
State Changes Requested
Headers show

Commit Message

maddy July 5, 2015, 4:55 p.m. UTC
Patch adds the data structures and macros needed for
Nest instrumentation support. Patch creates new file in
include dir called "nest.h".

Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
---
 include/nest.h | 219 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 include/nest.h

Comments

Stewart Smith July 10, 2015, 6:50 a.m. UTC | #1
Madhavan Srinivasan <maddy@linux.vnet.ibm.com> writes:
> Patch adds the data structures and macros needed for
> Nest instrumentation support. Patch creates new file in
> include dir called "nest.h".

Are any parts of this ABI to kernel?

It looks as though we have an ABI to PORE though, and I can't see
anywhere where you check things like the magic number in these data
structures.
maddy July 15, 2015, 9:27 a.m. UTC | #2
On Friday 10 July 2015 12:20 PM, Stewart Smith wrote:
> Madhavan Srinivasan <maddy@linux.vnet.ibm.com> writes:
>> Patch adds the data structures and macros needed for
>> Nest instrumentation support. Patch creates new file in
>> include dir called "nest.h".
> Are any parts of this ABI to kernel?
>
> It looks as though we have an ABI to PORE though, and I can't see
> anywhere where you check things like the magic number in these data
> structures.
>
Structures defined here are not ABI to kernel. Some of these
structures are related to catalog lid (meta-data file) parsing
and some of the structures are related to PORE SLW IMA
microcode.  Idea is to abstract the kernel from meta-data file
mess, so that kernel can have a generic interface.
Joel Stanley July 16, 2015, 1:57 a.m. UTC | #3
Hello,

On Sun, 2015-07-05 at 22:25 +0530, Madhavan Srinivasan wrote:
> diff --git a/include/nest.h b/include/nest.h
> new file mode 100644
> index 0000000..0dcd946
> --- /dev/null
> +++ b/include/nest.h
> +/*
> + * Power8 has Nest Instrumentation support with which per-chip
> + * utilisation metrics like memory bandwidth, Xlink/Alink bandwidth and
> + * many other component metrics can be obtained. These Nest
> + * counters can be programmed via scoms or HW PORE Engine,
> + * called PORE_SLW_IMA.
> + *
> + * PORE_SLW_IMA:
> + * PORE_SLW_IMA is a firmware that runs on PORE Engine.
> + * This firmware programs the nest counter and moves counter values to
> + * per chip HOMER region in a fixed offset for each unit. Engine
> + * has a control block structure for communication with Hyperviosr.

Where you say hypervisor, are you referring to skiboot? If so, perhaps
write skiboot.

> + */
> +
> +/*
> + * Control Block structure offset in HOMER IMA Region
> + */
> +#define CB_STRUCT_OFFSET	0x39FC00
> +#define CB_STRUCT_CMD		0x39FC08
> +#define CB_STRUCT_SPEED		0x39FC10
> +#define SLW_IMA_PAUSE		0x2
> +#define SLW_IMA_RESUME		0x1
> +#define SLW_IMA_NOP		0
> +/*
> + * Control Block Structure:
> + *
> + * Name          Producer        Consumer        Values  Desc
> + * IMARunStatus   IMA Code       Hypervisor      0       Initializing

s/Hypervisor/Skiboot/ in this block as well.

> + *                                               1       Running
> + *                                               2       Paused
> + *
> + * IMACommand     Hypervisor     IMA Code        0       NOP
> + *                                               1       Resume
> + *                                               2       Pause
> + *                                               3       Clear and Restart
> + *
> + * IMACollection Hypervisor      IMA Code        0       128us
> + * Speed					 1       256us
> + *                                               2       1ms
> + *                                               3       4ms
> + *                                               4       16ms
> + *                                               5       64ms
> + *                                               6       256ms
> + *                                               7       1000ms
> + */
> +struct ima_chip_cb
> +{
> +        uint64_t ima_chip_run_status;
> +        uint64_t ima_chip_command;
> +        uint64_t ima_chip_collection_speed;
> +};
> +
> +/*
> + * PORE_SLW_IMA reserved memory (in HOMER region)
> + */
> +#define SLW_IMA_OFFSET		0x00320000
> +#define SLW_IMA_TOTAL_SIZE	0x80000
> +
> +/*
> + * Counter Storage size (exposed as part of DT)
> + */
> +#define SLW_IMA_SIZE		0x10000
> +
> +/*
> + * PTS Scoms and values
> + */
> +#define IMA_PTS_SCOM		0x00068009
> +#define IMA_PTS_ENABLE		0x00F0000000000000
> +#define IMA_PTS_DISABLE		0x00E0000000000000
> +#define IMA_PTS_START		0x1
> +#define IMA_PTS_STOP		0
> +#define IMA_PTS_ERROR		-1
> +
> +/*
> + * Catalogue structures.
> + * Catalogue is a meta data file provided as part of FW lid.
> + * This file contains information about the various events the
> + * HW supports under the "24x7" umbrella. Events are classified under
> + * 3 different Domains.
> + *	Domain 1  -- Chip Events (PORE_SLW_IMA)
> + *	Domain 2  -- Core Events (24x7 Core IMA)
> + *	Domain 3  -- per-Thread PMU Events
> + */
> +
> +struct ima_catalog_page_0 {
> +#define CATALOG_MAGIC 0x32347837 /* "24x7" in ASCII */
> +	__be32 magic;
> +	__be32 length; /* In 4096 byte pages */
> +	__be64 version; /* XXX: arbitrary? what's the meaning/useage/purpose? */

usage

> +	__u8 build_time_stamp[16]; /* "YYYYMMDDHHMMSS\0\0" */
> +	__u8 reserved2[32];
> +	__be16 schema_data_offs; /* in 4096 byte pages */
> +	__be16 schema_data_len; /* in 4096 byte pages */
> +	__be16 schema_entry_count;
> +	__u8 reserved3[2];
> +	__be16 event_data_offs;
> +	__be16 event_data_len;
> +	__be16 event_entry_count;
> +	__u8 reserved4[2];
> +	__be16 group_data_offs; /* in 4096 byte pages */
> +	__be16 group_data_len; /* in 4096 byte pages */
> +	__be16 group_entry_count;
> +	__u8 reserved5[2];
> +	__be16 formula_data_offs; /* in 4096 byte pages */
> +	__be16 formula_data_len; /* in 4096 byte pages */
> +	__be16 formula_entry_count;
> +	__u8 reserved6[2];
> +	__be32 core_event_offset;
> +	__be32 thread_event_offset;
> +	__be32 chip_event_offset;
> +	__be32 core_group_offset;
> +	__be32 thread_group_offset;
> +	__be32 chip_group_offset;
> +} __packed;
> +
> +struct ima_catalogue_group_data {
> +	__be16 length; /* in bytes, must be a multiple of 16 */
> +	__u8 reserved1[2];
> +	/* verified_state, unverified_state, caveat_state, broken_state, ... */
> +	__be32 flags;
> +	__u8 domain; /* Chip = 1, Core = 2 */
> +	__u8 reserved2[1];
> +	__be16 event_group_record_start_offs; /* in bytes, must be 8 byte aligned */
> +	__be16 event_group_record_len; /* in bytes */
> +	/* in bytes, offset from event_group_record */
> +	__u8 group_schema_index;
> +	__u8 event_count;
> +	__be16 event_index[16]; /* in bytes */
> +	__be16 group_name_len;
> +	__u8 remainder[];
> +	/* __u8 event_name[event_name_len - 2]; */
> +	/* __be16 event_description_len; */
> +	/* __u8 event_desc[event_description_len - 2]; */
> +	/* __be16 detailed_desc_len; */
> +	/* __u8 detailed_desc[detailed_desc_len - 2]; */
> +} __packed;
> +
> +struct ima_catalogue_event_data {
> +	__be16 length; /* in bytes, must be a multiple of 16 */
> +	__be16 formula_index;
> +	__u8 domain; /* Chip = 1, Core = 2 */
> +	__u8 reserved2[1];
> +	__be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */
> +	__be16 event_group_record_len; /* in bytes */
> +
> +	/* in bytes, offset from event_group_record */
> +	__be16 event_counter_offs;
> +
> +	/* verified_state, unverified_state, caveat_state, broken_state, ... */
> +	__be32 flags;
> +
> +	__be16 primary_group_ix;
> +	__be16 group_count;
> +	__be16 event_name_len;
> +	__u8 remainder[];
> +	/* __u8 event_name[event_name_len - 2]; */
> +	/* __be16 event_description_len; */
> +	/* __u8 event_desc[event_description_len - 2]; */
> +	/* __be16 detailed_desc_len; */
> +	/* __u8 detailed_desc[detailed_desc_len - 2]; */
> +} __packed;
> +
> +
> +#define CHIP_EVENTS_SUPPORTED	1
> +#define CHIP_EVENTS_NOT_SUPPORTED	0
> +
> +/*
> + * Just for optimisation, save only relavent addrs

relevant 

> + */
> +struct page0_offsets {
> +	char *page0;
> +	char *group_entry;
> +	char *event_entry;
> +	char *thread_event_entry;
> +	char *core_event_entry;
> +	char *chip_event_entry;
> +	char *thread_group_entry;
> +	char *core_group_entry;
> +	char *chip_group_entry;
> +};
> +
> +#define PAGE0(x)		x->page0
> +#define GROUP_ENTRY(x)		x->group_entry
> +#define EVENT_ENTRY(x)		x->event_entry
> +#define THREAD_EVENT_ENTRY(x)	x->thread_event_entry
> +#define CORE_EVENT_ENTRY(x)	x->core_event_entry
> +#define CHIP_EVENT_ENTRY(x)	x->chip_event_entry
> +#define THREAD_GROUP_ENTRY(x)	x->thread_group_entry
> +#define CORE_GROUP_ENTRY(x)	x->core_group_entry
> +#define CHIP_GROUP_ENTRY(x)	x->chip_group_entry
> +
> +/* Event Domains, Chip=1, Core=2 */
> +#define DOMAIN_CHIP	1
> +#define DOMAIN_CORE	2
> +
> +/* dimm information for utilssation metrics */

utilisation

> +#define MURANO_CENTAUR_DIMM	24000
> +#define VENICE_CENTAUR_DIMM	27000
> +
> +#endif	/* __NEST_H__ */
Stewart Smith July 16, 2015, 2:11 a.m. UTC | #4
Madhavan Srinivasan <maddy@linux.vnet.ibm.com> writes:
> On Friday 10 July 2015 12:20 PM, Stewart Smith wrote:
>> Madhavan Srinivasan <maddy@linux.vnet.ibm.com> writes:
>>> Patch adds the data structures and macros needed for
>>> Nest instrumentation support. Patch creates new file in
>>> include dir called "nest.h".
>> Are any parts of this ABI to kernel?
>>
>> It looks as though we have an ABI to PORE though, and I can't see
>> anywhere where you check things like the magic number in these data
>> structures.
>>
> Structures defined here are not ABI to kernel. Some of these
> structures are related to catalog lid (meta-data file) parsing
> and some of the structures are related to PORE SLW IMA
> microcode.  Idea is to abstract the kernel from meta-data file
> mess, so that kernel can have a generic interface.

and that's a good thing.

We should likely check the magic numbers and validity of the data
structures though.
maddy July 16, 2015, 3:15 a.m. UTC | #5
On Thursday 16 July 2015 07:27 AM, Joel Stanley wrote:
> Hello,
>
> On Sun, 2015-07-05 at 22:25 +0530, Madhavan Srinivasan wrote:
>> diff --git a/include/nest.h b/include/nest.h
>> new file mode 100644
>> index 0000000..0dcd946
>> --- /dev/null
>> +++ b/include/nest.h
>> +/*
>> + * Power8 has Nest Instrumentation support with which per-chip
>> + * utilisation metrics like memory bandwidth, Xlink/Alink bandwidth and
>> + * many other component metrics can be obtained. These Nest
>> + * counters can be programmed via scoms or HW PORE Engine,
>> + * called PORE_SLW_IMA.
>> + *
>> + * PORE_SLW_IMA:
>> + * PORE_SLW_IMA is a firmware that runs on PORE Engine.
>> + * This firmware programs the nest counter and moves counter values to
>> + * per chip HOMER region in a fixed offset for each unit. Engine
>> + * has a control block structure for communication with Hyperviosr.
> Where you say hypervisor, are you referring to skiboot? If so, perhaps
> write skiboot.

No, I am referring to host kernel (Linux).

>> + */
>> +
>> +/*
>> + * Control Block structure offset in HOMER IMA Region
>> + */
>> +#define CB_STRUCT_OFFSET	0x39FC00
>> +#define CB_STRUCT_CMD		0x39FC08
>> +#define CB_STRUCT_SPEED		0x39FC10
>> +#define SLW_IMA_PAUSE		0x2
>> +#define SLW_IMA_RESUME		0x1
>> +#define SLW_IMA_NOP		0
>> +/*
>> + * Control Block Structure:
>> + *
>> + * Name          Producer        Consumer        Values  Desc
>> + * IMARunStatus   IMA Code       Hypervisor      0       Initializing
> s/Hypervisor/Skiboot/ in this block as well.
>
>> + *                                               1       Running
>> + *                                               2       Paused
>> + *
>> + * IMACommand     Hypervisor     IMA Code        0       NOP
>> + *                                               1       Resume
>> + *                                               2       Pause
>> + *                                               3       Clear and Restart
>> + *
>> + * IMACollection Hypervisor      IMA Code        0       128us
>> + * Speed					 1       256us
>> + *                                               2       1ms
>> + *                                               3       4ms
>> + *                                               4       16ms
>> + *                                               5       64ms
>> + *                                               6       256ms
>> + *                                               7       1000ms
>> + */
>> +struct ima_chip_cb
>> +{
>> +        uint64_t ima_chip_run_status;
>> +        uint64_t ima_chip_command;
>> +        uint64_t ima_chip_collection_speed;
>> +};
>> +
>> +/*
>> + * PORE_SLW_IMA reserved memory (in HOMER region)
>> + */
>> +#define SLW_IMA_OFFSET		0x00320000
>> +#define SLW_IMA_TOTAL_SIZE	0x80000
>> +
>> +/*
>> + * Counter Storage size (exposed as part of DT)
>> + */
>> +#define SLW_IMA_SIZE		0x10000
>> +
>> +/*
>> + * PTS Scoms and values
>> + */
>> +#define IMA_PTS_SCOM		0x00068009
>> +#define IMA_PTS_ENABLE		0x00F0000000000000
>> +#define IMA_PTS_DISABLE		0x00E0000000000000
>> +#define IMA_PTS_START		0x1
>> +#define IMA_PTS_STOP		0
>> +#define IMA_PTS_ERROR		-1
>> +
>> +/*
>> + * Catalogue structures.
>> + * Catalogue is a meta data file provided as part of FW lid.
>> + * This file contains information about the various events the
>> + * HW supports under the "24x7" umbrella. Events are classified under
>> + * 3 different Domains.
>> + *	Domain 1  -- Chip Events (PORE_SLW_IMA)
>> + *	Domain 2  -- Core Events (24x7 Core IMA)
>> + *	Domain 3  -- per-Thread PMU Events
>> + */
>> +
>> +struct ima_catalog_page_0 {
>> +#define CATALOG_MAGIC 0x32347837 /* "24x7" in ASCII */
>> +	__be32 magic;
>> +	__be32 length; /* In 4096 byte pages */
>> +	__be64 version; /* XXX: arbitrary? what's the meaning/useage/purpose? */
> usage

Yes. Will change it.

>
>> +	__u8 build_time_stamp[16]; /* "YYYYMMDDHHMMSS\0\0" */
>> +	__u8 reserved2[32];
>> +	__be16 schema_data_offs; /* in 4096 byte pages */
>> +	__be16 schema_data_len; /* in 4096 byte pages */
>> +	__be16 schema_entry_count;
>> +	__u8 reserved3[2];
>> +	__be16 event_data_offs;
>> +	__be16 event_data_len;
>> +	__be16 event_entry_count;
>> +	__u8 reserved4[2];
>> +	__be16 group_data_offs; /* in 4096 byte pages */
>> +	__be16 group_data_len; /* in 4096 byte pages */
>> +	__be16 group_entry_count;
>> +	__u8 reserved5[2];
>> +	__be16 formula_data_offs; /* in 4096 byte pages */
>> +	__be16 formula_data_len; /* in 4096 byte pages */
>> +	__be16 formula_entry_count;
>> +	__u8 reserved6[2];
>> +	__be32 core_event_offset;
>> +	__be32 thread_event_offset;
>> +	__be32 chip_event_offset;
>> +	__be32 core_group_offset;
>> +	__be32 thread_group_offset;
>> +	__be32 chip_group_offset;
>> +} __packed;
>> +
>> +struct ima_catalogue_group_data {
>> +	__be16 length; /* in bytes, must be a multiple of 16 */
>> +	__u8 reserved1[2];
>> +	/* verified_state, unverified_state, caveat_state, broken_state, ... */
>> +	__be32 flags;
>> +	__u8 domain; /* Chip = 1, Core = 2 */
>> +	__u8 reserved2[1];
>> +	__be16 event_group_record_start_offs; /* in bytes, must be 8 byte aligned */
>> +	__be16 event_group_record_len; /* in bytes */
>> +	/* in bytes, offset from event_group_record */
>> +	__u8 group_schema_index;
>> +	__u8 event_count;
>> +	__be16 event_index[16]; /* in bytes */
>> +	__be16 group_name_len;
>> +	__u8 remainder[];
>> +	/* __u8 event_name[event_name_len - 2]; */
>> +	/* __be16 event_description_len; */
>> +	/* __u8 event_desc[event_description_len - 2]; */
>> +	/* __be16 detailed_desc_len; */
>> +	/* __u8 detailed_desc[detailed_desc_len - 2]; */
>> +} __packed;
>> +
>> +struct ima_catalogue_event_data {
>> +	__be16 length; /* in bytes, must be a multiple of 16 */
>> +	__be16 formula_index;
>> +	__u8 domain; /* Chip = 1, Core = 2 */
>> +	__u8 reserved2[1];
>> +	__be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */
>> +	__be16 event_group_record_len; /* in bytes */
>> +
>> +	/* in bytes, offset from event_group_record */
>> +	__be16 event_counter_offs;
>> +
>> +	/* verified_state, unverified_state, caveat_state, broken_state, ... */
>> +	__be32 flags;
>> +
>> +	__be16 primary_group_ix;
>> +	__be16 group_count;
>> +	__be16 event_name_len;
>> +	__u8 remainder[];
>> +	/* __u8 event_name[event_name_len - 2]; */
>> +	/* __be16 event_description_len; */
>> +	/* __u8 event_desc[event_description_len - 2]; */
>> +	/* __be16 detailed_desc_len; */
>> +	/* __u8 detailed_desc[detailed_desc_len - 2]; */
>> +} __packed;
>> +
>> +
>> +#define CHIP_EVENTS_SUPPORTED	1
>> +#define CHIP_EVENTS_NOT_SUPPORTED	0
>> +
>> +/*
>> + * Just for optimisation, save only relavent addrs
> relevant 

Yes. will change it.

>
>> + */
>> +struct page0_offsets {
>> +	char *page0;
>> +	char *group_entry;
>> +	char *event_entry;
>> +	char *thread_event_entry;
>> +	char *core_event_entry;
>> +	char *chip_event_entry;
>> +	char *thread_group_entry;
>> +	char *core_group_entry;
>> +	char *chip_group_entry;
>> +};
>> +
>> +#define PAGE0(x)		x->page0
>> +#define GROUP_ENTRY(x)		x->group_entry
>> +#define EVENT_ENTRY(x)		x->event_entry
>> +#define THREAD_EVENT_ENTRY(x)	x->thread_event_entry
>> +#define CORE_EVENT_ENTRY(x)	x->core_event_entry
>> +#define CHIP_EVENT_ENTRY(x)	x->chip_event_entry
>> +#define THREAD_GROUP_ENTRY(x)	x->thread_group_entry
>> +#define CORE_GROUP_ENTRY(x)	x->core_group_entry
>> +#define CHIP_GROUP_ENTRY(x)	x->chip_group_entry
>> +
>> +/* Event Domains, Chip=1, Core=2 */
>> +#define DOMAIN_CHIP	1
>> +#define DOMAIN_CORE	2
>> +
>> +/* dimm information for utilssation metrics */
> utilisation

Yes. will change it.

>
>> +#define MURANO_CENTAUR_DIMM	24000
>> +#define VENICE_CENTAUR_DIMM	27000
>> +
>> +#endif	/* __NEST_H__ */
>
maddy July 16, 2015, 3:59 a.m. UTC | #6
On Thursday 16 July 2015 07:41 AM, Stewart Smith wrote:
> Madhavan Srinivasan <maddy@linux.vnet.ibm.com> writes:
>> On Friday 10 July 2015 12:20 PM, Stewart Smith wrote:
>>> Madhavan Srinivasan <maddy@linux.vnet.ibm.com> writes:
>>>> Patch adds the data structures and macros needed for
>>>> Nest instrumentation support. Patch creates new file in
>>>> include dir called "nest.h".
>>> Are any parts of this ABI to kernel?
>>>
>>> It looks as though we have an ABI to PORE though, and I can't see
>>> anywhere where you check things like the magic number in these data
>>> structures.
>>>
>> Structures defined here are not ABI to kernel. Some of these
>> structures are related to catalog lid (meta-data file) parsing
>> and some of the structures are related to PORE SLW IMA
>> microcode.  Idea is to abstract the kernel from meta-data file
>> mess, so that kernel can have a generic interface.
> and that's a good thing.
>
> We should likely check the magic numbers and validity of the data
> structures though.

Yes. I will add the checks and spin off  a new version soon.

Also, I missing your review comments mail on patch 2 of this series,
I could see it in the mailing list archives, can you forward it?

Maddy
Stewart Smith July 16, 2015, 4:26 a.m. UTC | #7
Madhavan Srinivasan <maddy@linux.vnet.ibm.com> writes:
> Also, I missing your review comments mail on patch 2 of this series,
> I could see it in the mailing list archives, can you forward it?

Sure, here it is:

  Madhavan Srinivasan <maddy@linux.vnet.ibm.com> writes:

  > Patch adds nest init function and Nest meta-data load functions.
  > Nest events supported by HW are passed as lid (meta-data), called as
  [ 79 more citation lines. Click/Enter to show. ]
  > "catalog"
  > "Catalog" lid is loaded to detect chip nest instrumentation support.
  > New file call "nest.c" added to hw/ to contain nest support code.
  >
  > Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
  > ---
  >  hw/fsp/fsp.c       |   3 ++
  >  hw/nest.c          | 107
  > +++++++++++++++++++++++++++++++++++++++++++++++++++++
  >  include/mem-map.h  |   2 +
  >  include/nest.h     |  10 +++++
  >  include/platform.h |   1 +
  >  include/types.h    |   1 +
  >  6 files changed, 124 insertions(+)
  >  create mode 100644 hw/nest.c
  >
  > diff --git a/hw/fsp/fsp.c b/hw/fsp/fsp.c
  > index 12e162d..6b5d87c 100644
  > --- a/hw/fsp/fsp.c
  > +++ b/hw/fsp/fsp.c
  > @@ -38,6 +38,7 @@
  >  #include <opal.h>
  >  #include <opal-msg.h>
  >  #include <ccan/list/list.h>
  > +#include <chip.h>
  >
  >  DEFINE_LOG_ENTRY(OPAL_RC_FSP_POLL_TIMEOUT, OPAL_PLATFORM_ERR_EVT,
  > OPAL_FSP,
  >  		 OPAL_PLATFORM_FIRMWARE, OPAL_ERROR_PANIC, OPAL_NA, NULL);
  > @@ -106,6 +107,7 @@ static u64 fsp_hir_timeout;
  >  #define KERNEL_LID_PHYP			0x80a00701
  >  #define KERNEL_LID_OPAL			0x80f00101
  >  #define INITRAMFS_LID_OPAL		0x80f00102
  > +#define NEST_CATALOGUE_LID		0x81e00610
  >
  >  /*
  >   * We keep track on last logged values for some things to print only on
  > @@ -2222,6 +2224,7 @@ static struct {
  >  	{ RESOURCE_ID_CAPP,	CAPP_IDX_MURANO_DD21,	0x80a02001 },
  >  	{ RESOURCE_ID_CAPP,	CAPP_IDX_VENICE_DD10,	0x80a02003 },
  >  	{ RESOURCE_ID_CAPP,	CAPP_IDX_VENICE_DD20,	0x80a02004 },
  > +	{ RESOURCE_ID_CATALOGUE,RESOURCE_SUBID_NONE,	NEST_CATALOGUE_LID},
  >  };
  >
  >  static void fsp_start_fetching_next_lid(void);
  > diff --git a/hw/nest.c b/hw/nest.c
  > new file mode 100644
  > index 0000000..fd22a0e
  > --- /dev/null
  > +++ b/hw/nest.c
  > @@ -0,0 +1,107 @@
  > +/* Copyright 2015 IBM Corp.
  > + *
  > + * Licensed under the Apache License, Version 2.0 (the "License");
  > + * you may not use this file except in compliance with the License.
  > + * You may obtain a copy of the License at
  > + *
  > + *	http://www.apache.org/licenses/LICENSE-2.0
  > + *
  > + * Unless required by applicable law or agreed to in writing, software
  > + * distributed under the License is distributed on an "AS IS" BASIS,
  > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
  > + * implied.
  > + * See the License for the specific language governing permissions and
  > + * limitations under the License.
  > + * Locking notes:
  > + */
  > +
  > +#include <skiboot.h>
  > +#include <device.h>
  > +#include <nest.h>
  > +#include <chip.h>
  > +#include <cpu.h>
  > +#include <xscom.h>
  > +#include <timebase.h>
  > +#include <mem-map.h>
  > +#include <opal-api.h>
  > +#include <io.h>
  > +
  > +/*
  > + * Optimize different array/index
  > + */

  I don't understand what this comment means.

  > +struct ima_catalog_page_0 *page0_ptr;
  > +struct page0_offsets *pg0_offsets;

  what does page0 mean?

  is ima a well known abbreviation?

  > +
  > +int load_catalogue_lid()
  [ 21 more citation lines. Click/Enter to show. ]
  > +{
  > +	struct proc_chip *chip = get_chip(pir_to_chip_id(this_cpu()->pir));
  > +	size_t size = NEST_CATALOGUE_SIZE;
  > +	int rc=0, loaded;
  > +
  > +	pg0_offsets = (struct page0_offsets *)malloc(
  > +					sizeof(struct page0_offsets));
  > +	if (!pg0_offsets) {
  > +		prerror("Nest_IMA: No mem for pg0_offsets structure\n");;
  > +		rc = OPAL_NO_MEM;
  > +		return rc;
  > +	}
  > +
  > +	pg0_offsets->page0 = (char *)malloc(NEST_CATALOGUE_SIZE);
  > +	if (!pg0_offsets->page0) {
  > +		prerror("Nest_IMA: No mem for catalogue lid \n");
  > +		rc = OPAL_NO_MEM;
  > +		return rc;
  > +	}
  > +
  > +	loaded = start_preload_resource (RESOURCE_ID_CATALOGUE,
  > +						RESOURCE_SUBID_NONE,
  > +						pg0_offsets->page0,
  > &size);

  No space after function name.

  > +	if (loaded == OPAL_SUCCESS)
  > +		loaded = wait_for_resource_loaded(RESOURCE_ID_CATALOGUE,
  > +
  > RESOURCE_SUBID_NONE);

  Doing an immediate wait_for after start_preload means you're doing a
  synchronous load rather than an asynchronous one.

  This is generally frowned upon as it increases boot time.

  Instead, start preload early on and then wait_for only when you
  absolutely cannot wait any longer.

  You may also want to check the order in which things are queued to load,
  as this can affect boot time (e.g. if you register your preload after
  kernel and initramfs, you may be waiting longer than you like).

  > +
  > +/*
  > + * powerpc Nest instrumentation support
  > + */

  don't need the comment

  > +void nest_ima_init(void)
  > +{
  > +	if (load_catalogue_lid()) {
  > +		printf("IMA Catalog lid failed to load, Exiting \n");
  > +		return;

  We're not exiting though, just skipping nest instrumentation.

  > diff --git a/include/mem-map.h b/include/mem-map.h
  > index 1258d87..8bd8054 100644
  [ 5 more citation lines. Click/Enter to show. ]
  > --- a/include/mem-map.h
  > +++ b/include/mem-map.h
  > @@ -121,5 +121,7 @@
  >  /* Size allocated to build the device-tree */
  >  #define	DEVICE_TREE_MAX_SIZE	0x80000
  >
  > +/*Size of IMA Catalogue LID. 256KBytes. Fixed */
  > +#define NEST_CATALOGUE_SIZE		0x40000

  I don't see why this should be in mem-map.h rather than in nest.c
diff mbox

Patch

diff --git a/include/nest.h b/include/nest.h
new file mode 100644
index 0000000..0dcd946
--- /dev/null
+++ b/include/nest.h
@@ -0,0 +1,219 @@ 
+/* Copyright 2015 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef __NEST_H
+#define __NEST_H
+
+/*
+ * Power8 has Nest Instrumentation support with which per-chip
+ * utilisation metrics like memory bandwidth, Xlink/Alink bandwidth and
+ * many other component metrics can be obtained. These Nest
+ * counters can be programmed via scoms or HW PORE Engine,
+ * called PORE_SLW_IMA.
+ *
+ * PORE_SLW_IMA:
+ * PORE_SLW_IMA is a firmware that runs on PORE Engine.
+ * This firmware programs the nest counter and moves counter values to
+ * per chip HOMER region in a fixed offset for each unit. Engine
+ * has a control block structure for communication with the Hypervisor.
+ */
+
+/*
+ * Control Block structure offset in HOMER IMA Region
+ */
+#define CB_STRUCT_OFFSET	0x39FC00	/* base of the control block (first 8-byte word) */
+#define CB_STRUCT_CMD		0x39FC08	/* CB_STRUCT_OFFSET + 0x08, lines up with ima_chip_command */
+#define CB_STRUCT_SPEED		0x39FC10	/* CB_STRUCT_OFFSET + 0x10, lines up with ima_chip_collection_speed */
+#define SLW_IMA_PAUSE		0x2	/* IMACommand "Pause" in the table below */
+#define SLW_IMA_RESUME		0x1	/* IMACommand "Resume" in the table below */
+#define SLW_IMA_NOP		0	/* IMACommand "NOP" in the table below */
+/*
+ * Control Block Structure:
+ *
+ * Name          Producer        Consumer        Values  Desc
+ * IMARunStatus   IMA Code       Hypervisor      0       Initializing
+ *                                               1       Running
+ *                                               2       Paused
+ *
+ * IMACommand     Hypervisor     IMA Code        0       NOP
+ *                                               1       Resume
+ *                                               2       Pause
+ *                                               3       Clear and Restart
+ *
+ * IMACollection Hypervisor      IMA Code        0       128us
+ * Speed					 1       256us
+ *                                               2       1ms
+ *                                               3       4ms
+ *                                               4       16ms
+ *                                               5       64ms
+ *                                               6       256ms
+ *                                               7       1000ms
+ */
+struct ima_chip_cb	/* three 64-bit words of the control block tabulated above */
+{
+        uint64_t ima_chip_run_status;		/* IMARunStatus: produced by IMA code, read by the hypervisor */
+        uint64_t ima_chip_command;		/* IMACommand: produced by the hypervisor, read by IMA code */
+        uint64_t ima_chip_collection_speed;	/* IMACollection Speed: produced by the hypervisor, read by IMA code */
+};
+
+/*
+ * PORE_SLW_IMA reserved memory (in HOMER region)
+ */
+#define SLW_IMA_OFFSET		0x00320000	/* start of the reserved region */
+#define SLW_IMA_TOTAL_SIZE	0x80000	/* 512 KiB: region ends at 0x3A0000, so CB_STRUCT_OFFSET (0x39FC00) lies inside it */
+
+/*
+ * Counter Storage size (exposed as part of DT)
+ */
+#define SLW_IMA_SIZE		0x10000	/* 64 KiB */
+
+/*
+ * PTS Scoms and values
+ */
+#define IMA_PTS_SCOM		0x00068009	/* PTS SCOM address */
+#define IMA_PTS_ENABLE		0x00F0000000000000	/* differs from IMA_PTS_DISABLE only in bit 0x0010000000000000 */
+#define IMA_PTS_DISABLE		0x00E0000000000000
+#define IMA_PTS_START		0x1
+#define IMA_PTS_STOP		0
+#define IMA_PTS_ERROR		(-1)	/* parenthesised: negative replacement lists should be self-contained */
+
+/*
+ * Catalogue structures.
+ * Catalogue is a meta data file provided as part of FW lid.
+ * This file contains information about the various events the
+ * HW supports under the "24x7" umbrella. Events are classified under
+ * 3 different Domains.
+ *	Domain 1  -- Chip Events (PORE_SLW_IMA)
+ *	Domain 2  -- Core Events (24x7 Core IMA)
+ *	Domain 3  -- per-Thread PMU Events
+ */
+
+struct ima_catalog_page_0 { /* packed, big-endian header at the start of the catalogue */
+#define CATALOG_MAGIC 0x32347837 /* "24x7" in ASCII */
+	__be32 magic; /* NOTE(review): presumably must equal CATALOG_MAGIC -- confirm a check exists */
+	__be32 length; /* In 4096 byte pages */
+	__be64 version; /* XXX: arbitrary? what's the meaning/usage/purpose? */
+	__u8 build_time_stamp[16]; /* "YYYYMMDDHHMMSS\0\0" */
+	__u8 reserved2[32];
+	__be16 schema_data_offs; /* in 4096 byte pages */
+	__be16 schema_data_len; /* in 4096 byte pages */
+	__be16 schema_entry_count;
+	__u8 reserved3[2];
+	__be16 event_data_offs; /* NOTE(review): presumably in 4096 byte pages like its siblings -- confirm */
+	__be16 event_data_len; /* NOTE(review): presumably in 4096 byte pages like its siblings -- confirm */
+	__be16 event_entry_count;
+	__u8 reserved4[2];
+	__be16 group_data_offs; /* in 4096 byte pages */
+	__be16 group_data_len; /* in 4096 byte pages */
+	__be16 group_entry_count;
+	__u8 reserved5[2];
+	__be16 formula_data_offs; /* in 4096 byte pages */
+	__be16 formula_data_len; /* in 4096 byte pages */
+	__be16 formula_entry_count;
+	__u8 reserved6[2];
+	__be32 core_event_offset;
+	__be32 thread_event_offset;
+	__be32 chip_event_offset;
+	__be32 core_group_offset;
+	__be32 thread_group_offset;
+	__be32 chip_group_offset;
+} __packed;
+
+struct ima_catalogue_group_data { /* packed fixed header of a group record; variable-length data follows */
+	__be16 length; /* in bytes, must be a multiple of 16 */
+	__u8 reserved1[2];
+	/* verified_state, unverified_state, caveat_state, broken_state, ... */
+	__be32 flags;
+	__u8 domain; /* Chip = 1, Core = 2 */
+	__u8 reserved2[1];
+	__be16 event_group_record_start_offs; /* in bytes, must be 8 byte aligned */
+	__be16 event_group_record_len; /* in bytes */
+	/* NOTE(review): "in bytes, offset from event_group_record" looks copied from the event struct -- confirm */
+	__u8 group_schema_index;
+	__u8 event_count; /* NOTE(review): presumably the number of event_index[] slots in use -- confirm */
+	__be16 event_index[16]; /* in bytes */
+	__be16 group_name_len;
+	__u8 remainder[]; /* variable-length tail; its layout is sketched in the comments below */
+	/* __u8 group_name[group_name_len - 2]; */
+	/* __be16 event_description_len; (NOTE(review): likely the group's description here -- confirm) */
+	/* __u8 event_desc[event_description_len - 2]; */
+	/* __be16 detailed_desc_len; */
+	/* __u8 detailed_desc[detailed_desc_len - 2]; */
+} __packed;
+
+struct ima_catalogue_event_data { /* packed fixed header of an event record; variable-length data follows */
+	__be16 length; /* in bytes, must be a multiple of 16 */
+	__be16 formula_index;
+	__u8 domain; /* Chip = 1, Core = 2 */
+	__u8 reserved2[1];
+	__be16 event_group_record_offs; /* in bytes, must be 8 byte aligned */
+	__be16 event_group_record_len; /* in bytes */
+
+	/* in bytes, offset from event_group_record */
+	__be16 event_counter_offs;
+
+	/* verified_state, unverified_state, caveat_state, broken_state, ... */
+	__be32 flags;
+
+	__be16 primary_group_ix;
+	__be16 group_count;
+	__be16 event_name_len;
+	__u8 remainder[]; /* variable-length tail; its layout is sketched in the comments below */
+	/* __u8 event_name[event_name_len - 2]; */
+	/* __be16 event_description_len; */
+	/* __u8 event_desc[event_description_len - 2]; */
+	/* __be16 detailed_desc_len; */
+	/* __u8 detailed_desc[detailed_desc_len - 2]; */
+} __packed;
+
+
+#define CHIP_EVENTS_SUPPORTED	1
+#define CHIP_EVENTS_NOT_SUPPORTED	0
+
+/*
+ * Just for optimisation, save only the relevant addresses
+ */
+struct page0_offsets { /* cached pointers used while parsing the catalogue */
+	char *page0; /* buffer the catalogue LID is loaded into */
+	char *group_entry; /* NOTE(review): this and the pointers below presumably point into that buffer -- confirm */
+	char *event_entry;
+	char *thread_event_entry;
+	char *core_event_entry;
+	char *chip_event_entry;
+	char *thread_group_entry;
+	char *core_group_entry;
+	char *chip_group_entry;
+};
+
+#define PAGE0(x)		((x)->page0)	/* accessors below take a pointer to struct page0_offsets */
+#define GROUP_ENTRY(x)		((x)->group_entry)	/* argument and result are parenthesised so any pointer expression is safe */
+#define EVENT_ENTRY(x)		((x)->event_entry)
+#define THREAD_EVENT_ENTRY(x)	((x)->thread_event_entry)
+#define CORE_EVENT_ENTRY(x)	((x)->core_event_entry)
+#define CHIP_EVENT_ENTRY(x)	((x)->chip_event_entry)
+#define THREAD_GROUP_ENTRY(x)	((x)->thread_group_entry)
+#define CORE_GROUP_ENTRY(x)	((x)->core_group_entry)
+#define CHIP_GROUP_ENTRY(x)	((x)->chip_group_entry)
+
+/* Event Domains, Chip=1, Core=2 */
+#define DOMAIN_CHIP	1
+#define DOMAIN_CORE	2
+
+/* dimm information for utilisation metrics */
+#define MURANO_CENTAUR_DIMM	24000
+#define VENICE_CENTAUR_DIMM	27000
+
+#endif	/* __NEST_H */