diff mbox series

[RFC,02/11] perf/core: Data structure to present hazard data

Message ID 20200302052355.36365-3-ravi.bangoria@linux.ibm.com (mailing list archive)
State Not Applicable
Headers show
Series perf: Enhancing perf to export processor hazard information | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch powerpc/merge (e3a1ab299346a9a415f334e91a78da7ea84aa5a2)
snowpatch_ozlabs/checkpatch success total: 0 errors, 0 warnings, 0 checks, 135 lines checked
snowpatch_ozlabs/needsstable success Patch has no Fixes tags

Commit Message

Ravi Bangoria March 2, 2020, 5:23 a.m. UTC
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>

Introduce new perf sample_type PERF_SAMPLE_PIPELINE_HAZ to request kernel
to provide cpu pipeline hazard data. Also, introduce arch independent
structure 'perf_pipeline_haz_data' to pass hazard data to userspace. This
is generic structure and arch specific data needs to be converted to this
format.

Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
---
 include/linux/perf_event.h            |  7 ++++++
 include/uapi/linux/perf_event.h       | 32 ++++++++++++++++++++++++++-
 kernel/events/core.c                  |  6 +++++
 tools/include/uapi/linux/perf_event.h | 32 ++++++++++++++++++++++++++-
 4 files changed, 75 insertions(+), 2 deletions(-)

Comments

Peter Zijlstra March 2, 2020, 9:55 a.m. UTC | #1
On Mon, Mar 02, 2020 at 10:53:46AM +0530, Ravi Bangoria wrote:
> From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> 
> Introduce new perf sample_type PERF_SAMPLE_PIPELINE_HAZ to request kernel
> to provide cpu pipeline hazard data. Also, introduce arch independent
> structure 'perf_pipeline_haz_data' to pass hazard data to userspace. This
> is generic structure and arch specific data needs to be converted to this
> format.
> 
> Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
>  include/linux/perf_event.h            |  7 ++++++
>  include/uapi/linux/perf_event.h       | 32 ++++++++++++++++++++++++++-
>  kernel/events/core.c                  |  6 +++++
>  tools/include/uapi/linux/perf_event.h | 32 ++++++++++++++++++++++++++-
>  4 files changed, 75 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 547773f5894e..d5b606e3c57d 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1001,6 +1001,7 @@ struct perf_sample_data {
>  	u64				stack_user_size;
>  
>  	u64				phys_addr;
> +	struct perf_pipeline_haz_data	pipeline_haz;
>  } ____cacheline_aligned;
>  
>  /* default value for data source */
> @@ -1021,6 +1022,12 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>  	data->weight = 0;
>  	data->data_src.val = PERF_MEM_NA;
>  	data->txn = 0;
> +	data->pipeline_haz.itype = PERF_HAZ__ITYPE_NA;
> +	data->pipeline_haz.icache = PERF_HAZ__ICACHE_NA;
> +	data->pipeline_haz.hazard_stage = PERF_HAZ__PIPE_STAGE_NA;
> +	data->pipeline_haz.hazard_reason = PERF_HAZ__HREASON_NA;
> +	data->pipeline_haz.stall_stage = PERF_HAZ__PIPE_STAGE_NA;
> +	data->pipeline_haz.stall_reason = PERF_HAZ__SREASON_NA;
>  }

NAK, Don't touch anything outside of the first cacheline here.
Madhavan Srinivasan March 2, 2020, 2:23 p.m. UTC | #2
On 3/2/20 3:25 PM, Peter Zijlstra wrote:
> On Mon, Mar 02, 2020 at 10:53:46AM +0530, Ravi Bangoria wrote:
>> From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
>>
>> Introduce new perf sample_type PERF_SAMPLE_PIPELINE_HAZ to request kernel
>> to provide cpu pipeline hazard data. Also, introduce arch independent
>> structure 'perf_pipeline_haz_data' to pass hazard data to userspace. This
>> is generic structure and arch specific data needs to be converted to this
>> format.
>>
>> Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
>> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
>> ---
>>   include/linux/perf_event.h            |  7 ++++++
>>   include/uapi/linux/perf_event.h       | 32 ++++++++++++++++++++++++++-
>>   kernel/events/core.c                  |  6 +++++
>>   tools/include/uapi/linux/perf_event.h | 32 ++++++++++++++++++++++++++-
>>   4 files changed, 75 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index 547773f5894e..d5b606e3c57d 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -1001,6 +1001,7 @@ struct perf_sample_data {
>>   	u64				stack_user_size;
>>   
>>   	u64				phys_addr;
>> +	struct perf_pipeline_haz_data	pipeline_haz;
>>   } ____cacheline_aligned;
>>   
>>   /* default value for data source */
>> @@ -1021,6 +1022,12 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>>   	data->weight = 0;
>>   	data->data_src.val = PERF_MEM_NA;
>>   	data->txn = 0;
>> +	data->pipeline_haz.itype = PERF_HAZ__ITYPE_NA;
>> +	data->pipeline_haz.icache = PERF_HAZ__ICACHE_NA;
>> +	data->pipeline_haz.hazard_stage = PERF_HAZ__PIPE_STAGE_NA;
>> +	data->pipeline_haz.hazard_reason = PERF_HAZ__HREASON_NA;
>> +	data->pipeline_haz.stall_stage = PERF_HAZ__PIPE_STAGE_NA;
>> +	data->pipeline_haz.stall_reason = PERF_HAZ__SREASON_NA;
>>   }
> NAK, Don't touch anything outside of the first cacheline here.

My bad, should have looked at the comment in "struct perf_sample_data {".
Will move it to perf_prepare_sample().

Thanks for comments.
Maddy
Mark Rutland March 2, 2020, 2:48 p.m. UTC | #3
On Mon, Mar 02, 2020 at 10:53:46AM +0530, Ravi Bangoria wrote:
> From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> 
> Introduce new perf sample_type PERF_SAMPLE_PIPELINE_HAZ to request kernel
> to provide cpu pipeline hazard data. Also, introduce arch independent
> structure 'perf_pipeline_haz_data' to pass hazard data to userspace. This
> is generic structure and arch specific data needs to be converted to this
> format.
> 
> Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
>  include/linux/perf_event.h            |  7 ++++++
>  include/uapi/linux/perf_event.h       | 32 ++++++++++++++++++++++++++-
>  kernel/events/core.c                  |  6 +++++
>  tools/include/uapi/linux/perf_event.h | 32 ++++++++++++++++++++++++++-
>  4 files changed, 75 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 547773f5894e..d5b606e3c57d 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1001,6 +1001,7 @@ struct perf_sample_data {
>  	u64				stack_user_size;
>  
>  	u64				phys_addr;
> +	struct perf_pipeline_haz_data	pipeline_haz;
>  } ____cacheline_aligned;
>  
>  /* default value for data source */
> @@ -1021,6 +1022,12 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>  	data->weight = 0;
>  	data->data_src.val = PERF_MEM_NA;
>  	data->txn = 0;
> +	data->pipeline_haz.itype = PERF_HAZ__ITYPE_NA;
> +	data->pipeline_haz.icache = PERF_HAZ__ICACHE_NA;
> +	data->pipeline_haz.hazard_stage = PERF_HAZ__PIPE_STAGE_NA;
> +	data->pipeline_haz.hazard_reason = PERF_HAZ__HREASON_NA;
> +	data->pipeline_haz.stall_stage = PERF_HAZ__PIPE_STAGE_NA;
> +	data->pipeline_haz.stall_reason = PERF_HAZ__SREASON_NA;
>  }
>  
>  extern void perf_output_sample(struct perf_output_handle *handle,
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 377d794d3105..ff252618ca93 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -142,8 +142,9 @@ enum perf_event_sample_format {
>  	PERF_SAMPLE_REGS_INTR			= 1U << 18,
>  	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
>  	PERF_SAMPLE_AUX				= 1U << 20,
> +	PERF_SAMPLE_PIPELINE_HAZ		= 1U << 21,

Can we please have perf_event_open() reject this sample flag for PMUs
without the new callback (introduced in the next patch)?

That way it'll be possible to detect whether the PMU exposes this.

Thanks,
Mark.
Mark Rutland March 2, 2020, 2:54 p.m. UTC | #4
On Mon, Mar 02, 2020 at 10:53:46AM +0530, Ravi Bangoria wrote:
> From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> 
> Introduce new perf sample_type PERF_SAMPLE_PIPELINE_HAZ to request kernel
> to provide cpu pipeline hazard data. Also, introduce arch independent
> structure 'perf_pipeline_haz_data' to pass hazard data to userspace. This
> is generic structure and arch specific data needs to be converted to this
> format.
> 
> Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
>  include/linux/perf_event.h            |  7 ++++++
>  include/uapi/linux/perf_event.h       | 32 ++++++++++++++++++++++++++-
>  kernel/events/core.c                  |  6 +++++
>  tools/include/uapi/linux/perf_event.h | 32 ++++++++++++++++++++++++++-
>  4 files changed, 75 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 547773f5894e..d5b606e3c57d 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1001,6 +1001,7 @@ struct perf_sample_data {
>  	u64				stack_user_size;
>  
>  	u64				phys_addr;
> +	struct perf_pipeline_haz_data	pipeline_haz;
>  } ____cacheline_aligned;

I don't think you can add this here, see below.

>  /* default value for data source */
> @@ -1021,6 +1022,12 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
>  	data->weight = 0;
>  	data->data_src.val = PERF_MEM_NA;
>  	data->txn = 0;
> +	data->pipeline_haz.itype = PERF_HAZ__ITYPE_NA;
> +	data->pipeline_haz.icache = PERF_HAZ__ICACHE_NA;
> +	data->pipeline_haz.hazard_stage = PERF_HAZ__PIPE_STAGE_NA;
> +	data->pipeline_haz.hazard_reason = PERF_HAZ__HREASON_NA;
> +	data->pipeline_haz.stall_stage = PERF_HAZ__PIPE_STAGE_NA;
> +	data->pipeline_haz.stall_reason = PERF_HAZ__SREASON_NA;
>  }
>  
>  extern void perf_output_sample(struct perf_output_handle *handle,
> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
> index 377d794d3105..ff252618ca93 100644
> --- a/include/uapi/linux/perf_event.h
> +++ b/include/uapi/linux/perf_event.h
> @@ -142,8 +142,9 @@ enum perf_event_sample_format {
>  	PERF_SAMPLE_REGS_INTR			= 1U << 18,
>  	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
>  	PERF_SAMPLE_AUX				= 1U << 20,
> +	PERF_SAMPLE_PIPELINE_HAZ		= 1U << 21,
>  
> -	PERF_SAMPLE_MAX = 1U << 21,		/* non-ABI */
> +	PERF_SAMPLE_MAX = 1U << 22,		/* non-ABI */
>  
>  	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
>  };
> @@ -870,6 +871,13 @@ enum perf_event_type {
>  	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
>  	 *	{ u64			size;
>  	 *	  char			data[size]; } && PERF_SAMPLE_AUX
> +	 *	{ u8			itype;
> +	 *	  u8			icache;
> +	 *	  u8			hazard_stage;
> +	 *	  u8			hazard_reason;
> +	 *	  u8			stall_stage;
> +	 *	  u8			stall_reason;
> +	 *	  u16			pad;} && PERF_SAMPLE_PIPELINE_HAZ
>  	 * };

The existing comment shows the aux data *immediately* after ther
phys_addr field, where you've placed struct perf_pipeline_haz_data.

If adding to struct perf_sample_data is fine, this needs to come before
the aux data in this comment. If adding to struct perf_sample_data is
not fine. struct perf_pipeline_haz_data cannot live there.

I suspect the latter is true, but you're getting away with it because
you're not using both PERF_SAMPLE_AUX and PERF_SAMPLE_PIPELINE_HAZ
simultaneously.

Thanks,
Mark.

>  	 */
>  	PERF_RECORD_SAMPLE			= 9,
> @@ -1185,4 +1193,26 @@ struct perf_branch_entry {
>  		reserved:40;
>  };
>  
> +struct perf_pipeline_haz_data {
> +	/* Instruction/Opcode type: Load, Store, Branch .... */
> +	__u8	itype;
> +	/* Instruction Cache source */
> +	__u8	icache;
> +	/* Instruction suffered hazard in pipeline stage */
> +	__u8	hazard_stage;
> +	/* Hazard reason */
> +	__u8	hazard_reason;
> +	/* Instruction suffered stall in pipeline stage */
> +	__u8	stall_stage;
> +	/* Stall reason */
> +	__u8	stall_reason;
> +	__u16	pad;
> +};
> +
> +#define PERF_HAZ__ITYPE_NA	0x0
> +#define PERF_HAZ__ICACHE_NA	0x0
> +#define PERF_HAZ__PIPE_STAGE_NA	0x0
> +#define PERF_HAZ__HREASON_NA	0x0
> +#define PERF_HAZ__SREASON_NA	0x0
> +
>  #endif /* _UAPI_LINUX_PERF_EVENT_H */
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index e453589da97c..d00037c77ccf 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -1754,6 +1754,9 @@ static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
>  	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
>  		size += sizeof(data->phys_addr);
>  
> +	if (sample_type & PERF_SAMPLE_PIPELINE_HAZ)
> +		size += sizeof(data->pipeline_haz);
> +
>  	event->header_size = size;
>  }
>  
> @@ -6712,6 +6715,9 @@ void perf_output_sample(struct perf_output_handle *handle,
>  			perf_aux_sample_output(event, handle, data);
>  	}
>  
> +	if (sample_type & PERF_SAMPLE_PIPELINE_HAZ)
> +		perf_output_put(handle, data->pipeline_haz);
> +
>  	if (!event->attr.watermark) {
>  		int wakeup_events = event->attr.wakeup_events;
>  
> diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
> index 377d794d3105..ff252618ca93 100644
> --- a/tools/include/uapi/linux/perf_event.h
> +++ b/tools/include/uapi/linux/perf_event.h
> @@ -142,8 +142,9 @@ enum perf_event_sample_format {
>  	PERF_SAMPLE_REGS_INTR			= 1U << 18,
>  	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
>  	PERF_SAMPLE_AUX				= 1U << 20,
> +	PERF_SAMPLE_PIPELINE_HAZ		= 1U << 21,
>  
> -	PERF_SAMPLE_MAX = 1U << 21,		/* non-ABI */
> +	PERF_SAMPLE_MAX = 1U << 22,		/* non-ABI */
>  
>  	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
>  };
> @@ -870,6 +871,13 @@ enum perf_event_type {
>  	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
>  	 *	{ u64			size;
>  	 *	  char			data[size]; } && PERF_SAMPLE_AUX
> +	 *	{ u8			itype;
> +	 *	  u8			icache;
> +	 *	  u8			hazard_stage;
> +	 *	  u8			hazard_reason;
> +	 *	  u8			stall_stage;
> +	 *	  u8			stall_reason;
> +	 *	  u16			pad;} && PERF_SAMPLE_PIPELINE_HAZ
>  	 * };
>  	 */
>  	PERF_RECORD_SAMPLE			= 9,
> @@ -1185,4 +1193,26 @@ struct perf_branch_entry {
>  		reserved:40;
>  };
>  
> +struct perf_pipeline_haz_data {
> +	/* Instruction/Opcode type: Load, Store, Branch .... */
> +	__u8	itype;
> +	/* Instruction Cache source */
> +	__u8	icache;
> +	/* Instruction suffered hazard in pipeline stage */
> +	__u8	hazard_stage;
> +	/* Hazard reason */
> +	__u8	hazard_reason;
> +	/* Instruction suffered stall in pipeline stage */
> +	__u8	stall_stage;
> +	/* Stall reason */
> +	__u8	stall_reason;
> +	__u16	pad;
> +};
> +
> +#define PERF_HAZ__ITYPE_NA	0x0
> +#define PERF_HAZ__ICACHE_NA	0x0
> +#define PERF_HAZ__PIPE_STAGE_NA	0x0
> +#define PERF_HAZ__HREASON_NA	0x0
> +#define PERF_HAZ__SREASON_NA	0x0
> +
>  #endif /* _UAPI_LINUX_PERF_EVENT_H */
> -- 
> 2.21.1
>
Ravi Bangoria March 3, 2020, 2:31 p.m. UTC | #5
On 3/2/20 8:24 PM, Mark Rutland wrote:
>> @@ -870,6 +871,13 @@ enum perf_event_type {
>>   	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
>>   	 *	{ u64			size;
>>   	 *	  char			data[size]; } && PERF_SAMPLE_AUX
>> +	 *	{ u8			itype;
>> +	 *	  u8			icache;
>> +	 *	  u8			hazard_stage;
>> +	 *	  u8			hazard_reason;
>> +	 *	  u8			stall_stage;
>> +	 *	  u8			stall_reason;
>> +	 *	  u16			pad;} && PERF_SAMPLE_PIPELINE_HAZ
>>   	 * };
> 
> The existing comment shows the aux data *immediately* after ther
> phys_addr field, where you've placed struct perf_pipeline_haz_data.
> 
> If adding to struct perf_sample_data is fine, this needs to come before
> the aux data in this comment. If adding to struct perf_sample_data is
> not fine. struct perf_pipeline_haz_data cannot live there.
> 
> I suspect the latter is true, but you're getting away with it because
> you're not using both PERF_SAMPLE_AUX and PERF_SAMPLE_PIPELINE_HAZ
> simultaneously.

Right. Thanks for pointing it out. Will change it.

Ravi
Ravi Bangoria March 3, 2020, 2:32 p.m. UTC | #6
On 3/2/20 8:18 PM, Mark Rutland wrote:
>> diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
>> index 377d794d3105..ff252618ca93 100644
>> --- a/include/uapi/linux/perf_event.h
>> +++ b/include/uapi/linux/perf_event.h
>> @@ -142,8 +142,9 @@ enum perf_event_sample_format {
>>   	PERF_SAMPLE_REGS_INTR			= 1U << 18,
>>   	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
>>   	PERF_SAMPLE_AUX				= 1U << 20,
>> +	PERF_SAMPLE_PIPELINE_HAZ		= 1U << 21,
> 
> Can we please have perf_event_open() reject this sample flag for PMUs
> without the new callback (introduced in the next patch)?
> 
> That way it'll be possible to detect whether the PMU exposes this.

Sure. Will change it.

Ravi
diff mbox series

Patch

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 547773f5894e..d5b606e3c57d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1001,6 +1001,7 @@  struct perf_sample_data {
 	u64				stack_user_size;
 
 	u64				phys_addr;
+	struct perf_pipeline_haz_data	pipeline_haz;
 } ____cacheline_aligned;
 
 /* default value for data source */
@@ -1021,6 +1022,12 @@  static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->weight = 0;
 	data->data_src.val = PERF_MEM_NA;
 	data->txn = 0;
+	data->pipeline_haz.itype = PERF_HAZ__ITYPE_NA;
+	data->pipeline_haz.icache = PERF_HAZ__ICACHE_NA;
+	data->pipeline_haz.hazard_stage = PERF_HAZ__PIPE_STAGE_NA;
+	data->pipeline_haz.hazard_reason = PERF_HAZ__HREASON_NA;
+	data->pipeline_haz.stall_stage = PERF_HAZ__PIPE_STAGE_NA;
+	data->pipeline_haz.stall_reason = PERF_HAZ__SREASON_NA;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 377d794d3105..ff252618ca93 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -142,8 +142,9 @@  enum perf_event_sample_format {
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
 	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 	PERF_SAMPLE_AUX				= 1U << 20,
+	PERF_SAMPLE_PIPELINE_HAZ		= 1U << 21,
 
-	PERF_SAMPLE_MAX = 1U << 21,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 22,		/* non-ABI */
 
 	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
@@ -870,6 +871,13 @@  enum perf_event_type {
 	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
 	 *	{ u64			size;
 	 *	  char			data[size]; } && PERF_SAMPLE_AUX
+	 *	{ u8			itype;
+	 *	  u8			icache;
+	 *	  u8			hazard_stage;
+	 *	  u8			hazard_reason;
+	 *	  u8			stall_stage;
+	 *	  u8			stall_reason;
+	 *	  u16			pad;} && PERF_SAMPLE_PIPELINE_HAZ
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
@@ -1185,4 +1193,26 @@  struct perf_branch_entry {
 		reserved:40;
 };
 
+struct perf_pipeline_haz_data {
+	/* Instruction/Opcode type: Load, Store, Branch .... */
+	__u8	itype;
+	/* Instruction Cache source */
+	__u8	icache;
+	/* Instruction suffered hazard in pipeline stage */
+	__u8	hazard_stage;
+	/* Hazard reason */
+	__u8	hazard_reason;
+	/* Instruction suffered stall in pipeline stage */
+	__u8	stall_stage;
+	/* Stall reason */
+	__u8	stall_reason;
+	__u16	pad;
+};
+
+#define PERF_HAZ__ITYPE_NA	0x0
+#define PERF_HAZ__ICACHE_NA	0x0
+#define PERF_HAZ__PIPE_STAGE_NA	0x0
+#define PERF_HAZ__HREASON_NA	0x0
+#define PERF_HAZ__SREASON_NA	0x0
+
 #endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e453589da97c..d00037c77ccf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1754,6 +1754,9 @@  static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
 	if (sample_type & PERF_SAMPLE_PHYS_ADDR)
 		size += sizeof(data->phys_addr);
 
+	if (sample_type & PERF_SAMPLE_PIPELINE_HAZ)
+		size += sizeof(data->pipeline_haz);
+
 	event->header_size = size;
 }
 
@@ -6712,6 +6715,9 @@  void perf_output_sample(struct perf_output_handle *handle,
 			perf_aux_sample_output(event, handle, data);
 	}
 
+	if (sample_type & PERF_SAMPLE_PIPELINE_HAZ)
+		perf_output_put(handle, data->pipeline_haz);
+
 	if (!event->attr.watermark) {
 		int wakeup_events = event->attr.wakeup_events;
 
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 377d794d3105..ff252618ca93 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -142,8 +142,9 @@  enum perf_event_sample_format {
 	PERF_SAMPLE_REGS_INTR			= 1U << 18,
 	PERF_SAMPLE_PHYS_ADDR			= 1U << 19,
 	PERF_SAMPLE_AUX				= 1U << 20,
+	PERF_SAMPLE_PIPELINE_HAZ		= 1U << 21,
 
-	PERF_SAMPLE_MAX = 1U << 21,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 22,		/* non-ABI */
 
 	__PERF_SAMPLE_CALLCHAIN_EARLY		= 1ULL << 63, /* non-ABI; internal use */
 };
@@ -870,6 +871,13 @@  enum perf_event_type {
 	 *	{ u64			phys_addr;} && PERF_SAMPLE_PHYS_ADDR
 	 *	{ u64			size;
 	 *	  char			data[size]; } && PERF_SAMPLE_AUX
+	 *	{ u8			itype;
+	 *	  u8			icache;
+	 *	  u8			hazard_stage;
+	 *	  u8			hazard_reason;
+	 *	  u8			stall_stage;
+	 *	  u8			stall_reason;
+	 *	  u16			pad;} && PERF_SAMPLE_PIPELINE_HAZ
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
@@ -1185,4 +1193,26 @@  struct perf_branch_entry {
 		reserved:40;
 };
 
+struct perf_pipeline_haz_data {
+	/* Instruction/Opcode type: Load, Store, Branch .... */
+	__u8	itype;
+	/* Instruction Cache source */
+	__u8	icache;
+	/* Instruction suffered hazard in pipeline stage */
+	__u8	hazard_stage;
+	/* Hazard reason */
+	__u8	hazard_reason;
+	/* Instruction suffered stall in pipeline stage */
+	__u8	stall_stage;
+	/* Stall reason */
+	__u8	stall_reason;
+	__u16	pad;
+};
+
+#define PERF_HAZ__ITYPE_NA	0x0
+#define PERF_HAZ__ICACHE_NA	0x0
+#define PERF_HAZ__PIPE_STAGE_NA	0x0
+#define PERF_HAZ__HREASON_NA	0x0
+#define PERF_HAZ__SREASON_NA	0x0
+
 #endif /* _UAPI_LINUX_PERF_EVENT_H */