diff mbox

[v3,1/6] powerpc/perf: Define big-endian version of perf_mem_data_src

Message ID 1491875470-17904-2-git-send-email-maddy@linux.vnet.ibm.com (mailing list archive)
State Accepted
Commit 8c5073db0ee680c7e70e123918c9b260e49f757d
Headers show

Commit Message

maddy April 11, 2017, 1:51 a.m. UTC
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>

perf_mem_data_src is an union that is initialized via the ->val field
and accessed via the bitmap fields. For this to work on big endian
platforms (Which is broken now), we also need a big-endian represenation
of perf_mem_data_src. i.e, in a big endian system, if user request
PERF_SAMPLE_DATA_SRC (perf report -d), will get the default value from
perf_sample_data_init(), which is PERF_MEM_NA. Value for PERF_MEM_NA
is constructed using shifts:

  /* TLB access */
  #define PERF_MEM_TLB_NA		0x01 /* not available */
  ...
  #define PERF_MEM_TLB_SHIFT	26

  #define PERF_MEM_S(a, s) \
	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)

  #define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
		    PERF_MEM_S(LVL, NA)   |\
		    PERF_MEM_S(SNOOP, NA) |\
		    PERF_MEM_S(LOCK, NA)  |\
		    PERF_MEM_S(TLB, NA))

Which works out as:

  ((0x01 << 0) | (0x01 << 5) | (0x01 << 19) | (0x01 << 24) | (0x01 << 26))

Which means the PERF_MEM_NA value comes out of the kernel as 0x5080021
in CPU endian.

But then in the perf tool, the code uses the bitfields to inspect the
value, and currently the bitfields are defined using little endian
ordering.

So eg. in perf_mem__tlb_scnprintf() we see:
  data_src->val = 0x5080021
             op = 0x0
            lvl = 0x0
          snoop = 0x0
           lock = 0x0
           dtlb = 0x0
           rsvd = 0x5080021

Patch does a minimal fix of adding big endian definition of the bitfields
to match the values that are already exported by the kernel on big endian.
And it makes no change on little endian.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
---
 include/uapi/linux/perf_event.h       | 16 ++++++++++++++++
 tools/include/uapi/linux/perf_event.h | 16 ++++++++++++++++
 2 files changed, 32 insertions(+)

Comments

Peter Zijlstra April 13, 2017, 12:38 p.m. UTC | #1
On Tue, Apr 11, 2017 at 07:21:05AM +0530, Madhavan Srinivasan wrote:
> From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> 
> perf_mem_data_src is an union that is initialized via the ->val field
> and accessed via the bitmap fields. For this to work on big endian
> platforms (Which is broken now), we also need a big-endian represenation
> of perf_mem_data_src. i.e, in a big endian system, if user request
> PERF_SAMPLE_DATA_SRC (perf report -d), will get the default value from
> perf_sample_data_init(), which is PERF_MEM_NA. Value for PERF_MEM_NA
> is constructed using shifts:
> 
>   /* TLB access */
>   #define PERF_MEM_TLB_NA		0x01 /* not available */
>   ...
>   #define PERF_MEM_TLB_SHIFT	26
> 
>   #define PERF_MEM_S(a, s) \
> 	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
> 
>   #define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
> 		    PERF_MEM_S(LVL, NA)   |\
> 		    PERF_MEM_S(SNOOP, NA) |\
> 		    PERF_MEM_S(LOCK, NA)  |\
> 		    PERF_MEM_S(TLB, NA))
> 
> Which works out as:
> 
>   ((0x01 << 0) | (0x01 << 5) | (0x01 << 19) | (0x01 << 24) | (0x01 << 26))
> 
> Which means the PERF_MEM_NA value comes out of the kernel as 0x5080021
> in CPU endian.
> 
> But then in the perf tool, the code uses the bitfields to inspect the
> value, and currently the bitfields are defined using little endian
> ordering.
> 
> So eg. in perf_mem__tlb_scnprintf() we see:
>   data_src->val = 0x5080021
>              op = 0x0
>             lvl = 0x0
>           snoop = 0x0
>            lock = 0x0
>            dtlb = 0x0
>            rsvd = 0x5080021
> 
> Patch does a minimal fix of adding big endian definition of the bitfields
> to match the values that are already exported by the kernel on big endian.
> And it makes no change on little endian.

I think it is important to note that there are no current big-endian
users. So 'fixing' this will not break anybody and will ensure future
users (next patch) will work correctly.

Aside from that amendment,

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Michael Ellerman April 13, 2017, 1:23 p.m. UTC | #2
Peter Zijlstra <peterz@infradead.org> writes:

> On Tue, Apr 11, 2017 at 07:21:05AM +0530, Madhavan Srinivasan wrote:
>> From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
>> 
>> perf_mem_data_src is an union that is initialized via the ->val field
>> and accessed via the bitmap fields. For this to work on big endian
>> platforms (Which is broken now), we also need a big-endian represenation
>> of perf_mem_data_src. i.e, in a big endian system, if user request
>> PERF_SAMPLE_DATA_SRC (perf report -d), will get the default value from
>> perf_sample_data_init(), which is PERF_MEM_NA. Value for PERF_MEM_NA
>> is constructed using shifts:
>> 
>>   /* TLB access */
>>   #define PERF_MEM_TLB_NA		0x01 /* not available */
>>   ...
>>   #define PERF_MEM_TLB_SHIFT	26
>> 
>>   #define PERF_MEM_S(a, s) \
>> 	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
>> 
>>   #define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
>> 		    PERF_MEM_S(LVL, NA)   |\
>> 		    PERF_MEM_S(SNOOP, NA) |\
>> 		    PERF_MEM_S(LOCK, NA)  |\
>> 		    PERF_MEM_S(TLB, NA))
>> 
>> Which works out as:
>> 
>>   ((0x01 << 0) | (0x01 << 5) | (0x01 << 19) | (0x01 << 24) | (0x01 << 26))
>> 
>> Which means the PERF_MEM_NA value comes out of the kernel as 0x5080021
>> in CPU endian.
>> 
>> But then in the perf tool, the code uses the bitfields to inspect the
>> value, and currently the bitfields are defined using little endian
>> ordering.
>> 
>> So eg. in perf_mem__tlb_scnprintf() we see:
>>   data_src->val = 0x5080021
>>              op = 0x0
>>             lvl = 0x0
>>           snoop = 0x0
>>            lock = 0x0
>>            dtlb = 0x0
>>            rsvd = 0x5080021
>> 
>> Patch does a minimal fix of adding big endian definition of the bitfields
>> to match the values that are already exported by the kernel on big endian.
>> And it makes no change on little endian.
>
> I think it is important to note that there are no current big-endian
> users. So 'fixing' this will not break anybody and will ensure future
> users (next patch) will work correctly.

Sure I'll fold in something along those lines.

> Aside from that amendment,
>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Thanks.

cheers
maddy April 17, 2017, 3:46 a.m. UTC | #3
On Thursday 13 April 2017 06:08 PM, Peter Zijlstra wrote:
> On Tue, Apr 11, 2017 at 07:21:05AM +0530, Madhavan Srinivasan wrote:
>> From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
>>
>> perf_mem_data_src is an union that is initialized via the ->val field
>> and accessed via the bitmap fields. For this to work on big endian
>> platforms (Which is broken now), we also need a big-endian represenation
>> of perf_mem_data_src. i.e, in a big endian system, if user request
>> PERF_SAMPLE_DATA_SRC (perf report -d), will get the default value from
>> perf_sample_data_init(), which is PERF_MEM_NA. Value for PERF_MEM_NA
>> is constructed using shifts:
>>
>>    /* TLB access */
>>    #define PERF_MEM_TLB_NA		0x01 /* not available */
>>    ...
>>    #define PERF_MEM_TLB_SHIFT	26
>>
>>    #define PERF_MEM_S(a, s) \
>> 	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
>>
>>    #define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
>> 		    PERF_MEM_S(LVL, NA)   |\
>> 		    PERF_MEM_S(SNOOP, NA) |\
>> 		    PERF_MEM_S(LOCK, NA)  |\
>> 		    PERF_MEM_S(TLB, NA))
>>
>> Which works out as:
>>
>>    ((0x01 << 0) | (0x01 << 5) | (0x01 << 19) | (0x01 << 24) | (0x01 << 26))
>>
>> Which means the PERF_MEM_NA value comes out of the kernel as 0x5080021
>> in CPU endian.
>>
>> But then in the perf tool, the code uses the bitfields to inspect the
>> value, and currently the bitfields are defined using little endian
>> ordering.
>>
>> So eg. in perf_mem__tlb_scnprintf() we see:
>>    data_src->val = 0x5080021
>>               op = 0x0
>>              lvl = 0x0
>>            snoop = 0x0
>>             lock = 0x0
>>             dtlb = 0x0
>>             rsvd = 0x5080021
>>
>> Patch does a minimal fix of adding big endian definition of the bitfields
>> to match the values that are already exported by the kernel on big endian.
>> And it makes no change on little endian.
> I think it is important to note that there are no current big-endian
> users. So 'fixing' this will not break anybody and will ensure future
> users (next patch) will work correctly.
>
> Aside from that amendment,
>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Thanks
Maddy
maddy April 17, 2017, 3:46 a.m. UTC | #4
On Thursday 13 April 2017 06:53 PM, Michael Ellerman wrote:
> Peter Zijlstra <peterz@infradead.org> writes:
>
>> On Tue, Apr 11, 2017 at 07:21:05AM +0530, Madhavan Srinivasan wrote:
>>> From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
>>>
>>> perf_mem_data_src is an union that is initialized via the ->val field
>>> and accessed via the bitmap fields. For this to work on big endian
>>> platforms (Which is broken now), we also need a big-endian represenation
>>> of perf_mem_data_src. i.e, in a big endian system, if user request
>>> PERF_SAMPLE_DATA_SRC (perf report -d), will get the default value from
>>> perf_sample_data_init(), which is PERF_MEM_NA. Value for PERF_MEM_NA
>>> is constructed using shifts:
>>>
>>>    /* TLB access */
>>>    #define PERF_MEM_TLB_NA		0x01 /* not available */
>>>    ...
>>>    #define PERF_MEM_TLB_SHIFT	26
>>>
>>>    #define PERF_MEM_S(a, s) \
>>> 	(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
>>>
>>>    #define PERF_MEM_NA (PERF_MEM_S(OP, NA)   |\
>>> 		    PERF_MEM_S(LVL, NA)   |\
>>> 		    PERF_MEM_S(SNOOP, NA) |\
>>> 		    PERF_MEM_S(LOCK, NA)  |\
>>> 		    PERF_MEM_S(TLB, NA))
>>>
>>> Which works out as:
>>>
>>>    ((0x01 << 0) | (0x01 << 5) | (0x01 << 19) | (0x01 << 24) | (0x01 << 26))
>>>
>>> Which means the PERF_MEM_NA value comes out of the kernel as 0x5080021
>>> in CPU endian.
>>>
>>> But then in the perf tool, the code uses the bitfields to inspect the
>>> value, and currently the bitfields are defined using little endian
>>> ordering.
>>>
>>> So eg. in perf_mem__tlb_scnprintf() we see:
>>>    data_src->val = 0x5080021
>>>               op = 0x0
>>>              lvl = 0x0
>>>            snoop = 0x0
>>>             lock = 0x0
>>>             dtlb = 0x0
>>>             rsvd = 0x5080021
>>>
>>> Patch does a minimal fix of adding big endian definition of the bitfields
>>> to match the values that are already exported by the kernel on big endian.
>>> And it makes no change on little endian.
>> I think it is important to note that there are no current big-endian
>> users. So 'fixing' this will not break anybody and will ensure future
>> users (next patch) will work correctly.
> Sure I'll fold in something along those lines.

Thanks mpe.

Maddy

>
>> Aside from that amendment,
>>
>> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Thanks.
>
> cheers
>
Michael Ellerman April 19, 2017, 10:04 p.m. UTC | #5
On Tue, 2017-04-11 at 01:51:05 UTC, Madhavan Srinivasan wrote:
> From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> 
> perf_mem_data_src is an union that is initialized via the ->val field
> and accessed via the bitmap fields. For this to work on big endian
> platforms (Which is broken now), we also need a big-endian represenation
> of perf_mem_data_src. i.e, in a big endian system, if user request
> PERF_SAMPLE_DATA_SRC (perf report -d), will get the default value from
> perf_sample_data_init(), which is PERF_MEM_NA. Value for PERF_MEM_NA
> is constructed using shifts:
...
> 
> Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Series applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/8c5073db0ee680c7e70e123918c9b2

cheers
diff mbox

Patch

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c66a485a24ac..c4af1159a200 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -891,6 +891,7 @@  enum perf_callchain_context {
 #define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
 #define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
 
+#if defined(__LITTLE_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
@@ -902,6 +903,21 @@  union perf_mem_data_src {
 			mem_rsvd:31;
 	};
 };
+#elif defined(__BIG_ENDIAN_BITFIELD)
+union perf_mem_data_src {
+	__u64 val;
+	struct {
+		__u64	mem_rsvd:31,
+			mem_dtlb:7,	/* tlb access */
+			mem_lock:2,	/* lock instr */
+			mem_snoop:5,	/* snoop mode */
+			mem_lvl:14,	/* memory hierarchy level */
+			mem_op:5;	/* type of opcode */
+	};
+};
+#else
+#error "Unknown endianness"
+#endif
 
 /* type of opcode (load/store/prefetch,code) */
 #define PERF_MEM_OP_NA		0x01 /* not available */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index c66a485a24ac..c4af1159a200 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -891,6 +891,7 @@  enum perf_callchain_context {
 #define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
 #define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
 
+#if defined(__LITTLE_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
@@ -902,6 +903,21 @@  union perf_mem_data_src {
 			mem_rsvd:31;
 	};
 };
+#elif defined(__BIG_ENDIAN_BITFIELD)
+union perf_mem_data_src {
+	__u64 val;
+	struct {
+		__u64	mem_rsvd:31,
+			mem_dtlb:7,	/* tlb access */
+			mem_lock:2,	/* lock instr */
+			mem_snoop:5,	/* snoop mode */
+			mem_lvl:14,	/* memory hierarchy level */
+			mem_op:5;	/* type of opcode */
+	};
+};
+#else
+#error "Unknown endianness"
+#endif
 
 /* type of opcode (load/store/prefetch,code) */
 #define PERF_MEM_OP_NA		0x01 /* not available */