
[RFC] core/opal: Add OPAL call statistics

Message ID 20200228183419.13612-1-clg@kaod.org
State RFC
Series [RFC] core/opal: Add OPAL call statistics

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (82aed17a5468aff6b600ee1694a10a60f942c018)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot success Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present

Commit Message

Cédric Le Goater Feb. 28, 2020, 6:34 p.m. UTC
Here is a proposal to collect OPAL call statistics, counts and duration,
and track areas we could possibly improve.

With a small Linux driver to dump the stats in debugfs, here is what
we get on a P9 after boot (each line appears to show the call count,
then the min/avg/max duration in microseconds):

OPAL_CONSOLE_WRITE : #22318 0/0/47
OPAL_RTC_READ : #9 0/4/15
OPAL_READ_NVRAM : #3468 0/0/6
OPAL_HANDLE_INTERRUPT : #4724 0/57/10026
OPAL_POLL_EVENTS : #508 2/141/10033
OPAL_PCI_CONFIG_READ_BYTE : #3623 0/0/4
OPAL_PCI_CONFIG_READ_HALF_WORD : #5579 0/0/8
OPAL_PCI_CONFIG_READ_WORD : #6156 0/0/7
OPAL_PCI_CONFIG_WRITE_BYTE : #2 0/0/0
OPAL_PCI_CONFIG_WRITE_HALF_WORD : #1282 0/0/1
OPAL_PCI_CONFIG_WRITE_WORD : #1335 0/0/1
OPAL_PCI_EEH_FREEZE_STATUS : #11123 0/0/2
OPAL_CONSOLE_WRITE_BUFFER_SPACE : #139088 0/0/11
OPAL_PCI_EEH_FREEZE_CLEAR : #148 1/2/8
OPAL_PCI_PHB_MMIO_ENABLE : #22 0/0/0
OPAL_PCI_SET_PHB_MEM_WINDOW : #22 0/0/1
OPAL_PCI_MAP_PE_MMIO_WINDOW : #56 0/0/0
OPAL_PCI_SET_PE : #44 279/284/293
OPAL_PCI_SET_PELTV : #66 0/0/0
OPAL_PCI_SET_XIVE_PE : #1123 0/0/1
OPAL_GET_MSI_64 : #1120 0/0/0
OPAL_START_CPU : #238 8/21/35
OPAL_QUERY_CPU_STATUS : #357 0/11/69
OPAL_PCI_MAP_PE_DMA_WINDOW : #16 0/0/1
OPAL_PCI_MAP_PE_DMA_WINDOW_REAL : #16 0/0/1
OPAL_PCI_RESET : #35 0/471/851
OPAL62 : #6 0/10/46
OPAL_XSCOM_READ : #26 0/0/2
OPAL_XSCOM_WRITE : #8 0/0/1
OPAL_REINIT_CPUS : #4 348/8247/11061
OPAL_CHECK_TOKEN : #134112 0/0/0
OPAL_GET_MSG : #30 0/0/1
OPAL87 : #1 0/0/0
OPAL_PCI_SET_PHB_CAPI_MODE : #2 0/60/121
OPAL_SLW_SET_REG : #1080 3/3/13
OPAL_IPMI_SEND : #53 0/5/11
OPAL_IPMI_RECV : #53 0/0/2
OPAL_I2C_REQUEST : #20 6/10/19
OPAL_FLASH_READ : #10 19/10452/58305
OPAL_PRD_MSG : #1 0/3/3
OPAL_CONSOLE_FLUSH : #134079 0/0/12
OPAL_PCI_GET_PRESENCE_STATE : #7 1/1/3
OPAL_PCI_GET_POWER_STATE : #9 0/0/0
OPAL_PCI_TCE_KILL : #20 1/8/133
OPAL_NMMU_SET_PTCR : #3 253/255/257
OPAL_XIVE_RESET : #3 0/114709/115403
OPAL_XIVE_GET_IRQ_INFO : #1427 0/0/6
OPAL_XIVE_SET_IRQ_CONFIG : #1113 0/125/2810
OPAL_XIVE_GET_QUEUE_INFO : #240 0/0/2
OPAL_XIVE_SET_QUEUE_INFO : #360 0/60/1216
OPAL_XIVE_ALLOCATE_VP_BLOCK : #2 0/59/60
OPAL_XIVE_GET_VP_INFO : #240 0/0/0
OPAL_XIVE_SET_VP_INFO : #360 0/298/3080
OPAL_XIVE_ALLOCATE_IRQ : #240 0/0/3
OPAL140 : #119 0/253/1109
OPAL_IMC_COUNTERS_INIT : #60 9/10/20
OPAL_IMC_COUNTERS_STOP : #36 0/0/2
OPAL_PCI_GET_PBCQ_TUNNEL_BAR : #1 2/2/2
OPAL_PCI_SET_PBCQ_TUNNEL_BAR : #1 1/1/1
OPAL_NX_COPROC_INIT : #2 3/4/5

Signed-off-by: Cédric Le Goater <clg@kaod.org>
---
 core/opal.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Nicholas Piggin Feb. 29, 2020, 9:27 a.m. UTC | #1
Cédric Le Goater's on February 29, 2020 4:34 am:
> Here is a proposal to collect OPAL call statistics, counts and duration,
> and track areas we could possibly improve.
> 
> With a small Linux driver to dump the stats in debugfs, here is what
> we get on a P9 after boot:

Seems interesting... you could just do it all on the Linux side though.
Any reason you want these stats? I'd like to try to get basic perf and
possibly even ftrace support for OPAL (driven from Linux). It's still
a way off but coming along slowly.

Thanks,
Nick
Cédric Le Goater Feb. 29, 2020, 10:09 a.m. UTC | #2
On 2/29/20 10:27 AM, Nicholas Piggin wrote:
> Cédric Le Goater's on February 29, 2020 4:34 am:
>> Here is a proposal to collect OPAL call statistics, counts and duration,
>> and track areas we could possibly improve.
>>
>> With a small Linux driver to dump the stats in debugfs, here is what
>> we get on a P9 after boot:
> 
> Seems interesting... you could just do it all on the Linux side though.

I thought we might collect more data from OPAL in opal_exit. 

> Any reason you want these stats? 

HW configuration (XIVE) is what I have in mind, but it could be applied
to other areas:

1. To see which calls are being used and how frequently.
2. To track average and max values, which would reflect issues in
   OPAL such as locking or polling.
3. To see the impact of some changes.

> I'd like to try get basic perf and possibly even ftrace support for 
> OPAL (driven from Linux). It's still a way off but coming along slowly.

Nice. 

Thanks,

C.
Naveen N. Rao March 12, 2020, 2:09 p.m. UTC | #3
Cédric Le Goater wrote:
> On 2/29/20 10:27 AM, Nicholas Piggin wrote:
>> Cédric Le Goater's on February 29, 2020 4:34 am:
>>> Here is a proposal to collect OPAL call statistics, counts and duration,
>>> and track areas we could possibly improve.
>>>
>>> With a small Linux driver to dump the stats in debugfs, here is what
>>> we get on a P9 after boot:
>> 
>> Seems interesting... you could just do it all on the Linux side though.
> 
> I thought we might collect more data from OPAL in opal_exit. 

As Nick points out, this can be done from Linux through the use of 
tracepoints. We already have similar statistics for hcalls through a 
perf script. A similar script should be able to support OPAL calls.

See:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/scripts/python/powerpc-hcalls.py

> 
>> Any reason you want these stats? 
> 
> HW configuration (XIVE) is in mind but it could be applied to other 
> areas.
> 
> 1. To see which calls are being used and how frequently 
> 2. to track average and max values which would reflect issues in 
>    OPAL, locking or polling.
> 3. to see impact of some changes
> 
>> I'd like to try get basic perf and possibly even ftrace support for 
>> OPAL (driven from Linux). It's still a way off but coming along slowly.

By 'perf', do you mean PMU support?
I guess we already have basic _mcount() support in skiboot. Do you plan
to have this data fed to Linux ftrace in some manner? It would be good
to hear your plans.


Thanks,
Naveen
Cédric Le Goater March 13, 2020, 2:35 p.m. UTC | #4
On 3/12/20 3:09 PM, Naveen N. Rao wrote:
> Cédric Le Goater wrote:
>> On 2/29/20 10:27 AM, Nicholas Piggin wrote:
>>> Cédric Le Goater's on February 29, 2020 4:34 am:
>>>> Here is a proposal to collect OPAL call statistics, counts and duration,
>>>> and track areas we could possibly improve.
>>>>
>>>> With a small Linux driver to dump the stats in debugfs, here is what
>>>> we get on a P9 after boot:
>>>
>>> Seems interesting... you could just do it all on the Linux side though.
>>
>> I thought we might collect more data from OPAL in opal_exit. 
> 
> As Nick points out, this can be done from Linux through the use of tracepoints. We already have similar statistics for hcalls through a perf script. A similar script should be able to support OPAL calls.
> 
> See:
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/scripts/python/powerpc-hcalls.py

Thanks,


I need to go a little deeper to collect the statistics I am
interested in. Some low-level HW procedures do polling.

I have cooked up a set of routines to collect statistics on
function calls, using:

	struct stat_range {
		uint64_t count;
		uint64_t sum;
		uint64_t min;
		uint64_t max;
	};

	struct stat {
		const char *name;
		uint64_t nr_ranges;
		uint64_t count;
		struct stat_range all;
		struct stat_range ranges[STAT_NR_RANGES];
	};
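
An update helper consistent with these structures might look roughly
like the sketch below. The name stat_update, the fixed 200-sample
window per range (suggested by the per-range counts in the sysfs dump
further down), and the microsecond argument are assumptions, not
posted code:

	/* Sketch only: record one sample into the overall range and into
	 * the current per-window range (assumed 200 samples per window). */
	static void stat_update(struct stat *s, uint64_t usecs)
	{
		uint64_t r = (s->count / 200) % STAT_NR_RANGES;
		struct stat_range *targets[] = { &s->all, &s->ranges[r] };
		unsigned int i;

		if (r + 1 > s->nr_ranges)
			s->nr_ranges = r + 1;

		for (i = 0; i < 2; i++) {
			struct stat_range *sr = targets[i];

			sr->sum += usecs;
			if (usecs > sr->max)
				sr->max = usecs;
			if (!sr->count || usecs < sr->min)
				sr->min = usecs;
			sr->count++;
		}
		s->count++;
	}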

The stat structure addresses are exported in the DT under 
"ibm,opal/stats" and the values are exposed to user space 
using a generic sysfs driver. 

It's simple and good enough for my needs. 

Cheers,

C.

 
> 
>>
>>> Any reason you want these stats? 
>>
>> HW configuration (XIVE) is in mind but it could be applied to other areas.
>>
>> 1. To see which calls are being used and how frequently
>> 2. to track average and max values which would reflect issues in
>>    OPAL, locking or polling.
>> 3. to see impact of some changes
>>
>>> I'd like to try get basic perf and possibly even ftrace support for OPAL (driven from Linux). It's still a way off but coming along slowly.
> 
> By 'perf', do you mean PMU support?
> I guess we already have basic _mcount() support in skiboot. Do you plan to have this data be fed to Linux ftrace in some manner? Would be good to hear your plans.
> 
> 
> Thanks,
> Naveen
>
Oliver O'Halloran March 30, 2020, 7:12 a.m. UTC | #5
On Sat, Mar 14, 2020 at 5:14 AM Cédric Le Goater <clg@kaod.org> wrote:
>
> On 3/12/20 3:09 PM, Naveen N. Rao wrote:
> > Cédric Le Goater wrote:
> >> On 2/29/20 10:27 AM, Nicholas Piggin wrote:
> >>> Cédric Le Goater's on February 29, 2020 4:34 am:
> >>>> Here is a proposal to collect OPAL call statistics, counts and duration,
> >>>> and track areas we could possibly improve.
> >>>>
> >>>> With a small Linux driver to dump the stats in debugfs, here is what
> >>>> we get on a P9 after boot:
> >>>
> >>> Seems interesting... you could just do it all on the Linux side though.
> >>
> >> I thought we might collect more data from OPAL in opal_exit.
> >
> > As Nick points out, this can be done from Linux through the use of tracepoints. We already have similar statistics for hcalls through a perf script. A similar script should be able to support OPAL calls.
> >
> > See:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/scripts/python/powerpc-hcalls.py
>
> Thanks,
>
>
> I need to go a little deeper to collect the statistics  I am
> interested in. Some low level HW procedures do polling.
>
> I have cooked a set of routines to collect statistics on
> function calls in :
>
>         struct stat_range {
>                 uint64_t count;
>                 uint64_t sum;
>                 uint64_t min;
>                 uint64_t max;
>         };
>
>         struct stat {
>                 const char *name;
>                 uint64_t nr_ranges;
>                 uint64_t count;
>                 struct stat_range all;
>                 struct stat_range ranges[STAT_NR_RANGES];
>         };
>
> The stat structure addresses are exported in the DT under
> "ibm,opal/stats" and the values are exposed to user space
> using a generic sysfs driver.
>
> It's simple and good enough for my needs.

If you're going to do this then put it in /ibm,opal/exports/ with
hdat_map and friends. That'll work out of the box with existing
kernels.
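
On the skiboot side that would amount to something like the sketch
below. It assumes dt_add_property_u64s() takes (address, length) u64
values the way the existing exports such as hdat_map are added;
exports_node is an illustrative name for the exports DT node and
opal_call_stats is the array from the patch below:

	/* Sketch: advertise the stats buffer through the exports node so
	 * existing kernels expose it under /sys/firmware/opal/exports/. */
	dt_add_property_u64s(exports_node, "call-stats",
			     (uint64_t)&opal_call_stats,
			     sizeof(opal_call_stats));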
Cédric Le Goater April 1, 2020, 9:02 a.m. UTC | #6
Hello, 

On 3/30/20 9:12 AM, Oliver O'Halloran wrote:
> On Sat, Mar 14, 2020 at 5:14 AM Cédric Le Goater <clg@kaod.org> wrote:
>>
>> On 3/12/20 3:09 PM, Naveen N. Rao wrote:
>>> Cédric Le Goater wrote:
>>>> On 2/29/20 10:27 AM, Nicholas Piggin wrote:
>>>>> Cédric Le Goater's on February 29, 2020 4:34 am:
>>>>>> Here is a proposal to collect OPAL call statistics, counts and duration,
>>>>>> and track areas we could possibly improve.
>>>>>>
>>>>>> With a small Linux driver to dump the stats in debugfs, here is what
>>>>>> we get on a P9 after boot:
>>>>>
>>>>> Seems interesting... you could just do it all on the Linux side though.
>>>>
>>>> I thought we might collect more data from OPAL in opal_exit.
>>>
>>> As Nick points out, this can be done from Linux through the use of tracepoints. We already have similar statistics for hcalls through a perf script. A similar script should be able to support OPAL calls.
>>>
>>> See:
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/perf/scripts/python/powerpc-hcalls.py
>>
>> Thanks,
>>
>>
>> I need to go a little deeper to collect the statistics  I am
>> interested in. Some low level HW procedures do polling.
>>
>> I have cooked a set of routines to collect statistics on
>> function calls in :
>>
>>         struct stat_range {
>>                 uint64_t count;
>>                 uint64_t sum;
>>                 uint64_t min;
>>                 uint64_t max;
>>         };
>>
>>         struct stat {
>>                 const char *name;
>>                 uint64_t nr_ranges;
>>                 uint64_t count;
>>                 struct stat_range all;
>>                 struct stat_range ranges[STAT_NR_RANGES];
>>         };
>>
>> The stat structure addresses are exported in the DT under
>> "ibm,opal/stats" and the values are exposed to user space
>> using a generic sysfs driver.
>>
>> It's simple and good enough for my needs.
> 
> If you're going to do this then put it in /ibm,opal/exports/ with
> hdat_map and friends. That'll work out of the box with existing
> kernels.

It's a bit more complex. 

Here is what the DT looks like on a boston:

  root@boss01:~# ls  /proc/device-tree/ibm,opal/stats/
  '#address-cells'   phandle        stat@0   stat@2   stat@4   stat@6   stat@8   stat@a   stat@c
   name             '#size-cells'   stat@1   stat@3   stat@5   stat@7   stat@9   stat@b   stat@d

  root@boss01:~# ls  /proc/device-tree/ibm\,opal/stats/stat@0/
  addr  compatible  ibm,chip-id  label  name  phandle


and the sysfs files:

  root@boss01:~# ls  /sys/firmware/opal/stats/
  XIVE_EQC_SCRUB-0  XIVE_IVC_SCRUB-0  XIVE_PC_CACHE_KILL-0  XIVE_SYNC-0  XIVE_VC_CACHE_KILL-0  XIVE_VPC_SCRUB-0  XIVE_VPC_SCRUB_CLEAN-0
  XIVE_EQC_SCRUB-8  XIVE_IVC_SCRUB-8  XIVE_PC_CACHE_KILL-8  XIVE_SYNC-8  XIVE_VC_CACHE_KILL-8  XIVE_VPC_SCRUB-8  XIVE_VPC_SCRUB_CLEAN-8

  root@boss01:~# cat  /sys/firmware/opal/stats/*
                   XIVE_IVC_SCRUB-0: #1601 0/0/4 - #200 1/1/2 - #200 0/0/2 - #200 0/0/2 - #200 0/1/4 - #200 0/0/1 - #200 0/0/3 - #200 0/0/1 - #200 0/0/1 - #1 0/0/0 - #0 0/0/0 - 
                   XIVE_IVC_SCRUB-8: #551 0/1/5 - #200 1/1/2 - #200 0/1/5 - #151 0/0/1 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - 
               XIVE_PC_CACHE_KILL-0: #3 0/0/0 - #3 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - 
               XIVE_PC_CACHE_KILL-8: #3 0/0/0 - #3 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - 
                        XIVE_SYNC-0: #2628 0/9/1457 - #200 0/0/0 - #200 0/0/0 - #200 0/0/0 - #28 0/0/0 - #200 0/0/1 - #200 0/0/0 - #200 0/128/1457 - #200 0/0/1 - #200 0/0/1 - #200 0/0/1 - 
                        XIVE_SYNC-8: #536 0/36/1458 - #200 1/1/1 - #200 1/95/1458 - #136 0/0/1 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - 
               XIVE_VC_CACHE_KILL-0: #3 0/0/0 - #3 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - 
               XIVE_VC_CACHE_KILL-8: #3 0/0/0 - #3 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - 
             XIVE_VPC_SCRUB_CLEAN-0: #64 0/3/11 - #64 0/3/11 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - 
             XIVE_VPC_SCRUB_CLEAN-8: #64 1/3/8 - #64 1/3/8 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - #0 0/0/0 - 

Nothing unexpected from the figures yet.


I can send out patches if this is interesting.

C.

Patch

diff --git a/core/opal.c b/core/opal.c
index dc9944dcac5d..5712a42ebe94 100644
--- a/core/opal.c
+++ b/core/opal.c
@@ -44,6 +44,27 @@  static uint64_t opal_dynamic_events;
 extern uint32_t attn_trigger;
 extern uint32_t hir_trigger;
 
+/* Call stats */
+struct opal_call_stat {
+	uint64_t count;
+	uint64_t sum;
+	uint64_t min;
+	uint64_t max;
+};
+
+static struct opal_call_stat opal_call_stats[OPAL_LAST+1];
+
+static void opal_call_stat_update(uint64_t token, uint64_t call_time)
+{
+	struct opal_call_stat *s = &opal_call_stats[token];
+
+	s->count++;
+	s->sum += call_time;
+	if (call_time > s->max)
+		s->max = call_time;
+	if (call_time < s->min)
+		s->min = call_time;
+}
 
 void opal_table_init(void)
 {
@@ -55,6 +76,10 @@  void opal_table_init(void)
 	while(s < e) {
 		opal_branch_table[s->token] = function_entry_address(s->func);
 		opal_num_args[s->token] = s->nargs;
+		opal_call_stats[s->token].count = 0;
+		opal_call_stats[s->token].min = 0xFFFFFFFFF;
+		opal_call_stats[s->token].max = 0;
+		opal_call_stats[s->token].sum = 0;
 		s++;
 	}
 }
@@ -190,6 +215,8 @@  int64_t opal_exit_check(int64_t retval, struct stack_frame *eframe)
 		      call_time, token);
 	}
 
+	opal_call_stat_update(token, tb_to_usecs(now - cpu->entered_opal_call_at));
+
 	cpu->current_token = 0;
 
 	return retval;
@@ -420,6 +447,9 @@  void add_opal_node(void)
 	add_opal_firmware_node();
 	add_associativity_ref_point();
 	memcons_add_properties();
+
+	dt_add_property_u64(opal_node, "ibm,opal-call-stats",
+			    (uint64_t) &opal_call_stats);
 }
 
 static struct lock evt_lock = LOCK_UNLOCKED;
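
The small Linux debugfs driver mentioned in the commit message is not
part of this posting. As a rough illustration only, a reader for the
"ibm,opal-call-stats" property added above could look like the sketch
below; everything in it is an assumption rather than the actual driver:
the struct layout mirrors skiboot's struct opal_call_stat, the counters
are taken to be big-endian (skiboot's byte order; drop the
be64_to_cpu() swaps for a little-endian skiboot build), OPAL_LAST comes
from the kernel's asm/opal-api.h, and the OPAL-owned buffer is assumed
to be mappable with memremap().

	#include <linux/module.h>
	#include <linux/of.h>
	#include <linux/io.h>
	#include <linux/debugfs.h>
	#include <linux/seq_file.h>
	#include <asm/opal-api.h>	/* OPAL_LAST */

	/* Must mirror skiboot's struct opal_call_stat from the patch above. */
	struct opal_call_stat {
		__be64 count;
		__be64 sum;
		__be64 min;
		__be64 max;
	};

	static struct opal_call_stat *stats;
	static struct dentry *dent;

	static int opal_call_stats_show(struct seq_file *m, void *unused)
	{
		int i;

		for (i = 0; i <= OPAL_LAST; i++) {
			u64 count = be64_to_cpu(stats[i].count);

			if (!count)
				continue;
			/* call count, then min/avg/max in microseconds */
			seq_printf(m, "token %3d: #%llu %llu/%llu/%llu\n", i, count,
				   be64_to_cpu(stats[i].min),
				   be64_to_cpu(stats[i].sum) / count,
				   be64_to_cpu(stats[i].max));
		}
		return 0;
	}
	DEFINE_SHOW_ATTRIBUTE(opal_call_stats);

	static int __init opal_call_stats_init(void)
	{
		struct device_node *np = of_find_node_by_path("/ibm,opal");
		u64 addr;
		int rc;

		if (!np)
			return -ENODEV;
		rc = of_property_read_u64(np, "ibm,opal-call-stats", &addr);
		of_node_put(np);
		if (rc)
			return rc;

		/* assumes the OPAL-reserved region can be mapped cacheable */
		stats = memremap(addr, sizeof(*stats) * (OPAL_LAST + 1), MEMREMAP_WB);
		if (!stats)
			return -ENOMEM;

		dent = debugfs_create_file("opal-call-stats", 0400, NULL, NULL,
					   &opal_call_stats_fops);
		return 0;
	}

	static void __exit opal_call_stats_exit(void)
	{
		debugfs_remove(dent);
		memunmap(stats);
	}

	module_init(opal_call_stats_init);
	module_exit(opal_call_stats_exit);
	MODULE_LICENSE("GPL");

Loaded on the host, this would produce one line per used token in
/sys/kernel/debug/opal-call-stats, in the same count plus min/avg/max
form as the listing in the commit message, with token numbers instead
of call names.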