Message ID | 20200917163544.142593-5-clg@kaod.org |
---|---|
State | RFC |
Headers | show |
Series | OPAL debugfs calls | expand |
Context | Check | Description |
---|---|---|
snowpatch_ozlabs/apply_patch | success | Successfully applied on branch master (d362ae4f4c521a7faffb1befe2fbba467f2c4d18) |
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot | success | Test snowpatch/job/snowpatch-skiboot on branch master |
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco | success | Signed-off-by present |
On Fri, Sep 18, 2020 at 2:35 AM Cédric Le Goater <clg@kaod.org> wrote: > > This is a set of simple routines to collect statistics on function > calls. It can used to measure low level HW operations and track > possible issues in locking, polling, etc. I don't mind the idea, but I'm a bit iffy about implementing a whole new infrastructure for it. We've already got the per-core trace buffer thing which fills more or less the same role and we barely use it. If we could build something like this on top of the tracing infra I think it'd be better overall since the trace buffer provides timestamping and allows us to supply additional context along with the raw durations, etc. I think the main reason why we haven't used the trace stuff much is because of how cumbersome it is to use. You need to define an ABI structure then write a bunch of code in OPAL to pack the data into that structure and a bunch more code in the userspace tool to de-serialise and render to text in the userspace tool. Maybe we can use your OPAL_DEBUG_READ call just use native structures and have skiboot render them to text on-demand. > Signed-off-by: Cédric Le Goater <clg@kaod.org> > --- > include/stat.h | 49 ++++++++++++++++++++++++ > core/stat.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++ > core/Makefile.inc | 2 +- > 3 files changed, 148 insertions(+), 1 deletion(-) > create mode 100644 include/stat.h > create mode 100644 core/stat.c > > diff --git a/include/stat.h b/include/stat.h > new file mode 100644 > index 000000000000..dc020862b199 > --- /dev/null > +++ b/include/stat.h > @@ -0,0 +1,49 @@ > +// SPDX-License-Identifier: Apache-2.0 > +/* > + * Statistics > + * > + * Copyright 2020 IBM Corp. > + */ > + > +#ifndef __STAT_H > +#define __STAT_H > + > +#include <lock.h> > + > +#define STAT_NR_RANGES 10 > +#define STAT_RANGE_WIDTH 200 /* values */ > +#define STAT_MAX_CALL_TIME 10 /* usecs */ unused > + > +struct stat_range { > + uint64_t count; > + uint64_t sum; > + uint64_t min; > + uint64_t max; > +}; The names here suggest we want to collect arbitrary stats, but as far as I can tell these are all used to store durations? Is that intentional? > +struct stat { > + const char *name; > + uint64_t max_call_time; > + uint64_t nr_ranges; > + uint64_t count; > + struct stat_range all; > + struct stat_range ranges[STAT_NR_RANGES]; > + > + struct lock lock; > +}; It might be a good idea to drop the lock and have the caller be responsible for ensuring mutual exclusion. Anything that we're interested in collecting statistics about is probably going to have a per-structure lock anyway (e.g. XIVEs, PHBs) > +extern void stat_init(struct stat *s, const char *name, uint64_t max_call_time); > +extern void stat_update(struct stat *s, uint64_t call_time); > +extern int stat_printf(struct stat *s, void *buf, int size); > + > +#define stat_call(expr, s) \ > +({ \ > + uint64_t before = mftb(); \ > + int64_t __rc = (expr); \ > + uint64_t call_time = tb_to_usecs(mftb() - before); \ > + stat_update(s, call_time); \ > + __rc; \ > +}) > + > +#endif /* __STAT_H */ > diff --git a/core/stat.c b/core/stat.c > new file mode 100644 > index 000000000000..6d1ddf2f1a80 > --- /dev/null > +++ b/core/stat.c > @@ -0,0 +1,98 @@ > +// SPDX-License-Identifier: Apache-2.0 > +/* > + * Statistics > + * > + * Copyright 2020 IBM Corp. > + */ > + > +#define pr_fmt(fmt) "STAT: " fmt > + > +#include <cpu.h> > +#include <opal.h> > +#include <stat.h> > +#include <stack.h> > + > +static void stat_range_reset(struct stat_range *r) > +{ > + r->count = 0; > + r->sum = 0; > + r->max = 0; > + r->min = 0xFFFFFFFFF; > +} > + > +void stat_init(struct stat *s, const char *name, uint64_t max_call_time) > +{ > + int i; > + > + s->name = name; > + s->max_call_time = max_call_time; > + s->nr_ranges = ARRAY_SIZE(s->ranges); > + for (i = 0; i < s->nr_ranges; i++) > + stat_range_reset(&s->ranges[i]); > + stat_range_reset(&s->all); > + init_lock(&s->lock); > +} > + > +static void stat_range_update(struct stat_range *r, uint64_t call_time) > +{ > + r->sum += call_time; > + if (call_time > r->max) > + r->max = call_time; > + if (call_time < r->min) > + r->min = call_time; > + r->count++; > +} > + > +#define stat_range_index(s) \ > + ((((s)->count) / STAT_RANGE_WIDTH) % (s)->nr_ranges) > + > +void stat_update(struct stat *s, uint64_t call_time) > +{ > + int ridx; > + struct stat_range *r; > + > + lock(&s->lock); > + > + ridx = stat_range_index(s); > + r = &s->ranges[ridx]; > + > + if (!(r->count % STAT_RANGE_WIDTH)) > + stat_range_reset(r); > + > + stat_range_update(r, call_time); > + if (s->max_call_time && call_time > s->max_call_time && > + call_time > s->all.max) { > + /* TODO: simulators need more time */ > + prlog(PR_WARNING, "CPU 0x%04x new %s maximum %lld at #%lld\n", > + this_cpu()->pir, s->name, r->max, s->count); > + backtrace(); > + } > + stat_range_update(&s->all, call_time); > + s->count++; > + > + unlock(&s->lock); > +} > +
diff --git a/include/stat.h b/include/stat.h new file mode 100644 index 000000000000..dc020862b199 --- /dev/null +++ b/include/stat.h @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Statistics + * + * Copyright 2020 IBM Corp. + */ + +#ifndef __STAT_H +#define __STAT_H + +#include <lock.h> + +#define STAT_NR_RANGES 10 +#define STAT_RANGE_WIDTH 200 /* values */ +#define STAT_MAX_CALL_TIME 10 /* usecs */ + +struct stat_range { + uint64_t count; + uint64_t sum; + uint64_t min; + uint64_t max; +}; + +struct stat { + const char *name; + uint64_t max_call_time; + uint64_t nr_ranges; + uint64_t count; + struct stat_range all; + struct stat_range ranges[STAT_NR_RANGES]; + + struct lock lock; +}; + + +extern void stat_init(struct stat *s, const char *name, uint64_t max_call_time); +extern void stat_update(struct stat *s, uint64_t call_time); +extern int stat_printf(struct stat *s, void *buf, int size); + +#define stat_call(expr, s) \ +({ \ + uint64_t before = mftb(); \ + int64_t __rc = (expr); \ + uint64_t call_time = tb_to_usecs(mftb() - before); \ + stat_update(s, call_time); \ + __rc; \ +}) + +#endif /* __STAT_H */ diff --git a/core/stat.c b/core/stat.c new file mode 100644 index 000000000000..6d1ddf2f1a80 --- /dev/null +++ b/core/stat.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: Apache-2.0 +/* + * Statistics + * + * Copyright 2020 IBM Corp. + */ + +#define pr_fmt(fmt) "STAT: " fmt + +#include <cpu.h> +#include <opal.h> +#include <stat.h> +#include <stack.h> + +static void stat_range_reset(struct stat_range *r) +{ + r->count = 0; + r->sum = 0; + r->max = 0; + r->min = 0xFFFFFFFFF; +} + +void stat_init(struct stat *s, const char *name, uint64_t max_call_time) +{ + int i; + + s->name = name; + s->max_call_time = max_call_time; + s->nr_ranges = ARRAY_SIZE(s->ranges); + for (i = 0; i < s->nr_ranges; i++) + stat_range_reset(&s->ranges[i]); + stat_range_reset(&s->all); + init_lock(&s->lock); +} + +static void stat_range_update(struct stat_range *r, uint64_t call_time) +{ + r->sum += call_time; + if (call_time > r->max) + r->max = call_time; + if (call_time < r->min) + r->min = call_time; + r->count++; +} + +#define stat_range_index(s) \ + ((((s)->count) / STAT_RANGE_WIDTH) % (s)->nr_ranges) + +void stat_update(struct stat *s, uint64_t call_time) +{ + int ridx; + struct stat_range *r; + + lock(&s->lock); + + ridx = stat_range_index(s); + r = &s->ranges[ridx]; + + if (!(r->count % STAT_RANGE_WIDTH)) + stat_range_reset(r); + + stat_range_update(r, call_time); + if (s->max_call_time && call_time > s->max_call_time && + call_time > s->all.max) { + /* TODO: simulators need more time */ + prlog(PR_WARNING, "CPU 0x%04x new %s maximum %lld at #%lld\n", + this_cpu()->pir, s->name, r->max, s->count); + backtrace(); + } + stat_range_update(&s->all, call_time); + s->count++; + + unlock(&s->lock); +} + +static int stat_range_printf(struct stat_range *r, void *buf, int size) +{ + return snprintf(buf, size, "#%lld %lld/%lld/%lld - ", + r->count, + r->count ? r->min : 0, + r->sum / r->count, + r->max); +} + +int stat_printf(struct stat *s, void *buf, int size) +{ + int n = 0; + int i; + + if (s->count) { + n += snprintf(buf + n, size - n, "%35s: ", s->name); + n += stat_range_printf(&s->all, buf + n, size - n); + for (i = 0; i < s->nr_ranges; i++) + n += stat_range_printf(&s->ranges[i], buf + n, size - n); + n += snprintf(buf + n, size - n,"\n"); + } + return n; +} diff --git a/core/Makefile.inc b/core/Makefile.inc index ed7003ed7a7a..326fc7917631 100644 --- a/core/Makefile.inc +++ b/core/Makefile.inc @@ -13,7 +13,7 @@ CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o CORE_OBJS += pci-dt-slot.o direct-controls.o cpufeatures.o CORE_OBJS += flash-firmware-versions.o opal-dump.o -CORE_OBJS += opal-debug.o +CORE_OBJS += opal-debug.o stat.o ifeq ($(SKIBOOT_GCOV),1) CORE_OBJS += gcov-profiling.o
This is a set of simple routines to collect statistics on function calls. It can used to measure low level HW operations and track possible issues in locking, polling, etc. Signed-off-by: Cédric Le Goater <clg@kaod.org> --- include/stat.h | 49 ++++++++++++++++++++++++ core/stat.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++ core/Makefile.inc | 2 +- 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 include/stat.h create mode 100644 core/stat.c