Message ID | DB6PR0801MB205326EE1564C219082724A983820@DB6PR0801MB2053.eurprd08.prod.outlook.com |
---|---|
State | New |
Headers | show |
On Wednesday 16 August 2017 08:52 PM, Wilco Dijkstra wrote: > Alexander Monakov wrote: >> I suggest using "reciprocal throughput" if you're looking for a short term >> for 'independent executions per unit time'. It's easier to recognize and >> already used in practice (e.g. in docs by Agner Fog). > > Right what about this? > > "workload-spec2006.wrf": { > "reciprocal throughput (ns)": 20, > "latency (ns)": 50, > "throughput (iters/s)": 5.0e+07 Please drop the spaces and special characters from the key names; they need to validate against benchout.schema, and IIRC it doesn't allow spaces. Simply reciprocal-throughput, latency, etc. ought to be sufficient. > } > > This leads to a question, some targets use an odd header hp-timing.h. What units > does this use? Or is it completely undefined (and could potentially change between > GLIBC versions)? The units differ between architectures, and may also change between glibc versions if, for example, an architecture later adds a hardware time source read. Siddhesh > > > ChangeLog: > 2017-08-16 Wilco Dijkstra <wdijkstr@arm.com> > > * benchtests/bench-skeleton.c (main): Add support for > latency benchmarking. > * benchtests/scripts/bench.py: Add support for latency benchmarking. 
> -- > > diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c > index 3c6dad705594ac0a53edcb4e09686252c13127cf..48287be93b432b3acfc2431d1f7959bd00815b3b 100644 > --- a/benchtests/bench-skeleton.c > +++ b/benchtests/bench-skeleton.c > @@ -71,8 +71,10 @@ main (int argc, char **argv) > bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0; > double d_total_i = 0; > timing_t total = 0, max = 0, min = 0x7fffffffffffffff; > + timing_t throughput = 0, latency = 0; > int64_t c = 0; > uint64_t cur; > + BENCH_VARS; > while (1) > { > if (is_bench) > @@ -86,7 +88,16 @@ main (int argc, char **argv) > BENCH_FUNC (v, i); > TIMING_NOW (end); > TIMING_DIFF (cur, start, end); > - TIMING_ACCUM (total, cur); > + TIMING_ACCUM (throughput, cur); > + > + TIMING_NOW (start); > + for (k = 0; k < iters; k++) > + for (i = 0; i < NUM_SAMPLES (v); i++) > + BENCH_FUNC_LAT (v, i); > + TIMING_NOW (end); > + TIMING_DIFF (cur, start, end); > + TIMING_ACCUM (latency, cur); > + > d_total_i += iters * NUM_SAMPLES (v); > } > else > @@ -131,12 +142,18 @@ main (int argc, char **argv) > /* Begin variant. 
*/ > json_attr_object_begin (&json_ctx, VARIANT (v)); > > - json_attr_double (&json_ctx, "duration", d_total_s); > - json_attr_double (&json_ctx, "iterations", d_total_i); > if (is_bench) > - json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i); > + { > + json_attr_double (&json_ctx, "reciprocal throughput (ns)", > + throughput / d_total_i); > + json_attr_double (&json_ctx, "latency (ns)", latency / d_total_i); > + json_attr_double (&json_ctx, "throughput (iters/s)", > + d_total_i / throughput * 1000000000.0); > + } > else > { > + json_attr_double (&json_ctx, "duration", d_total_s); > + json_attr_double (&json_ctx, "iterations", d_total_i); > json_attr_double (&json_ctx, "max", max / d_iters); > json_attr_double (&json_ctx, "min", min / d_iters); > json_attr_double (&json_ctx, "mean", d_total_s / d_total_i); > diff --git a/benchtests/scripts/bench.py b/benchtests/scripts/bench.py > index 8c1c9eeb2bc67a16cb8a8e010fd2b8a2ef8ab6df..b7ccb7c8c2bf1822202a2377dfb0675516115cc5 100755 > --- a/benchtests/scripts/bench.py > +++ b/benchtests/scripts/bench.py > @@ -45,7 +45,7 @@ DEFINES_TEMPLATE = ''' > # variant is represented by the _VARIANT structure. The ARGS structure > # represents a single set of arguments. 
> STRUCT_TEMPLATE = ''' > -#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s) > +#define CALL_BENCH_FUNC(v, i, x) %(func)s (x %(func_args)s) > > struct args > { > @@ -84,7 +84,9 @@ EPILOGUE = ''' > #define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing) > #define RESULT_ACCUM(r, v, i, old, new) \\ > ((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1)) > -#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);}) > +#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, );}) > +#define BENCH_FUNC_LAT(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, %(latarg)s);}) > +#define BENCH_VARS %(defvar)s > #define FUNCNAME "%(func)s" > #include "bench-skeleton.c"''' > > @@ -122,17 +124,22 @@ def gen_source(func, directives, all_vals): > # If we have a return value from the function, make sure it is > # assigned to prevent the compiler from optimizing out the > # call. > + getret = '' > + latarg = '' > + defvar = '' > + > if directives['ret']: > print('static %s volatile ret;' % directives['ret']) > - getret = 'ret = ' > - else: > - getret = '' > + print('static %s zero __attribute__((used)) = 0;' % directives['ret']) > + getret = 'ret = func_res = ' > + latarg = 'func_res * zero +' > + defvar = '%s func_res = 0;' % directives['ret'] > > # Test initialization. > if directives['init']: > print('#define BENCH_INIT %s' % directives['init']) > > - print(EPILOGUE % {'getret': getret, 'func': func}) > + print(EPILOGUE % {'getret': getret, 'func': func, 'latarg': latarg, 'defvar': defvar }) > > > def _print_arg_data(func, directives, all_vals): > >
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c index 3c6dad705594ac0a53edcb4e09686252c13127cf..48287be93b432b3acfc2431d1f7959bd00815b3b 100644 --- a/benchtests/bench-skeleton.c +++ b/benchtests/bench-skeleton.c @@ -71,8 +71,10 @@ main (int argc, char **argv) bool is_bench = strncmp (VARIANT (v), "workload-", 9) == 0; double d_total_i = 0; timing_t total = 0, max = 0, min = 0x7fffffffffffffff; + timing_t throughput = 0, latency = 0; int64_t c = 0; uint64_t cur; + BENCH_VARS; while (1) { if (is_bench) @@ -86,7 +88,16 @@ main (int argc, char **argv) BENCH_FUNC (v, i); TIMING_NOW (end); TIMING_DIFF (cur, start, end); - TIMING_ACCUM (total, cur); + TIMING_ACCUM (throughput, cur); + + TIMING_NOW (start); + for (k = 0; k < iters; k++) + for (i = 0; i < NUM_SAMPLES (v); i++) + BENCH_FUNC_LAT (v, i); + TIMING_NOW (end); + TIMING_DIFF (cur, start, end); + TIMING_ACCUM (latency, cur); + d_total_i += iters * NUM_SAMPLES (v); } else @@ -131,12 +142,18 @@ main (int argc, char **argv) /* Begin variant. 
*/ json_attr_object_begin (&json_ctx, VARIANT (v)); - json_attr_double (&json_ctx, "duration", d_total_s); - json_attr_double (&json_ctx, "iterations", d_total_i); if (is_bench) - json_attr_double (&json_ctx, "throughput", d_total_s / d_total_i); + { + json_attr_double (&json_ctx, "reciprocal throughput (ns)", + throughput / d_total_i); + json_attr_double (&json_ctx, "latency (ns)", latency / d_total_i); + json_attr_double (&json_ctx, "throughput (iters/s)", + d_total_i / throughput * 1000000000.0); + } else { + json_attr_double (&json_ctx, "duration", d_total_s); + json_attr_double (&json_ctx, "iterations", d_total_i); json_attr_double (&json_ctx, "max", max / d_iters); json_attr_double (&json_ctx, "min", min / d_iters); json_attr_double (&json_ctx, "mean", d_total_s / d_total_i); diff --git a/benchtests/scripts/bench.py b/benchtests/scripts/bench.py index 8c1c9eeb2bc67a16cb8a8e010fd2b8a2ef8ab6df..b7ccb7c8c2bf1822202a2377dfb0675516115cc5 100755 --- a/benchtests/scripts/bench.py +++ b/benchtests/scripts/bench.py @@ -45,7 +45,7 @@ DEFINES_TEMPLATE = ''' # variant is represented by the _VARIANT structure. The ARGS structure # represents a single set of arguments. 
STRUCT_TEMPLATE = ''' -#define CALL_BENCH_FUNC(v, i) %(func)s (%(func_args)s) +#define CALL_BENCH_FUNC(v, i, x) %(func)s (x %(func_args)s) struct args { @@ -84,7 +84,9 @@ EPILOGUE = ''' #define RESULT(__v, __i) (variants[(__v)].in[(__i)].timing) #define RESULT_ACCUM(r, v, i, old, new) \\ ((RESULT ((v), (i))) = (RESULT ((v), (i)) * (old) + (r)) / ((new) + 1)) -#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j);}) +#define BENCH_FUNC(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, );}) +#define BENCH_FUNC_LAT(i, j) ({%(getret)s CALL_BENCH_FUNC (i, j, %(latarg)s);}) +#define BENCH_VARS %(defvar)s #define FUNCNAME "%(func)s" #include "bench-skeleton.c"''' @@ -122,17 +124,22 @@ def gen_source(func, directives, all_vals): # If we have a return value from the function, make sure it is # assigned to prevent the compiler from optimizing out the # call. + getret = '' + latarg = '' + defvar = '' + if directives['ret']: print('static %s volatile ret;' % directives['ret']) - getret = 'ret = ' - else: - getret = '' + print('static %s zero __attribute__((used)) = 0;' % directives['ret']) + getret = 'ret = func_res = ' + latarg = 'func_res * zero +' + defvar = '%s func_res = 0;' % directives['ret'] # Test initialization. if directives['init']: print('#define BENCH_INIT %s' % directives['init']) - print(EPILOGUE % {'getret': getret, 'func': func}) + print(EPILOGUE % {'getret': getret, 'func': func, 'latarg': latarg, 'defvar': defvar }) def _print_arg_data(func, directives, all_vals):