New module to import and process bench.out
diff mbox

Message ID 20140612113706.GL10378@spoyarek.pnq.redhat.com
State New
Headers show

Commit Message

Siddhesh Poyarekar June 12, 2014, 11:37 a.m. UTC
Hi,

This is the beginning of a module to import and process benchmark
outputs.  Currently this is tied to the bench.out format, but in
future this needs to be generalized.

The module currently supports importing of a bench.out and validating
it against a schema file.  I have also added a function that
compresses detailed timings by grouping them into their means based on
how close they are to each other.

The idea here is to have a set of routines that benchmark consumers
may find useful to build their own analysis tools.  I have altered
validate_bench to use this module too.

Siddhesh

	* benchtests/scripts/import_bench.py: New file.
	* benchtests/scripts/validate_benchout.py: Import import_bench
	instead of jsonschema.
	(validate_bench): Remove function.
	(main): Use import_bench.

Comments

Siddhesh Poyarekar June 24, 2014, 9:07 a.m. UTC | #1
Ping!

On Thu, Jun 12, 2014 at 05:07:06PM +0530, Siddhesh Poyarekar wrote:
> Hi,
> 
> This is the beginning of a module to import and process benchmark
> outputs.  Currently this is tied to the bench.out format, but in
> future this needs to be generalized.
> 
> The module currently supports importing of a bench.out and validating
> it against a schema file.  I have also added a function that
> compresses detailed timings by grouping them into their means based on
> how close they are to each other.
> 
> The idea here is to have a set of routines that benchmark consumers
> may find useful to build their own analysis tools.  I have altered
> validate_bench to use this module too.
> 
> Siddhesh
> 
> 	* benchtests/scripts/import_bench.py: New file.
> 	* benchtests/scripts/validate_benchout.py: Import import_bench
> 	instead of jsonschema.
> 	(validate_bench): Remove function.
> 	(main): Use import_bench.
> 
> 
> diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
> new file mode 100755
> index 0000000..ffcb775
> --- /dev/null
> +++ b/benchtests/scripts/import_bench.py
> @@ -0,0 +1,141 @@
> +#!/usr/bin/python
> +# Copyright (C) 2014 Free Software Foundation, Inc.
> +# This file is part of the GNU C Library.
> +#
> +# The GNU C Library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# The GNU C Library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with the GNU C Library; if not, see
> +# <http://www.gnu.org/licenses/>.
> +"""Functions to import benchmark data and process it"""
> +
> +import json
> +try:
> +    import jsonschema as validator
> +except ImportError:
> +    print('Could not find jsonschema module.')
> +    raise
> +
> +
> +def mean(lst):
> +    """Compute and return mean of numbers in a list
> +
> +    The pypy average function has horrible performance, so implement our
> +    own mean function.
> +
> +    Args:
> +        lst: The list of numbers to average.
> +    Return:
> +        The mean of members in the list.
> +    """
> +    return sum(lst) / len(lst)
> +
> +
> +def split_list(bench, func, var):
> +    """ Split the list into a smaller set of more distinct points
> +
> +    Group together points such that the difference between the smallest
> +    point and the mean is less than 1/3rd of the mean.  This means that
> +    the mean is at most 1.5x the smallest member of that group.
> +
> +    mean - xmin < mean / 3
> +    i.e. 2 * mean / 3 < xmin
> +    i.e. mean < 3 * xmin / 2
> +
> +    For an evenly distributed group, the largest member will be less than
> +    twice the smallest member of the group.
> +    Derivation:
> +
> +    An evenly distributed series would be xmin, xmin + d, xmin + 2d...
> +
> +    mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n
> +    and max element is xmin + (n - 1) * d
> +
> +    Now, mean < 3 * xmin / 2
> +
> +    3 * xmin > 2 * mean
> +    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
> +    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
> +    n * xmin > n * (n - 1) * d
> +    xmin > (n - 1) * d
> +    2 * xmin > xmin + (n-1) * d
> +    2 * xmin > xmax
> +
> +    Hence, proved.
> +
> +    Similarly, it is trivial to prove that for a similar aggregation by using
> +    the maximum element, the maximum element in the group must be at most 4/3
> +    times the mean.
> +
> +    Args:
> +        bench: The benchmark object
> +        func: The function name
> +        var: The function variant name
> +    """
> +    means = []
> +    lst = bench['functions'][func][var]['timings']
> +    last = len(lst) - 1
> +    while lst:
> +        for i in range(last + 1):
> +            avg = mean(lst[i:])
> +            if avg > 0.75 * lst[last]:
> +                means.insert(0, avg)
> +                lst = lst[:i]
> +                last = i - 1
> +                break
> +    bench['functions'][func][var]['timings'] = means
> +
> +
> +def do_for_all_timings(bench, callback):
> +    """Call a function for all timing objects for each function and its
> +    variants.
> +
> +    Args:
> +        bench: The benchmark object
> +        callback: The callback function
> +    """
> +    for func in bench['functions'].keys():
> +        for k in bench['functions'][func].keys():
> +            if 'timings' not in bench['functions'][func][k].keys():
> +                continue
> +
> +            callback(bench, func, k)
> +
> +
> +def compress_timings(points):
> +    """Club points with close enough values into a single mean value
> +
> +    See split_list for details on how the clubbing is done.
> +
> +    Args:
> +        points: The set of points.
> +    """
> +    do_for_all_timings(points, split_list)
> +
> +
> +def parse_bench(filename, schema_filename):
> +    """Parse the input file
> +
> +    Parse and validate the json file containing the benchmark outputs.  Return
> +    the resulting object.
> +    Args:
> +        filename: Name of the benchmark output file.
> +    Return:
> +        The bench dictionary.
> +    """
> +    with open(schema_filename, 'r') as schemafile:
> +        schema = json.load(schemafile)
> +        with open(filename, 'r') as benchfile:
> +            bench = json.load(benchfile)
> +            validator.validate(bench, schema)
> +            do_for_all_timings(bench, lambda b, f, v:
> +                    b['functions'][f][v]['timings'].sort())
> +            return bench
> diff --git a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
> index 61a8cbd..9d3a5cb 100755
> --- a/benchtests/scripts/validate_benchout.py
> +++ b/benchtests/scripts/validate_benchout.py
> @@ -27,37 +27,26 @@ import sys
>  import os
>  
>  try:
> -    import jsonschema
> +    import import_bench as bench
>  except ImportError:
> -    print('Could not find jsonschema module.  Output not validated.')
> +    print('Import Error: Output will not be validated.')
>      # Return success because we don't want the bench target to fail just
>      # because the jsonschema module was not found.
>      sys.exit(os.EX_OK)
>  
>  
> -def validate_bench(benchfile, schemafile):
> -    """Validate benchmark file
> -
> -    Validate a benchmark output file against a JSON schema.
> +def print_and_exit(message, exitcode):
> +    """Prints message to stderr and returns the exit code.
>  
>      Args:
> -        benchfile: The file name of the bench.out file.
> -        schemafile: The file name of the JSON schema file to validate
> -        bench.out against.
> +        message: The message to print
> +        exitcode: The exit code to return
>  
> -    Exceptions:
> -        jsonschema.ValidationError: When bench.out is not valid
> -        jsonschema.SchemaError: When the JSON schema is not valid
> -        IOError: If any of the files are not found.
> +    Returns:
> +        The passed exit code
>      """
> -    with open(benchfile, 'r') as bfile:
> -        with open(schemafile, 'r') as sfile:
> -            bench = json.load(bfile)
> -            schema = json.load(sfile)
> -            jsonschema.validate(bench, schema)
> -
> -    # If we reach here, we're all good.
> -    print("Benchmark output in %s is valid." % benchfile)
> +    print(message, file=sys.stderr)
> +    return exitcode
>  
>  
>  def main(args):
> @@ -73,11 +62,23 @@ def main(args):
>          Exceptions thrown by validate_bench
>      """
>      if len(args) != 2:
> -        print("Usage: %s <bench.out file> <bench.out schema>" % sys.argv[0],
> -                file=sys.stderr)
> -        return os.EX_USAGE
> +        return print_and_exit("Usage: %s <bench.out file> <bench.out schema>"
> +                % sys.argv[0], os.EX_USAGE)
> +
> +    try:
> +        bench.parse_bench(args[0], args[1])
> +    except IOError as e:
> +        return print_and_exit("IOError(%d): %s" % (e.errno, e.strerror),
> +                os.EX_OSFILE)
> +
> +    except bench.validator.ValidationError as e:
> +        return print_and_exit("Invalid benchmark output: %s" % e.message,
> +            os.EX_DATAERR)
> +
> +    except bench.validator.SchemaError as e:
> +        return print_and_exit("Invalid schema: %s" % e.message, os.EX_DATAERR)
>  
> -    validate_bench(args[0], args[1])
> +    print("Benchmark output in %s is valid." % args[0])
>      return os.EX_OK
>  
>
Will Newton June 24, 2014, 9:22 a.m. UTC | #2
On 12 June 2014 12:37, Siddhesh Poyarekar <siddhesh@redhat.com> wrote:
> Hi,
>
> This is the beginning of a module to import and process benchmark
> outputs.  Currently this is tied to the bench.out format, but in
> future this needs to be generalized.
>
> The module currently supports importing of a bench.out and validating
> it against a schema file.  I have also added a function that
> compresses detailed timings by grouping them into their means based on
> how close they are to each other.
>
> The idea here is to have a set of routines that benchmark consumers
> may find useful to build their own analysis tools.  I have altered
> validate_bench to use this module too.
>
> Siddhesh
>
>         * benchtests/scripts/import_bench.py: New file.
>         * benchtests/scripts/validate_benchout.py: Import import_bench
>         instead of jsonschema.
>         (validate_bench): Remove function.
>         (main): Use import_bench.
>
>
> diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
> new file mode 100755
> index 0000000..ffcb775
> --- /dev/null
> +++ b/benchtests/scripts/import_bench.py
> @@ -0,0 +1,141 @@
> +#!/usr/bin/python
> +# Copyright (C) 2014 Free Software Foundation, Inc.
> +# This file is part of the GNU C Library.
> +#
> +# The GNU C Library is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU Lesser General Public
> +# License as published by the Free Software Foundation; either
> +# version 2.1 of the License, or (at your option) any later version.
> +#
> +# The GNU C Library is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +# Lesser General Public License for more details.
> +#
> +# You should have received a copy of the GNU Lesser General Public
> +# License along with the GNU C Library; if not, see
> +# <http://www.gnu.org/licenses/>.
> +"""Functions to import benchmark data and process it"""
> +
> +import json
> +try:
> +    import jsonschema as validator
> +except ImportError:
> +    print('Could not find jsonschema module.')
> +    raise
> +
> +
> +def mean(lst):
> +    """Compute and return mean of numbers in a list
> +
> +    The pypy average function has horrible performance, so implement our
> +    own mean function.

I'm not sure I really understand this comment. What is the relevance
of pypy? How bad can the performance actually be? :-/

> +
> +    Args:
> +        lst: The list of numbers to average.
> +    Return:
> +        The mean of members in the list.
> +    """
> +    return sum(lst) / len(lst)
> +
> +
> +def split_list(bench, func, var):
> +    """ Split the list into a smaller set of more distinct points
> +
> +    Group together points such that the difference between the smallest
> +    point and the mean is less than 1/3rd of the mean.  This means that
> +    the mean is at most 1.5x the smallest member of that group.
> +
> +    mean - xmin < mean / 3
> +    i.e. 2 * mean / 3 < xmin
> +    i.e. mean < 3 * xmin / 2
> +
> +    For an evenly distributed group, the largest member will be less than
> +    twice the smallest member of the group.
> +    Derivation:
> +
> +    An evenly distributed series would be xmin, xmin + d, xmin + 2d...
> +
> +    mean = (2 * n * xmin + n * (n - 1) * d) / 2 * n
> +    and max element is xmin + (n - 1) * d
> +
> +    Now, mean < 3 * xmin / 2
> +
> +    3 * xmin > 2 * mean
> +    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
> +    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
> +    n * xmin > n * (n - 1) * d
> +    xmin > (n - 1) * d
> +    2 * xmin > xmin + (n-1) * d
> +    2 * xmin > xmax
> +
> +    Hence, proved.
> +
> +    Similarly, it is trivial to prove that for a similar aggregation by using
> +    the maximum element, the maximum element in the group must be at most 4/3
> +    times the mean.
> +
> +    Args:
> +        bench: The benchmark object
> +        func: The function name
> +        var: The function variant name
> +    """
> +    means = []
> +    lst = bench['functions'][func][var]['timings']
> +    last = len(lst) - 1
> +    while lst:
> +        for i in range(last + 1):
> +            avg = mean(lst[i:])
> +            if avg > 0.75 * lst[last]:
> +                means.insert(0, avg)
> +                lst = lst[:i]
> +                last = i - 1
> +                break
> +    bench['functions'][func][var]['timings'] = means
> +
> +
> +def do_for_all_timings(bench, callback):
> +    """Call a function for all timing objects for each function and its
> +    variants.
> +
> +    Args:
> +        bench: The benchmark object
> +        callback: The callback function
> +    """
> +    for func in bench['functions'].keys():
> +        for k in bench['functions'][func].keys():
> +            if 'timings' not in bench['functions'][func][k].keys():
> +                continue
> +
> +            callback(bench, func, k)
> +
> +
> +def compress_timings(points):
> +    """Club points with close enough values into a single mean value
> +
> +    See split_list for details on how the clubbing is done.
> +
> +    Args:
> +        points: The set of points.
> +    """
> +    do_for_all_timings(points, split_list)
> +

Does it make sense to add these functions without a user? At the
moment I find it quite difficult to understand what they are doing and
an example would certainly help.

> +
> +def parse_bench(filename, schema_filename):
> +    """Parse the input file
> +
> +    Parse and validate the json file containing the benchmark outputs.  Return
> +    the resulting object.
> +    Args:
> +        filename: Name of the benchmark output file.
> +    Return:
> +        The bench dictionary.
> +    """
> +    with open(schema_filename, 'r') as schemafile:
> +        schema = json.load(schemafile)
> +        with open(filename, 'r') as benchfile:
> +            bench = json.load(benchfile)
> +            validator.validate(bench, schema)
> +            do_for_all_timings(bench, lambda b, f, v:
> +                    b['functions'][f][v]['timings'].sort())
> +            return bench
> diff --git a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
> index 61a8cbd..9d3a5cb 100755
> --- a/benchtests/scripts/validate_benchout.py
> +++ b/benchtests/scripts/validate_benchout.py
> @@ -27,37 +27,26 @@ import sys
>  import os
>
>  try:
> -    import jsonschema
> +    import import_bench as bench
>  except ImportError:
> -    print('Could not find jsonschema module.  Output not validated.')
> +    print('Import Error: Output will not be validated.')
>      # Return success because we don't want the bench target to fail just
>      # because the jsonschema module was not found.
>      sys.exit(os.EX_OK)
>
>
> -def validate_bench(benchfile, schemafile):
> -    """Validate benchmark file
> -
> -    Validate a benchmark output file against a JSON schema.
> +def print_and_exit(message, exitcode):
> +    """Prints message to stderr and returns the exit code.
>
>      Args:
> -        benchfile: The file name of the bench.out file.
> -        schemafile: The file name of the JSON schema file to validate
> -        bench.out against.
> +        message: The message to print
> +        exitcode: The exit code to return
>
> -    Exceptions:
> -        jsonschema.ValidationError: When bench.out is not valid
> -        jsonschema.SchemaError: When the JSON schema is not valid
> -        IOError: If any of the files are not found.
> +    Returns:
> +        The passed exit code
>      """
> -    with open(benchfile, 'r') as bfile:
> -        with open(schemafile, 'r') as sfile:
> -            bench = json.load(bfile)
> -            schema = json.load(sfile)
> -            jsonschema.validate(bench, schema)
> -
> -    # If we reach here, we're all good.
> -    print("Benchmark output in %s is valid." % benchfile)
> +    print(message, file=sys.stderr)
> +    return exitcode
>
>
>  def main(args):
> @@ -73,11 +62,23 @@ def main(args):
>          Exceptions thrown by validate_bench
>      """
>      if len(args) != 2:
> -        print("Usage: %s <bench.out file> <bench.out schema>" % sys.argv[0],
> -                file=sys.stderr)
> -        return os.EX_USAGE
> +        return print_and_exit("Usage: %s <bench.out file> <bench.out schema>"
> +                % sys.argv[0], os.EX_USAGE)
> +
> +    try:
> +        bench.parse_bench(args[0], args[1])
> +    except IOError as e:
> +        return print_and_exit("IOError(%d): %s" % (e.errno, e.strerror),
> +                os.EX_OSFILE)
> +
> +    except bench.validator.ValidationError as e:
> +        return print_and_exit("Invalid benchmark output: %s" % e.message,
> +            os.EX_DATAERR)
> +
> +    except bench.validator.SchemaError as e:
> +        return print_and_exit("Invalid schema: %s" % e.message, os.EX_DATAERR)
>
> -    validate_bench(args[0], args[1])
> +    print("Benchmark output in %s is valid." % args[0])
>      return os.EX_OK
>
>
Siddhesh Poyarekar June 24, 2014, 10:10 a.m. UTC | #3
On Tue, Jun 24, 2014 at 10:22:22AM +0100, Will Newton wrote:
> > +def mean(lst):
> > +    """Compute and return mean of numbers in a list
> > +
> > +    The pypy average function has horrible performance, so implement our
> > +    own mean function.
> 
> I'm not sure I really understand this comment. What is the relevance
> of pypy? How bad can the performance actually be? :-/

Sorry, that should be numpy, not pypy.  I guess the comment is not
necessary; I think I just vented my anger there ;) Reimplementing it
reduced computation from minutes to seconds, so yes, there is a fair
difference.  A simple list average can give an idea of how bad it is:

[15:30][siddhesh@spoyarek tmp ]$ time python <<EOF
import numpy
print(numpy.average(range(10000000)))
EOF
4999999.5

real    0m1.387s
user    0m1.248s
sys     0m0.131s
[15:30][siddhesh@spoyarek tmp ]$ time python <<EOF
l = range(10000000)
EOF                          

real    0m0.501s
user    0m0.358s
sys     0m0.140s
[15:30][siddhesh@spoyarek tmp ]$ time python <<EOF
l = range(10000000)
print(float(sum(l)) / len(l))
EOF
4999999.5

real    0m0.510s
user    0m0.376s
sys     0m0.129s

That's rather simplistic, but it should give you the idea - the custom
function hardly takes any time above the time required to build and
allocate the list and do any other sundry operations within the
interpreter.  numpy on the other hand takes a whole lot more time.  A
random list shows a lot of variance, presumably because it takes
variable time to build the list, but the custom function comes out on
top all the time.

> > +def compress_timings(points):
> > +    """Club points with close enough values into a single mean value
> > +
> > +    See split_list for details on how the clubbing is done.
> > +
> > +    Args:
> > +        points: The set of points.
> > +    """
> > +    do_for_all_timings(points, split_list)
> > +
> 
> Does it make sense to add these functions without a user? At the
> moment I find it quite difficult to understand what they are doing and
> an example would certainly help.

I thought it might demonstrate a use case, but I guess you're right,
it won't demonstrate a use case until a script is in place to
demonstrate it.  There is already such a script (see
siddhesh/benchmarks branch, although it may be outdated right now) so
I'll just remove this function from this patch and place it in the
patch where I post that script.

Thanks,
Siddhesh

Patch
diff mbox

diff --git a/benchtests/scripts/import_bench.py b/benchtests/scripts/import_bench.py
new file mode 100755
index 0000000..ffcb775
--- /dev/null
+++ b/benchtests/scripts/import_bench.py
@@ -0,0 +1,141 @@ 
+#!/usr/bin/python
+# Copyright (C) 2014 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, see
+# <http://www.gnu.org/licenses/>.
+"""Functions to import benchmark data and process it"""
+
+import json
+try:
+    import jsonschema as validator
+except ImportError:
+    print('Could not find jsonschema module.')
+    raise
+
+
+def mean(lst):
+    """Compute and return mean of numbers in a list
+
+    The numpy average function has horrible performance, so implement our
+    own mean function.
+
+    Args:
+        lst: The list of numbers to average.
+    Return:
+        The mean of members in the list.
+    """
+    return sum(lst) / len(lst)
+
+
+def split_list(bench, func, var):
+    """ Split the list into a smaller set of more distinct points
+
+    Group together points such that the difference between the smallest
+    point and the mean is less than 1/3rd of the mean.  This means that
+    the mean is at most 1.5x the smallest member of that group.
+
+    mean - xmin < mean / 3
+    i.e. 2 * mean / 3 < xmin
+    i.e. mean < 3 * xmin / 2
+
+    For an evenly distributed group, the largest member will be less than
+    twice the smallest member of the group.
+    Derivation:
+
+    An evenly distributed series would be xmin, xmin + d, xmin + 2d...
+
+    mean = (2 * n * xmin + n * (n - 1) * d) / (2 * n)
+    and max element is xmin + (n - 1) * d
+
+    Now, mean < 3 * xmin / 2
+
+    3 * xmin > 2 * mean
+    3 * xmin > (2 * n * xmin + n * (n - 1) * d) / n
+    3 * n * xmin > 2 * n * xmin + n * (n - 1) * d
+    n * xmin > n * (n - 1) * d
+    xmin > (n - 1) * d
+    2 * xmin > xmin + (n-1) * d
+    2 * xmin > xmax
+
+    Hence, proved.
+
+    Similarly, it is trivial to prove that for a similar aggregation by using
+    the maximum element, the maximum element in the group must be at most 4/3
+    times the mean.
+
+    Args:
+        bench: The benchmark object
+        func: The function name
+        var: The function variant name
+    """
+    means = []
+    lst = bench['functions'][func][var]['timings']
+    last = len(lst) - 1
+    while lst:
+        for i in range(last + 1):
+            avg = mean(lst[i:])
+            if avg > 0.75 * lst[last]:
+                means.insert(0, avg)
+                lst = lst[:i]
+                last = i - 1
+                break
+    bench['functions'][func][var]['timings'] = means
+
+
+def do_for_all_timings(bench, callback):
+    """Call a function for all timing objects for each function and its
+    variants.
+
+    Args:
+        bench: The benchmark object
+        callback: The callback function
+    """
+    for func in bench['functions'].keys():
+        for k in bench['functions'][func].keys():
+            if 'timings' not in bench['functions'][func][k].keys():
+                continue
+
+            callback(bench, func, k)
+
+
+def compress_timings(points):
+    """Club points with close enough values into a single mean value
+
+    See split_list for details on how the clubbing is done.
+
+    Args:
+        points: The set of points.
+    """
+    do_for_all_timings(points, split_list)
+
+
+def parse_bench(filename, schema_filename):
+    """Parse the input file
+
+    Parse and validate the json file containing the benchmark outputs.  Return
+    the resulting object.
+    Args:
+        filename: Name of the benchmark output file.
+    Return:
+        The bench dictionary.
+    """
+    with open(schema_filename, 'r') as schemafile:
+        schema = json.load(schemafile)
+        with open(filename, 'r') as benchfile:
+            bench = json.load(benchfile)
+            validator.validate(bench, schema)
+            do_for_all_timings(bench, lambda b, f, v:
+                    b['functions'][f][v]['timings'].sort())
+            return bench
diff --git a/benchtests/scripts/validate_benchout.py b/benchtests/scripts/validate_benchout.py
index 61a8cbd..9d3a5cb 100755
--- a/benchtests/scripts/validate_benchout.py
+++ b/benchtests/scripts/validate_benchout.py
@@ -27,37 +27,26 @@  import sys
 import os
 
 try:
-    import jsonschema
+    import import_bench as bench
 except ImportError:
-    print('Could not find jsonschema module.  Output not validated.')
+    print('Import Error: Output will not be validated.')
     # Return success because we don't want the bench target to fail just
     # because the jsonschema module was not found.
     sys.exit(os.EX_OK)
 
 
-def validate_bench(benchfile, schemafile):
-    """Validate benchmark file
-
-    Validate a benchmark output file against a JSON schema.
+def print_and_exit(message, exitcode):
+    """Prints message to stderr and returns the exit code.
 
     Args:
-        benchfile: The file name of the bench.out file.
-        schemafile: The file name of the JSON schema file to validate
-        bench.out against.
+        message: The message to print
+        exitcode: The exit code to return
 
-    Exceptions:
-        jsonschema.ValidationError: When bench.out is not valid
-        jsonschema.SchemaError: When the JSON schema is not valid
-        IOError: If any of the files are not found.
+    Returns:
+        The passed exit code
     """
-    with open(benchfile, 'r') as bfile:
-        with open(schemafile, 'r') as sfile:
-            bench = json.load(bfile)
-            schema = json.load(sfile)
-            jsonschema.validate(bench, schema)
-
-    # If we reach here, we're all good.
-    print("Benchmark output in %s is valid." % benchfile)
+    print(message, file=sys.stderr)
+    return exitcode
 
 
 def main(args):
@@ -73,11 +62,23 @@  def main(args):
         Exceptions thrown by validate_bench
     """
     if len(args) != 2:
-        print("Usage: %s <bench.out file> <bench.out schema>" % sys.argv[0],
-                file=sys.stderr)
-        return os.EX_USAGE
+        return print_and_exit("Usage: %s <bench.out file> <bench.out schema>"
+                % sys.argv[0], os.EX_USAGE)
+
+    try:
+        bench.parse_bench(args[0], args[1])
+    except IOError as e:
+        return print_and_exit("IOError(%d): %s" % (e.errno, e.strerror),
+                os.EX_OSFILE)
+
+    except bench.validator.ValidationError as e:
+        return print_and_exit("Invalid benchmark output: %s" % e.message,
+            os.EX_DATAERR)
+
+    except bench.validator.SchemaError as e:
+        return print_and_exit("Invalid schema: %s" % e.message, os.EX_DATAERR)
 
-    validate_bench(args[0], args[1])
+    print("Benchmark output in %s is valid." % args[0])
     return os.EX_OK