diff mbox

[13/22] Add checkers/checker.py

Message ID 1501884293-9047-14-git-send-email-dmalcolm@redhat.com
State New
Headers show

Commit Message

David Malcolm Aug. 4, 2017, 10:04 p.m. UTC
This patch:
* creates a new "checkers" top-level directory to hold
  harnesses for 3rd-party code-checking tools.
* adds a "checker.py" Python module for use when implementing
  such harnesses

3rd-party code-checking tools are expected to be passed
command-line arguments by the frontend, and to return a JSON
result on stdout; the job of each harness is to coerce the
output from the tool into the expected JSON output format.

The JSON format to be used is the "Firehose" serialization
format:
  http://firehose.readthedocs.io/en/latest/index.html

checkers/ChangeLog:
	* ChangeLog: New file.
	* checker.py: New file.
---
 checkers/ChangeLog  |   9 ++
 checkers/checker.py | 367 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 376 insertions(+)
 create mode 100644 checkers/ChangeLog
 create mode 100755 checkers/checker.py
diff mbox

Patch

diff --git a/checkers/ChangeLog b/checkers/ChangeLog
new file mode 100644
index 0000000..9189883
--- /dev/null
+++ b/checkers/ChangeLog
@@ -0,0 +1,9 @@ 
+2017-08-03  David Malcolm  <dmalcolm@redhat.com>
+
+	* ChangeLog: New ChangeLog file.
+
+Copyright (C) 2017 Free Software Foundation, Inc.
+
+Copying and distribution of this file, with or without modification,
+are permitted in any medium without royalty provided the copyright
+notice and this notice are preserved.
diff --git a/checkers/checker.py b/checkers/checker.py
new file mode 100755
index 0000000..262bd72
--- /dev/null
+++ b/checkers/checker.py
@@ -0,0 +1,367 @@ 
+#!/usr/bin/env python
+#   Copyright 2012, 2013, 2015, 2017 David Malcolm <dmalcolm@redhat.com>
+#   Copyright 2012, 2013, 2015, 2017 Red Hat, Inc.
+#
+#   This is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#   General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see
+#   <http://www.gnu.org/licenses/>.
+
+"""
+A "checker" is an executable which takes GCC-style command-line
+arguments and writes a Firehose JSON file to stdout.
+"""
+
+import json
+import logging
+import os
+import re
+import sys
+import tempfile
+import time
+import traceback
+import unittest
+
+if sys.version_info[0] < 3:
+    # http://pypi.python.org/pypi/subprocess32
+    # so that we can use timeouts
+    from subprocess32 import Popen, PIPE, STDOUT, TimeoutExpired
+else:
+    from subprocess import Popen, PIPE, STDOUT, TimeoutExpired
+
+from firehose.model import Analysis, Generator, Metadata, Failure, \
+    Location, File, Message, Issue, Trace
+
+from gccinvocation import GccInvocation
+
def make_file(givenpath):
    """Build a Firehose File for the given path (no abspath, no hash)."""
    # File is already imported at module level from firehose.model;
    # the previous function-local re-import was redundant.
    return File(givenpath=givenpath,
                abspath=None,
                hash_=None)
+
def make_stats(timer):
    """Convert a Timer into a Firehose Stats record (wallclock only)."""
    # Stats is not in the module-level firehose imports, so import it here.
    from firehose.model import Stats
    elapsed = timer.get_elapsed_time()
    return Stats(wallclocktime=elapsed)
+
class Timer:
    """
    Measure wallclock time elapsed since construction.
    """

    def __init__(self):
        # Remember when the timer was started.
        self.starttime = time.time()

    def get_elapsed_time(self):
        """Return the number of seconds elapsed so far, as a float."""
        return time.time() - self.starttime

    def elapsed_time_as_str(self):
        """Return the elapsed time as a human-readable string (with units)."""
        seconds = self.get_elapsed_time()
        text = '%0.3f seconds' % seconds
        # For long runs, append a rough minutes figure for readability.
        if seconds > 120:
            text = '%s (%i minutes)' % (text, int(seconds / 60))
        return text
+
class Context:
    """
    Per-run state shared by a checker harness: a tool name, optional
    logging to stderr, and the policy for exception capture.
    """

    def __init__(self, name, enable_logging=False, capture_exceptions=True):
        self.name = name
        self.enable_logging = enable_logging
        self.capture_exceptions = capture_exceptions
        if self.enable_logging:
            logging.basicConfig(
                format='%(asctime)s ' + name + ': %(message)s',
                level=logging.INFO,
                stream=sys.stderr)
            self.log('logging initialized')
        self.stdout = sys.stdout
        self.stderr = sys.stderr
        self.returncode = None

    def log(self, msg):
        # Logging is opt-in; a disabled context silently discards messages.
        if self.enable_logging:
            logging.info(msg)

    def write_streams(self, toolname, out, err):
        """Log every line of a subprocess's captured stdout and stderr."""
        for label, text in (('stdout', out), ('stderr', err)):
            for line in text.splitlines():
                self.log('%s from %r: %s\n' % (label, toolname, line))
+
class SubprocessResult:
    """
    A bundle of information relating to a subprocess invocation:
    the source file under analysis, the command line used, the exit
    code, the captured stdout/stderr bytes, and the run's Timer.
    """
    def __init__(self, sourcefile, argv, returncode, out, err, timer):
        self.sourcefile = sourcefile
        self.argv = argv
        self.returncode = returncode
        self.out = out    # bytes captured from the child's stdout
        self.err = err    # bytes captured from the child's stderr
        self.timer = timer

    def set_custom_fields(self, analysis):
        """Record this invocation's outcome on a Firehose Analysis."""
        analysis.set_custom_field('returncode', self.returncode)
        # Use errors='replace' so a tool that emits non-UTF-8 bytes
        # cannot crash the harness with a UnicodeDecodeError; invalid
        # sequences become U+FFFD replacement characters instead.
        analysis.set_custom_field('stdout',
                                  self.out.decode('utf-8', errors='replace'))
        analysis.set_custom_field('stderr',
                                  self.err.decode('utf-8', errors='replace'))

    def __repr__(self):
        return ('SubprocessResult(%r, %r, %r, %r, %r, %r)'
                % (self.sourcefile, self.argv, self.returncode,
                   self.out, self.err, self.timer))
+
class Checker:
    """
    Base class for a harness around a third-party code-checking tool.

    Subclasses must implement raw_invoke, and handle_output if they
    use the subprocess helpers.
    """
    def __init__(self, ctxt):
        self.name = ctxt.name
        # Maximum wallclock seconds to allow a tool subprocess to run.
        self.timeout = 60
        self.ctxt = ctxt

    def log(self, msg):
        """Forward a log message to the context."""
        self.ctxt.log(msg)

    def raw_invoke(self, gccinv, sourcefile):
        """
        Run the tool, with a timeout, returning an Analysis instance.
        May well raise an exception if something major went wrong.
        """
        raise NotImplementedError

    def checked_invoke(self, gccinv, sourcefile):
        """
        Call raw_invoke, converting timeouts (and, unless the context
        disables capture, any other exception) into Failure reports.
        Return an Analysis instance.
        """
        try:
            self.log('about to invoke: %s with %r' % (self.name, gccinv))
            analysis = self.raw_invoke(gccinv, sourcefile)
        except TimeoutExpired:
            analysis = self._make_failed_analysis(
                sourcefile, None, msgtext='Timeout running %s' % self.name,
                failureid='timeout')
        except Exception as exc:
            # Capture the exception as a Failure instance.
            # Alternatively when debugging such failures, it can
            # be easier to re-raise the exception:
            if not self.ctxt.capture_exceptions:
                raise
            analysis = self._make_failed_analysis(
                sourcefile, None,
                msgtext=('Exception running %s: %s' % (self.name, exc)),
                failureid='exception')
            analysis.set_custom_field('traceback', traceback.format_exc())
        if sourcefile:
            # Ensure the metadata records both the given and absolute paths.
            if not analysis.metadata.file_:
                analysis.metadata.file_ = make_file(givenpath=sourcefile)
            analysis.metadata.file_.givenpath = sourcefile
            analysis.metadata.file_.abspath = os.path.join(os.getcwd(),
                                                           sourcefile)
        return analysis

    def handle_output(self, result):
        """
        Given a SubprocessResult, return an Analysis instance.
        """
        raise NotImplementedError

    def _make_failed_analysis(self, sourcefile, t, msgtext, failureid):
        """
        Something went wrong; build an Analysis holding a single Failure.

        t may be None when no timing information is available.
        """
        generator = Generator(name=self.name, version=None)
        stats = make_stats(t) if t else None
        metadata = Metadata(generator=generator,
                            sut=None,
                            file_=make_file(sourcefile),
                            stats=stats)
        location = Location(file=make_file(sourcefile),
                            function=None,
                            point=None,
                            range_=None)
        failure = Failure(failureid=failureid,
                          location=location,
                          message=Message(msgtext),
                          customfields=None)
        return Analysis(metadata, [failure])

    def _run_subprocess(self, sourcefile, argv, env=None):
        """
        Run argv in a subprocess (with timeout support) and convert its
        output to an Analysis via handle_output; a timeout is reported
        as a Failure rather than raised.
        """
        self.log('%s: _run_subprocess(%r, %r)' % (self.name, sourcefile, argv))
        t = Timer()
        try:
            # Delegate the actual Popen/communicate to run_subprocess to
            # avoid duplicating the subprocess logic.
            result = self.run_subprocess(sourcefile, argv, env=env)
        except TimeoutExpired:
            analysis = self._make_failed_analysis(
                sourcefile, t,
                msgtext='Timeout running %s' % self.name,
                failureid='timeout')
            analysis.set_custom_field('timeout', self.timeout)
            analysis.set_custom_field('command-line', ' '.join(argv))
            return analysis
        return self.handle_output(result)

    def run_subprocess(self, sourcefile, argv, env=None):
        """
        Run argv in a subprocess, with timeout support, returning a
        SubprocessResult.  Raises TimeoutExpired (after killing and
        reaping the child) if the tool exceeds self.timeout seconds.
        """
        self.log('%s: run_subprocess(%r, %r)' % (self.name, sourcefile, argv))
        p = Popen(argv, stdout=PIPE, stderr=PIPE, env=env)
        t = Timer()
        try:
            out, err = p.communicate(timeout=self.timeout)
        except TimeoutExpired:
            # Kill and reap the child so it does not linger as a zombie
            # with its pipes still open (per the subprocess docs for
            # handling TimeoutExpired from communicate()).
            p.kill()
            p.communicate()
            raise
        self.ctxt.write_streams(argv[0], out, err)
        return SubprocessResult(sourcefile, argv, p.returncode, out, err, t)
+
+############################################################################
+# Test suite
+############################################################################
+
class CheckerTests(unittest.TestCase):
    """Shared machinery for exercising Checker subclasses in tests."""

    def make_ctxt(self, name, capture_exceptions=False):
        """Build a quiet Context for use in tests."""
        return Context(name, enable_logging=0,
                       capture_exceptions=capture_exceptions)

    def make_tool_from_class(self, tool_class):
        """Instantiate tool_class with a fresh test Context."""
        return tool_class(self.make_ctxt(tool_class.name))

    def make_tool(self):
        """Hook for self.make_compiler()"""
        raise NotImplementedError

    def verify_basic_metadata(self, analysis, sourcefile):
        """Hook for self.invoke()"""
        raise NotImplementedError

    def invoke(self, sourcefile, extraargs=None):
        """Invoke a tool and sanity-check the result"""
        tool = self.make_tool()
        argv = ['gcc', '-c', sourcefile] + (extraargs if extraargs else [])
        gccinv = GccInvocation(argv)
        analysis = tool.checked_invoke(gccinv, sourcefile)

        # Call a subclass hook to check basic metadata:
        self.verify_basic_metadata(analysis, sourcefile)

        # Verify that we can serialize to XML:
        self.assertTrue(analysis.to_xml_bytes().startswith(b'<analysis>'))

        # Verify it can roundtrip through JSON:
        serialized = json.dumps(analysis.to_json(), indent=1)
        Analysis.from_json(json.loads(serialized))

        return analysis

    def assert_metadata(self, analysis,
                        expected_generator_name, expected_given_path):
        metadata = analysis.metadata
        self.assertEqual(metadata.generator.name, expected_generator_name)
        self.assertEqual(metadata.file_.givenpath, expected_given_path)
        self.assertIn(expected_given_path, metadata.file_.abspath)

    def assert_has_custom_field(self, analysis, name):
        self.assertTrue(analysis.customfields)
        self.assertIn(name, analysis.customfields)
+
class BuggyCheckerTests(CheckerTests):
    """Verify that an exception raised by a tool becomes a Failure report."""

    def make_tool(self):
        """
        Override base class impl, so that we can enable
        exception-capture (and provide a custom tool)
        """
        class BuggyChecker(Checker):
            def raw_invoke(self, gccinv, sourcefile):
                raise ValueError('test of raising an exception')

        ctxt = self.make_ctxt('buggy', capture_exceptions=True)
        return BuggyChecker(ctxt)

    # Fixed: the body below was indented with 9 spaces instead of 8,
    # inconsistent with every other method in the file.
    def verify_basic_metadata(self, analysis, sourcefile):
        self.assert_metadata(analysis, 'buggy', sourcefile)

    def test_exception_handling(self):
        analysis = self.invoke('test-sources/harmless.c')
        self.assertEqual(len(analysis.results), 1)
        r0 = analysis.results[0]
        self.assertIsInstance(r0, Failure)
        self.assertEqual(r0.failureid, 'exception')
        self.assertEqual(r0.message.text,
                         ('Exception running buggy:'
                          ' test of raising an exception'))
        self.assertTrue(analysis.customfields['traceback'].startswith(
            'Traceback (most recent call last):\n'))
+
+############################################################################
+# Entrypoint
+############################################################################
+
def tool_main(argv, tool_class):
    """
    Entrypoint for use by the various per-tool scripts.

    If argv[1] is "unittest", run the unit-test suite instead.
    Otherwise, treat argv as a GCC-style invocation, run the tool on
    the (single) source file, and write a Firehose JSON analysis to
    stdout.
    """
    # If we're invoked with "unittest" as the first param,
    # run the unit test suite:
    if len(argv) >= 2 and argv[1] == 'unittest':
        sys.argv = [argv[0]] + argv[2:]
        return unittest.main()

    ctxt = Context(tool_class.name, enable_logging=False)
    tool = tool_class(ctxt)

    ctxt.log('argv: %r' % argv)

    gccinv = GccInvocation(argv)
    ctxt.log('  gccinv.sources: %r' % gccinv.sources)
    # Validate explicitly rather than via "assert", which is silently
    # stripped when running under "python -O".
    if len(gccinv.sources) != 1:
        raise ValueError('expected exactly one source file, got: %r'
                         % gccinv.sources)
    sourcefile = gccinv.sources[0]
    ctxt.log('    sourcefile: %r' % sourcefile)
    analysis = tool.checked_invoke(gccinv, sourcefile)
    json.dump(analysis.to_json(), sys.stdout, indent=1)
+
# Run the unit tests when this module is executed directly.
# NOTE(review): unittest.main() normally calls sys.exit itself, so the
# outer sys.exit appears redundant but harmless — confirm intended.

if __name__ == '__main__':
    sys.exit(unittest.main())