From patchwork Mon Aug 29 22:50:21 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Stephen Finucane X-Patchwork-Id: 663864 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3sNRhR70jmz9sBf for ; Tue, 30 Aug 2016 08:51:03 +1000 (AEST) Received: from ozlabs.org (lists.ozlabs.org [IPv6:2401:3900:2:1::3]) by lists.ozlabs.org (Postfix) with ESMTP id 3sNRhR2z1hzDrhF for ; Tue, 30 Aug 2016 08:51:03 +1000 (AEST) X-Original-To: patchwork@lists.ozlabs.org Delivered-To: patchwork@lists.ozlabs.org Received: from BLU004-OMC1S5.hotmail.com (blu004-omc1s5.hotmail.com [65.55.116.16]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-SHA384 (256/256 bits)) (No client certificate requested) by lists.ozlabs.org (Postfix) with ESMTPS id 3sNRhC2HL7zDrhQ for ; Tue, 30 Aug 2016 08:50:51 +1000 (AEST) Received: from BLU437-SMTP1 ([65.55.116.9]) by BLU004-OMC1S5.hotmail.com over TLS secured channel with Microsoft SMTPSVC(7.5.7601.23008); Mon, 29 Aug 2016 15:50:46 -0700 X-TMN: [PkKiGUNHPzC4oy+QfUD34H8pi2vkUui5] X-Originating-Email: [stephenfinucane@hotmail.com] Message-ID: From: Stephen Finucane To: patchwork@lists.ozlabs.org Subject: [PATCH v3 01/10] trivial: Cleanup of 'parser' Date: Mon, 29 Aug 2016 23:50:21 +0100 X-Mailer: git-send-email 2.7.4 In-Reply-To: <1472511030-6962-1-git-send-email-stephenfinucane@hotmail.com> References: <1472511030-6962-1-git-send-email-stephenfinucane@hotmail.com> X-OriginalArrivalTime: 29 Aug 2016 22:50:44.0317 (UTC) FILETIME=[C67CD8D0:01D20247] MIME-Version: 1.0 X-BeenThere: patchwork@lists.ozlabs.org X-Mailman-Version: 2.1.22 Precedence: list List-Id: Patchwork development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Cc: Stephen Finucane Errors-To: patchwork-bounces+incoming=patchwork.ozlabs.org@lists.ozlabs.org Sender: "Patchwork" From: Stephen Finucane Do some cleanup of the file by removing excess whitespace, adding some documentation, removing shadowing of keywords and renaming some functions to more accurately reflect their purpose. Signed-off-by: Stephen Finucane Reviewed-by: Andy Doan --- patchwork/bin/parsemail.py | 4 +-- patchwork/models.py | 4 +-- patchwork/parser.py | 74 +++++++++++++++++++++------------------------- 3 files changed, 38 insertions(+), 44 deletions(-) diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py index 48f809f..8913076 100755 --- a/patchwork/bin/parsemail.py +++ b/patchwork/bin/parsemail.py @@ -44,7 +44,7 @@ from django.utils.six.moves import map from patchwork.models import (Patch, Project, Person, Comment, State, DelegationRule, Submission, CoverLetter, get_default_initial_patch_state) -from patchwork.parser import parse_patch, patch_get_filenames +from patchwork.parser import parse_patch, find_filenames LOGGER = logging.getLogger(__name__) @@ -494,7 +494,7 @@ def parse_mail(mail, list_id=None): delegate = find_delegate(mail) if not delegate and diff: - filenames = patch_get_filenames(diff) + filenames = find_filenames(diff) delegate = auto_delegate(project, filenames) patch = Patch( diff --git a/patchwork/models.py b/patchwork/models.py index 521b20c..ee2ee63 100644 --- a/patchwork/models.py +++ b/patchwork/models.py @@ -35,7 +35,7 @@ from django.utils.functional import cached_property from django.utils.six.moves import filter from patchwork.fields import HashField -from patchwork.parser import extract_tags, hash_patch +from patchwork.parser import extract_tags, hash_diff @python_2_unicode_compatible @@ -364,7 +364,7 @@ class Patch(Submission): self.state = get_default_initial_patch_state() if self.hash is None and self.diff is not None: - self.hash = hash_patch(self.diff).hexdigest() + self.hash = hash_diff(self.diff).hexdigest() super(Patch, self).save(**kwargs) diff --git a/patchwork/parser.py b/patchwork/parser.py index 8bf9b21..f173431 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -33,7 +33,22 @@ _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') _filename_re = re.compile('^(---|\+\+\+) (\S+)') -def parse_patch(text): +def parse_patch(content): + """Split a mail's contents into a diff and comment. + + This is a state machine that takes a patch, generally in UNIX mbox + format, and splits it into the component comments and diff. + + Args: + patch: The patch to be split + + Returns: + A tuple containing the diff and comment. Either one or both of + these can be empty. + + Raises: + Exception: The state machine transitioned to an invalid state. + """ patchbuf = '' commentbuf = '' buf = '' @@ -68,7 +83,7 @@ def parse_patch(text): lc = (0, 0) hunk = 0 - for line in text.split('\n'): + for line in content.split('\n'): line += '\n' if state == 0: @@ -76,14 +91,11 @@ def parse_patch(text): or line.startswith('Index: '): state = 1 buf += line - elif line.startswith('--- '): state = 2 buf += line - else: commentbuf += line - elif state == 1: buf += line if line.startswith('--- '): @@ -91,25 +103,20 @@ def parse_patch(text): if line.startswith(('rename from ', 'rename to ')): state = 6 - elif state == 2: if line.startswith('+++ '): state = 3 buf += line - elif hunk: state = 1 buf += line - else: state = 0 commentbuf += buf + line buf = '' - elif state == 3: match = _hunk_re.match(line) if match: - def fn(x): if not x: return 1 @@ -120,26 +127,21 @@ def parse_patch(text): state = 4 patchbuf += buf + line buf = '' - elif line.startswith('--- '): patchbuf += buf + line buf = '' state = 2 - elif hunk and line.startswith('\ No newline at end of file'): # If we had a hunk and now we see this, it's part of the patch, # and we're still expecting another @@ line. patchbuf += line - elif hunk: state = 1 buf += line - else: state = 0 commentbuf += buf + line buf = '' - elif state == 4 or state == 5: if line.startswith('-'): lc[0] -= 1 @@ -159,21 +161,17 @@ def parse_patch(text): hunk += 1 else: state = 5 - elif state == 6: if line.startswith(('rename to ', 'rename from ')): patchbuf += buf + line buf = '' - elif line.startswith('--- '): patchbuf += buf + line buf = '' state = 2 - else: buf += line state = 1 - else: raise Exception("Unknown state %d! (line '%s')" % (state, line)) @@ -185,19 +183,19 @@ def parse_patch(text): if commentbuf == '': commentbuf = None - return (patchbuf, commentbuf) + return patchbuf, commentbuf -def hash_patch(str): +def hash_diff(diff): + """Generate a hash from a diff.""" # normalise spaces - str = str.replace('\r', '') - str = str.strip() + '\n' + diff = diff.replace('\r', '') + diff = diff.strip() + '\n' prefixes = ['-', '+', ' '] hash = hashlib.sha1() - for line in str.split('\n'): - + for line in diff.split('\n'): if len(line) <= 0: continue @@ -213,7 +211,6 @@ def hash_patch(str): filename += '/'.join(filename_match.group(2).split('/')[1:]) line = filename_match.group(1) + ' ' + filename - elif hunk_match: # remove line numbers, but leave line counts def fn(x): @@ -222,11 +219,9 @@ def hash_patch(str): return int(x) line_nos = list(map(fn, hunk_match.groups())) line = '@@ -%d +%d @@' % tuple(line_nos) - elif line[0] in prefixes: # if we have a +, - or context line, leave as-is pass - else: # other lines are ignored continue @@ -246,15 +241,15 @@ def extract_tags(content, tags): return counts -def patch_get_filenames(str): +def find_filenames(diff): + """Find files changes in a given diff.""" # normalise spaces - str = str.replace('\r', '') - str = str.strip() + '\n' + diff = diff.replace('\r', '') + diff = diff.strip() + '\n' filenames = {} - for line in str.split('\n'): - + for line in diff.split('\n'): if len(line) <= 0: continue @@ -291,21 +286,20 @@ def main(args): # decode from (assumed) UTF-8 content = sys.stdin.read().decode('utf-8') - - (patch, comment) = parse_patch(content) + patch, comment = parse_patch(content) if options.print_hash and patch: - print(hash_patch(patch).hexdigest()) + print(hash_diff(patch).hexdigest()) if options.print_patch and patch: - print("Patch: ------\n" + patch) + print('Patch: ------\n' + patch) if options.print_comment and comment: - print("Comment: ----\n" + comment) + print('Comment: ----\n' + comment) if options.print_filenames: - filenames = patch_get_filenames(content) - print("File names: ----\n" + '\n'.join(filenames)) + filenames = find_filenames(content) + print('File names: ----\n' + '\n'.join(filenames)) if __name__ == '__main__': import sys