Message ID | 1468402860-3409-2-git-send-email-stephen.finucane@intel.com |
---|---|
State | Superseded |
Headers | show |
On 07/13/2016 04:40 AM, Stephen Finucane wrote: > Do some cleanup of the file by removing excess whitespace, adding some > documentation, removing shadowing of keywords and renaming some > functions to more accurately reflect their purpose. > > Signed-off-by: Stephen Finucane <stephen.finucane@intel.com> Reviewed-by: Andy Doan <andy.doan@linaro.org> > --- > patchwork/bin/parsemail.py | 4 +- > patchwork/models.py | 4 +- > patchwork/parser.py | 74 ++++++++++++++++++++----------------------- > 3 files changed, 38 insertions(+), 44 deletions(-) > > diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py > index 8648d29..56cd126 100755 > --- a/patchwork/bin/parsemail.py > +++ b/patchwork/bin/parsemail.py > @@ -44,7 +44,7 @@ from django.utils.six.moves import map > from patchwork.models import (Patch, Project, Person, Comment, State, > DelegationRule, Submission, CoverLetter, > get_default_initial_patch_state) > -from patchwork.parser import parse_patch, patch_get_filenames > +from patchwork.parser import parse_patch, find_filenames > > LOGGER = logging.getLogger(__name__) > > @@ -494,7 +494,7 @@ def parse_mail(mail, list_id=None): > > delegate = find_delegate(mail) > if not delegate and diff: > - filenames = patch_get_filenames(diff) > + filenames = find_filenames(diff) > delegate = auto_delegate(project, filenames) > > patch = Patch( > diff --git a/patchwork/models.py b/patchwork/models.py > index 521b20c..ee2ee63 100644 > --- a/patchwork/models.py > +++ b/patchwork/models.py > @@ -35,7 +35,7 @@ from django.utils.functional import cached_property > from django.utils.six.moves import filter > > from patchwork.fields import HashField > -from patchwork.parser import extract_tags, hash_patch > +from patchwork.parser import extract_tags, hash_diff > > > @python_2_unicode_compatible > @@ -364,7 +364,7 @@ class Patch(Submission): > self.state = get_default_initial_patch_state() > > if self.hash is None and self.diff is not None: > - self.hash = hash_patch(self.diff).hexdigest() > + self.hash = hash_diff(self.diff).hexdigest() > > super(Patch, self).save(**kwargs) > > diff --git a/patchwork/parser.py b/patchwork/parser.py > index 8bf9b21..f173431 100644 > --- a/patchwork/parser.py > +++ b/patchwork/parser.py > @@ -33,7 +33,22 @@ _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') > _filename_re = re.compile('^(---|\+\+\+) (\S+)') > > > -def parse_patch(text): > +def parse_patch(content): > + """Split a mail's contents into a diff and comment. > + > + This is a state machine that takes a patch, generally in UNIX mbox > + format, and splits it into the component comments and diff. > + > + Args: > + patch: The patch to be split > + > + Returns: > + A tuple containing the diff and comment. Either one or both of > + these can be empty. > + > + Raises: > + Exception: The state machine transitioned to an invalid state. > + """ > patchbuf = '' > commentbuf = '' > buf = '' > @@ -68,7 +83,7 @@ def parse_patch(text): > lc = (0, 0) > hunk = 0 > > - for line in text.split('\n'): > + for line in content.split('\n'): > line += '\n' > > if state == 0: > @@ -76,14 +91,11 @@ def parse_patch(text): > or line.startswith('Index: '): > state = 1 > buf += line > - > elif line.startswith('--- '): > state = 2 > buf += line > - > else: > commentbuf += line > - > elif state == 1: > buf += line > if line.startswith('--- '): > @@ -91,25 +103,20 @@ def parse_patch(text): > > if line.startswith(('rename from ', 'rename to ')): > state = 6 > - > elif state == 2: > if line.startswith('+++ '): > state = 3 > buf += line > - > elif hunk: > state = 1 > buf += line > - > else: > state = 0 > commentbuf += buf + line > buf = '' > - > elif state == 3: > match = _hunk_re.match(line) > if match: > - > def fn(x): > if not x: > return 1 > @@ -120,26 +127,21 @@ def parse_patch(text): > state = 4 > patchbuf += buf + line > buf = '' > - > elif line.startswith('--- '): > patchbuf += buf + line > buf = '' > state = 2 > - > elif hunk and line.startswith('\ No newline at end of file'): > # If we had a hunk and now we see this, it's part of the patch, > # and we're still expecting another @@ line. > patchbuf += line > - > elif hunk: > state = 1 > buf += line > - > else: > state = 0 > commentbuf += buf + line > buf = '' > - > elif state == 4 or state == 5: > if line.startswith('-'): > lc[0] -= 1 > @@ -159,21 +161,17 @@ def parse_patch(text): > hunk += 1 > else: > state = 5 > - > elif state == 6: > if line.startswith(('rename to ', 'rename from ')): > patchbuf += buf + line > buf = '' > - > elif line.startswith('--- '): > patchbuf += buf + line > buf = '' > state = 2 > - > else: > buf += line > state = 1 > - > else: > raise Exception("Unknown state %d! (line '%s')" % (state, line)) > > @@ -185,19 +183,19 @@ def parse_patch(text): > if commentbuf == '': > commentbuf = None > > - return (patchbuf, commentbuf) > + return patchbuf, commentbuf > > > -def hash_patch(str): > +def hash_diff(diff): > + """Generate a hash from a diff.""" > # normalise spaces > - str = str.replace('\r', '') > - str = str.strip() + '\n' > + diff = diff.replace('\r', '') > + diff = diff.strip() + '\n' > > prefixes = ['-', '+', ' '] > hash = hashlib.sha1() > > - for line in str.split('\n'): > - > + for line in diff.split('\n'): > if len(line) <= 0: > continue > > @@ -213,7 +211,6 @@ def hash_patch(str): > filename += '/'.join(filename_match.group(2).split('/')[1:]) > > line = filename_match.group(1) + ' ' + filename > - > elif hunk_match: > # remove line numbers, but leave line counts > def fn(x): > @@ -222,11 +219,9 @@ def hash_patch(str): > return int(x) > line_nos = list(map(fn, hunk_match.groups())) > line = '@@ -%d +%d @@' % tuple(line_nos) > - > elif line[0] in prefixes: > # if we have a +, - or context line, leave as-is > pass > - > else: > # other lines are ignored > continue > @@ -246,15 +241,15 @@ def extract_tags(content, tags): > return counts > > > -def patch_get_filenames(str): > +def find_filenames(diff): > + """Find files changes in a given diff.""" > # normalise spaces > - str = str.replace('\r', '') > - str = str.strip() + '\n' > + diff = diff.replace('\r', '') > + diff = diff.strip() + '\n' > > filenames = {} > > - for line in str.split('\n'): > - > + for line in diff.split('\n'): > if len(line) <= 0: > continue > > @@ -291,21 +286,20 @@ def main(args): > > # decode from (assumed) UTF-8 > content = sys.stdin.read().decode('utf-8') > - > - (patch, comment) = parse_patch(content) > + patch, comment = parse_patch(content) > > if options.print_hash and patch: > - print(hash_patch(patch).hexdigest()) > + print(hash_diff(patch).hexdigest()) > > if options.print_patch and patch: > - print("Patch: ------\n" + patch) > + print('Patch: ------\n' + patch) > > if options.print_comment and comment: > - print("Comment: ----\n" + comment) > + print('Comment: ----\n' + comment) > > if options.print_filenames: > - filenames = patch_get_filenames(content) > - print("File names: ----\n" + '\n'.join(filenames)) > + filenames = find_filenames(content) > + print('File names: ----\n' + '\n'.join(filenames)) > > if __name__ == '__main__': > import sys >
diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py index 8648d29..56cd126 100755 --- a/patchwork/bin/parsemail.py +++ b/patchwork/bin/parsemail.py @@ -44,7 +44,7 @@ from django.utils.six.moves import map from patchwork.models import (Patch, Project, Person, Comment, State, DelegationRule, Submission, CoverLetter, get_default_initial_patch_state) -from patchwork.parser import parse_patch, patch_get_filenames +from patchwork.parser import parse_patch, find_filenames LOGGER = logging.getLogger(__name__) @@ -494,7 +494,7 @@ def parse_mail(mail, list_id=None): delegate = find_delegate(mail) if not delegate and diff: - filenames = patch_get_filenames(diff) + filenames = find_filenames(diff) delegate = auto_delegate(project, filenames) patch = Patch( diff --git a/patchwork/models.py b/patchwork/models.py index 521b20c..ee2ee63 100644 --- a/patchwork/models.py +++ b/patchwork/models.py @@ -35,7 +35,7 @@ from django.utils.functional import cached_property from django.utils.six.moves import filter from patchwork.fields import HashField -from patchwork.parser import extract_tags, hash_patch +from patchwork.parser import extract_tags, hash_diff @python_2_unicode_compatible @@ -364,7 +364,7 @@ class Patch(Submission): self.state = get_default_initial_patch_state() if self.hash is None and self.diff is not None: - self.hash = hash_patch(self.diff).hexdigest() + self.hash = hash_diff(self.diff).hexdigest() super(Patch, self).save(**kwargs) diff --git a/patchwork/parser.py b/patchwork/parser.py index 8bf9b21..f173431 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -33,7 +33,22 @@ _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') _filename_re = re.compile('^(---|\+\+\+) (\S+)') -def parse_patch(text): +def parse_patch(content): + """Split a mail's contents into a diff and comment. + + This is a state machine that takes a patch, generally in UNIX mbox + format, and splits it into the component comments and diff. + + Args: + patch: The patch to be split + + Returns: + A tuple containing the diff and comment. Either one or both of + these can be empty. + + Raises: + Exception: The state machine transitioned to an invalid state. + """ patchbuf = '' commentbuf = '' buf = '' @@ -68,7 +83,7 @@ def parse_patch(text): lc = (0, 0) hunk = 0 - for line in text.split('\n'): + for line in content.split('\n'): line += '\n' if state == 0: @@ -76,14 +91,11 @@ def parse_patch(text): or line.startswith('Index: '): state = 1 buf += line - elif line.startswith('--- '): state = 2 buf += line - else: commentbuf += line - elif state == 1: buf += line if line.startswith('--- '): @@ -91,25 +103,20 @@ def parse_patch(text): if line.startswith(('rename from ', 'rename to ')): state = 6 - elif state == 2: if line.startswith('+++ '): state = 3 buf += line - elif hunk: state = 1 buf += line - else: state = 0 commentbuf += buf + line buf = '' - elif state == 3: match = _hunk_re.match(line) if match: - def fn(x): if not x: return 1 @@ -120,26 +127,21 @@ def parse_patch(text): state = 4 patchbuf += buf + line buf = '' - elif line.startswith('--- '): patchbuf += buf + line buf = '' state = 2 - elif hunk and line.startswith('\ No newline at end of file'): # If we had a hunk and now we see this, it's part of the patch, # and we're still expecting another @@ line. patchbuf += line - elif hunk: state = 1 buf += line - else: state = 0 commentbuf += buf + line buf = '' - elif state == 4 or state == 5: if line.startswith('-'): lc[0] -= 1 @@ -159,21 +161,17 @@ def parse_patch(text): hunk += 1 else: state = 5 - elif state == 6: if line.startswith(('rename to ', 'rename from ')): patchbuf += buf + line buf = '' - elif line.startswith('--- '): patchbuf += buf + line buf = '' state = 2 - else: buf += line state = 1 - else: raise Exception("Unknown state %d! (line '%s')" % (state, line)) @@ -185,19 +183,19 @@ def parse_patch(text): if commentbuf == '': commentbuf = None - return (patchbuf, commentbuf) + return patchbuf, commentbuf -def hash_patch(str): +def hash_diff(diff): + """Generate a hash from a diff.""" # normalise spaces - str = str.replace('\r', '') - str = str.strip() + '\n' + diff = diff.replace('\r', '') + diff = diff.strip() + '\n' prefixes = ['-', '+', ' '] hash = hashlib.sha1() - for line in str.split('\n'): - + for line in diff.split('\n'): if len(line) <= 0: continue @@ -213,7 +211,6 @@ def hash_patch(str): filename += '/'.join(filename_match.group(2).split('/')[1:]) line = filename_match.group(1) + ' ' + filename - elif hunk_match: # remove line numbers, but leave line counts def fn(x): @@ -222,11 +219,9 @@ def hash_patch(str): return int(x) line_nos = list(map(fn, hunk_match.groups())) line = '@@ -%d +%d @@' % tuple(line_nos) - elif line[0] in prefixes: # if we have a +, - or context line, leave as-is pass - else: # other lines are ignored continue @@ -246,15 +241,15 @@ def extract_tags(content, tags): return counts -def patch_get_filenames(str): +def find_filenames(diff): + """Find files changes in a given diff.""" # normalise spaces - str = str.replace('\r', '') - str = str.strip() + '\n' + diff = diff.replace('\r', '') + diff = diff.strip() + '\n' filenames = {} - for line in str.split('\n'): - + for line in diff.split('\n'): if len(line) <= 0: continue @@ -291,21 +286,20 @@ def main(args): # decode from (assumed) UTF-8 content = sys.stdin.read().decode('utf-8') - - (patch, comment) = parse_patch(content) + patch, comment = parse_patch(content) if options.print_hash and patch: - print(hash_patch(patch).hexdigest()) + print(hash_diff(patch).hexdigest()) if options.print_patch and patch: - print("Patch: ------\n" + patch) + print('Patch: ------\n' + patch) if options.print_comment and comment: - print("Comment: ----\n" + comment) + print('Comment: ----\n' + comment) if options.print_filenames: - filenames = patch_get_filenames(content) - print("File names: ----\n" + '\n'.join(filenames)) + filenames = find_filenames(content) + print('File names: ----\n' + '\n'.join(filenames)) if __name__ == '__main__': import sys
Do some cleanup of the file by removing excess whitespace, adding some documentation, removing shadowing of keywords and renaming some functions to more accurately reflect their purpose. Signed-off-by: Stephen Finucane <stephen.finucane@intel.com> --- patchwork/bin/parsemail.py | 4 +- patchwork/models.py | 4 +- patchwork/parser.py | 74 ++++++++++++++++++++----------------------- 3 files changed, 38 insertions(+), 44 deletions(-)